From: Greg Kroah-Hartman Date: Fri, 25 Jun 2010 23:30:20 +0000 (-0700) Subject: .33 patches X-Git-Tag: v2.6.31.14~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6c4a7ca669bdbb1c7c9b354a5ea9744570bd857d;p=thirdparty%2Fkernel%2Fstable-queue.git .33 patches --- diff --git a/queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch b/queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch new file mode 100644 index 00000000000..1eed513be2a --- /dev/null +++ b/queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch @@ -0,0 +1,202 @@ +From 60cddf3700fe0760425aebe1d0d0850a15faf50e Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Fri, 19 Feb 2010 16:23:00 +0100 +Subject: KVM: SVM: Don't use kmap_atomic in nested_svm_map + +From: Joerg Roedel + +Use of kmap_atomic disables preemption but if we run in +shadow-shadow mode the vmrun emulation executes kvm_set_cr3 +which might sleep or fault. So use kmap instead for +nested_svm_map. + +Cc: stable@kernel.org +Signed-off-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman + +(cherry picked from commit 7597f129d8b6799da7a264e6d6f7401668d3a36d) +--- + arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++----------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1398,7 +1398,7 @@ static inline int nested_svm_intr(struct + return 0; + } + +-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) ++static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) + { + struct page *page; + +@@ -1406,7 +1406,9 @@ static void *nested_svm_map(struct vcpu_ + if (is_error_page(page)) + goto error; + +- return kmap_atomic(page, idx); ++ *_page = page; ++ ++ return kmap(page); + + error: + kvm_release_page_clean(page); +@@ -1415,16 +1417,9 @@ error: + return NULL; + } + +-static void nested_svm_unmap(void *addr, enum km_type idx) ++static void nested_svm_unmap(struct page *page) + { +- struct page *page; +- +- if (!addr) +- return; +- +- page = kmap_atomic_to_page(addr); +- +- kunmap_atomic(addr, idx); ++ kunmap(page); + kvm_release_page_dirty(page); + } + +@@ -1432,6 +1427,7 @@ static bool nested_svm_exit_handled_msr( + { + u32 param = svm->vmcb->control.exit_info_1 & 1; + u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; ++ struct page *page; + bool ret = false; + u32 t0, t1; + u8 *msrpm; +@@ -1439,7 +1435,7 @@ static bool nested_svm_exit_handled_msr( + if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + return false; + +- msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); ++ msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page); + + if (!msrpm) + goto out; +@@ -1467,7 +1463,7 @@ static bool nested_svm_exit_handled_msr( + ret = msrpm[t1] & ((1 << param) << t0); + + out: +- nested_svm_unmap(msrpm, KM_USER0); ++ nested_svm_unmap(page); + + return ret; + } +@@ -1590,6 +1586,7 @@ static int nested_svm_vmexit(struct vcpu + struct vmcb *nested_vmcb; + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; ++ struct page *page; + + trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, + vmcb->control.exit_info_1, +@@ -1597,7 +1594,7 @@ static int nested_svm_vmexit(struct vcpu + vmcb->control.exit_int_info, + vmcb->control.exit_int_info_err); + +- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); ++ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); + if (!nested_vmcb) + return 1; + +@@ -1687,7 +1684,7 @@ static int 
nested_svm_vmexit(struct vcpu + /* Exit nested SVM mode */ + svm->nested.vmcb = 0; + +- nested_svm_unmap(nested_vmcb, KM_USER0); ++ nested_svm_unmap(page); + + kvm_mmu_reset_context(&svm->vcpu); + kvm_mmu_load(&svm->vcpu); +@@ -1698,9 +1695,10 @@ static int nested_svm_vmexit(struct vcpu + static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) + { + u32 *nested_msrpm; ++ struct page *page; + int i; + +- nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); ++ nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page); + if (!nested_msrpm) + return false; + +@@ -1709,7 +1707,7 @@ static bool nested_svm_vmrun_msrpm(struc + + svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); + +- nested_svm_unmap(nested_msrpm, KM_USER0); ++ nested_svm_unmap(page); + + return true; + } +@@ -1719,8 +1717,9 @@ static bool nested_svm_vmrun(struct vcpu + struct vmcb *nested_vmcb; + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; ++ struct page *page; + +- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + if (!nested_vmcb) + return false; + +@@ -1832,7 +1831,7 @@ static bool nested_svm_vmrun(struct vcpu + svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; + svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + +- nested_svm_unmap(nested_vmcb, KM_USER0); ++ nested_svm_unmap(page); + + enable_gif(svm); + +@@ -1858,6 +1857,7 @@ static void nested_svm_vmloadsave(struct + static int vmload_interception(struct vcpu_svm *svm) + { + struct vmcb *nested_vmcb; ++ struct page *page; + + if (nested_svm_check_permissions(svm)) + return 1; +@@ -1865,12 +1865,12 @@ static int vmload_interception(struct vc + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + +- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + if (!nested_vmcb) + return 1; + + nested_svm_vmloadsave(nested_vmcb, svm->vmcb); +- nested_svm_unmap(nested_vmcb, KM_USER0); ++ nested_svm_unmap(page); + + return 1; + } +@@ -1878,6 +1878,7 @@ static int vmload_interception(struct vc + static int vmsave_interception(struct vcpu_svm *svm) + { + struct vmcb *nested_vmcb; ++ struct page *page; + + if (nested_svm_check_permissions(svm)) + return 1; +@@ -1885,12 +1886,12 @@ static int vmsave_interception(struct vc + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + +- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + if (!nested_vmcb) + return 1; + + nested_svm_vmloadsave(svm->vmcb, nested_vmcb); +- nested_svm_unmap(nested_vmcb, KM_USER0); ++ nested_svm_unmap(page); + + return 1; + } diff --git a/queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch b/queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch new file mode 100644 index 00000000000..b0c5c980cb4 --- /dev/null +++ b/queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch @@ -0,0 +1,81 @@ +From 397cb347161b605d7bdff4240d0d267bf48f4ae2 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Fri, 19 Feb 2010 16:23:02 +0100 +Subject: KVM: SVM: Fix schedule-while-atomic on nested exception handling + +From: Joerg Roedel + +Move the actual vmexit routine out of code that runs with +irqs and preemption disabled. 
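+
+In outline this is a "decide now, act later" split; a minimal sketch
+using the names this patch introduces (illustrative only, not the
+full hunks):
+
+    /* atomic context: only record that a #vmexit is required */
+    vmexit = nested_svm_intercept(svm);
+    if (vmexit == NESTED_EXIT_DONE)
+        svm->nested.exit_required = true;
+
+    /* later, with irqs and preemption enabled again */
+    if (svm->nested.exit_required)
+        nested_svm_vmexit(svm);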
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+
+(cherry picked from commit b8e88bc8ffba5fe53fb8d8a0a4be3bbcffeebe56)
+---
+ arch/x86/kvm/svm.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -128,6 +128,7 @@ static void svm_flush_tlb(struct kvm_vcp
+ static void svm_complete_interrupts(struct vcpu_svm *svm);
+
+ static int nested_svm_exit_handled(struct vcpu_svm *svm);
++static int nested_svm_intercept(struct vcpu_svm *svm);
+ static int nested_svm_vmexit(struct vcpu_svm *svm);
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+@@ -1359,6 +1360,8 @@ static int nested_svm_check_permissions(
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code)
+ {
++ int vmexit;
++
+ if (!is_nested(svm))
+ return 0;
+
+@@ -1367,7 +1370,11 @@ static int nested_svm_check_exception(st
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+- return nested_svm_exit_handled(svm);
++ vmexit = nested_svm_intercept(svm);
++ if (vmexit == NESTED_EXIT_DONE)
++ svm->nested.exit_required = true;
++
++ return vmexit;
+ }
+
+ static inline int nested_svm_intr(struct vcpu_svm *svm)
+@@ -1496,7 +1503,7 @@ static int nested_svm_exit_special(struc
+ /*
+ * If this function returns true, this #vmexit was already handled
+ */
+-static int nested_svm_exit_handled(struct vcpu_svm *svm)
++static int nested_svm_intercept(struct vcpu_svm *svm)
+ {
+ u32 exit_code = svm->vmcb->control.exit_code;
+ int vmexit = NESTED_EXIT_HOST;
+@@ -1542,9 +1549,17 @@ static int nested_svm_exit_handled(struc
+ }
+ }
+
+- if (vmexit == NESTED_EXIT_DONE) {
++ return vmexit;
++}
++
++static int nested_svm_exit_handled(struct vcpu_svm *svm)
++{
++ int vmexit;
++
++ vmexit = nested_svm_intercept(svm);
++
++ if (vmexit == NESTED_EXIT_DONE)
+ nested_svm_vmexit(svm);
+- }
+
+ return vmexit;
+ }
diff --git a/queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch b/queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch
new file mode 100644
index 00000000000..3028ac7a115
--- /dev/null
+++ b/queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch
@@ -0,0 +1,39 @@
+From d137ecd6791eb7c4553b3bd06a5e58309639e9d9 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel
+Date: Fri, 19 Feb 2010 16:23:03 +0100
+Subject: KVM: SVM: Sync all control registers on nested vmexit
+
+From: Joerg Roedel
+
+Currently the vmexit emulation does not sync control
+registers where the access is typically intercepted by the
+nested hypervisor. But we cannot count on those intercepts
+to sync these registers, so sync them here and make the
+code architecturally more correct.
+ +Cc: stable@kernel.org +Signed-off-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman + +(cherry picked from commit cdbbdc1210223879450555fee04c29ebf116576b) +--- + arch/x86/kvm/svm.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1622,9 +1622,13 @@ static int nested_svm_vmexit(struct vcpu + nested_vmcb->save.ds = vmcb->save.ds; + nested_vmcb->save.gdtr = vmcb->save.gdtr; + nested_vmcb->save.idtr = vmcb->save.idtr; ++ nested_vmcb->save.cr0 = svm->vcpu.arch.cr0; + if (npt_enabled) + nested_vmcb->save.cr3 = vmcb->save.cr3; ++ else ++ nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; + nested_vmcb->save.cr2 = vmcb->save.cr2; ++ nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; + nested_vmcb->save.rflags = vmcb->save.rflags; + nested_vmcb->save.rip = vmcb->save.rip; + nested_vmcb->save.rsp = vmcb->save.rsp; diff --git a/queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch b/queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch new file mode 100644 index 00000000000..3538fdc4ad9 --- /dev/null +++ b/queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch @@ -0,0 +1,57 @@ +From 536abd6ee4d98ef086686406124c83281d462c11 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Fri, 19 Feb 2010 16:23:05 +0100 +Subject: KVM: SVM: Fix nested msr intercept handling + +From: Joerg Roedel + +The nested_svm_exit_handled_msr() function maps only one +page of the guests msr permission bitmap. This patch changes +the code to use kvm_read_guest to fix the bug. + +Cc: stable@kernel.org +Signed-off-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 4c7da8cb43c09e71a405b5aeaa58a1dbac3c39e9) +--- + arch/x86/kvm/svm.c | 13 +++---------- + 1 file changed, 3 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1434,19 +1434,13 @@ static bool nested_svm_exit_handled_msr( + { + u32 param = svm->vmcb->control.exit_info_1 & 1; + u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; +- struct page *page; + bool ret = false; + u32 t0, t1; +- u8 *msrpm; ++ u8 val; + + if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + return false; + +- msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page); +- +- if (!msrpm) +- goto out; +- + switch (msr) { + case 0 ... 0x1fff: + t0 = (msr * 2) % 8; +@@ -1467,11 +1461,10 @@ static bool nested_svm_exit_handled_msr( + goto out; + } + +- ret = msrpm[t1] & ((1 << param) << t0); ++ if (!kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + t1, &val, 1)) ++ ret = val & ((1 << param) << t0); + + out: +- nested_svm_unmap(page); +- + return ret; + } + diff --git a/queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch b/queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch new file mode 100644 index 00000000000..74ec234f4cb --- /dev/null +++ b/queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch @@ -0,0 +1,109 @@ +From 53ea99c19f1754706cca7265172ed6fb091a8e03 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Fri, 19 Feb 2010 16:23:06 +0100 +Subject: KVM: SVM: Don't sync nested cr8 to lapic and back + +From: Joerg Roedel + +This patch makes syncing of the guest tpr to the lapic +conditional on !nested. Otherwise a nested guest using the +TPR could freeze the guest. +Another important change this patch introduces is that the +cr8 intercept bits are no longer ORed at vmrun emulation if +the guest sets VINTR_MASKING in its VMCB. 
The reason is that
+nested cr8 accesses always need to be handled by the nested
+hypervisor because they change the shadow version of the
+tpr.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit 88ab24adc7142506c8583ac36a34fa388300b750)
+---
+ arch/x86/kvm/svm.c | 46 +++++++++++++++++++++++++++++++---------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1805,21 +1805,6 @@ static bool nested_svm_vmrun(struct vcpu
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+- /* We don't want a nested guest to be more powerful than the guest,
+- so all intercepts are ORed */
+- svm->vmcb->control.intercept_cr_read |=
+- nested_vmcb->control.intercept_cr_read;
+- svm->vmcb->control.intercept_cr_write |=
+- nested_vmcb->control.intercept_cr_write;
+- svm->vmcb->control.intercept_dr_read |=
+- nested_vmcb->control.intercept_dr_read;
+- svm->vmcb->control.intercept_dr_write |=
+- nested_vmcb->control.intercept_dr_write;
+- svm->vmcb->control.intercept_exceptions |=
+- nested_vmcb->control.intercept_exceptions;
+-
+- svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+-
+ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+ /* cache intercepts */
+@@ -1837,6 +1822,28 @@ static bool nested_svm_vmrun(struct vcpu
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
++ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
++ /* We only want the cr8 intercept bits of the guest */
++ svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
++ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
++ }
++
++ /* We don't want a nested guest to be more powerful than the guest,
++ so all intercepts are ORed */
++ svm->vmcb->control.intercept_cr_read |=
++ nested_vmcb->control.intercept_cr_read;
++ svm->vmcb->control.intercept_cr_write |=
++ nested_vmcb->control.intercept_cr_write;
++ svm->vmcb->control.intercept_dr_read |=
++ nested_vmcb->control.intercept_dr_read;
++ svm->vmcb->control.intercept_dr_write |=
++ nested_vmcb->control.intercept_dr_write;
++ svm->vmcb->control.intercept_exceptions |=
++ nested_vmcb->control.intercept_exceptions;
++
++ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
++
++ svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+@@ -2500,6 +2507,9 @@ static void update_cr8_intercept(struct
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ if (irr == -1)
+ return;
+
+@@ -2603,6 +2613,9 @@ static inline void sync_cr8_to_lapic(str
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+ int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+ kvm_set_cr8(vcpu, cr8);
+@@ -2614,6 +2627,9 @@ static inline void sync_lapic_to_cr8(str
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 cr8;
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ cr8 = kvm_get_cr8(vcpu);
+ svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+ svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
diff --git a/queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch b/queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch
new file mode 100644
index 00000000000..c30c63c4278
--- /dev/null
+++ b/queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch
@@ -0,0 +1,73 @@
+From 5721224671983e9fb964e668712da5ee2f508fda Mon Sep 17 00:00:00 2001
+From: Joerg Roedel
+Date: Fri, 19 Feb 2010 16:23:01 +0100
+Subject: KVM: SVM: Fix wrong interrupt injection in enable_irq_windows
+
+From: Joerg Roedel
+
+The nested_svm_intr() function does not execute the vmexit
+anymore. Therefore we may still be in the nested state after
+that function ran. This patch changes the nested_svm_intr()
+function to return whether the irq window could be enabled.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit 8fe546547cf6857a9d984bfe2f2194910f3fc5d0)
+---
+ arch/x86/kvm/svm.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1377,16 +1377,17 @@ static int nested_svm_check_exception(st
+ return vmexit;
+ }
+
+-static inline int nested_svm_intr(struct vcpu_svm *svm)
++/* This function returns true if it is save to enable the irq window */
++static inline bool nested_svm_intr(struct vcpu_svm *svm)
+ {
+ if (!is_nested(svm))
+- return 0;
++ return true;
+
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+- return 0;
++ return true;
+
+ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+- return 0;
++ return false;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+@@ -1399,10 +1400,10 @@ static inline int nested_svm_intr(struct
+ */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+- return 1;
++ return false;
+ }
+
+- return 0;
++ return true;
+ }
+
+ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+@@ -2567,13 +2568,11 @@ static void enable_irq_window(struct kvm
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+- nested_svm_intr(svm);
+-
+ /* In case GIF=0 we can't rely on the CPU to tell us when
+ * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+ * The next time we get that intercept, this function will be
+ * called again though and we'll get the vintr intercept. */
+- if (gif_set(svm)) {
++ if (gif_set(svm) && nested_svm_intr(svm)) {
+ svm_set_vintr(svm);
+ svm_inject_irq(svm, 0x0);
+ }
diff --git a/queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch b/queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch
new file mode 100644
index 00000000000..35ff5e8aa61
--- /dev/null
+++ b/queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch
@@ -0,0 +1,37 @@
+From cd87b5b7c290bea9e5d5473abe05fe7b145d0e33 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun
+Date: Tue, 9 Mar 2010 14:37:53 +0800
+Subject: KVM: s390: Fix possible memory leak in kvm_arch_vcpu_create()
+
+From: Wei Yongjun
+
+This patch fixes a possible memory leak in kvm_arch_vcpu_create()
+under s390, which would happen when kvm_arch_vcpu_create() fails.
+
+Signed-off-by: Wei Yongjun
+Acked-by: Carsten Otte
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit 7b06bf2ffa15e119c7439ed0b024d44f66d7b605)
+---
+ arch/s390/kvm/kvm-s390.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -339,11 +339,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+
+ rc = kvm_vcpu_init(vcpu, kvm, id);
+ if (rc)
+- goto out_free_cpu;
++ goto out_free_sie_block;
+ VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+ vcpu->arch.sie_block);
+
+ return vcpu;
++out_free_sie_block:
++ free_page((unsigned long)(vcpu->arch.sie_block));
+ out_free_cpu:
+ kfree(vcpu);
+ out_nomem:
diff --git a/queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch b/queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch
new file mode 100644
index 00000000000..1114b1caed9
--- /dev/null
+++ b/queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch
@@ -0,0 +1,32 @@
+From 07d4434372555d3ed2d333692b8919cc9cabf4d7 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun
+Date: Tue, 9 Mar 2010 14:13:43 +0800
+Subject: KVM: PPC: Do not create debugfs if fail to create vcpu
+
+From: Wei Yongjun
+
+If we fail to create the vcpu, we should not create the debugfs
+entry for it.
+
+Signed-off-by: Wei Yongjun
+Acked-by: Alexander Graf
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit 06056bfb944a0302a8f22eb45f09123de7fb417b)
+---
+ arch/powerpc/kvm/powerpc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -181,7 +181,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+ {
+ struct kvm_vcpu *vcpu;
+ vcpu = kvmppc_core_vcpu_create(kvm, id);
+- kvmppc_create_vcpu_debugfs(vcpu, id);
++ if (!IS_ERR(vcpu))
++ kvmppc_create_vcpu_debugfs(vcpu, id);
+ return vcpu;
+ }
+
diff --git a/queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch b/queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch
new file mode 100644
index 00000000000..fa085c207ac
--- /dev/null
+++ b/queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch
@@ -0,0 +1,90 @@
+From a2365272251916c9c2e646ee8f63f589981e7b42 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel
+Date: Thu, 22 Apr 2010 12:33:11 +0200
+Subject: KVM: x86: Add callback to let modules decide over some supported cpuid bits
+
+From: Joerg Roedel
+
+This patch adds the set_supported_cpuid callback to
+kvm_x86_ops. It will be used in do_cpuid_ent to delegate the
+decision about some supported cpuid bits to the
+architecture modules.
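+
+A vendor module can use the new hook to hide bits it does not
+emulate; a minimal sketch (illustrative only -- the leaf and bit
+below are examples, not taken from this patch):
+
+    static void vendor_set_supported_cpuid(u32 func,
+                                           struct kvm_cpuid_entry2 *entry)
+    {
+        if (func == 0x80000001)
+            entry->ecx &= ~(1 << 2);    /* e.g. hide the SVM bit */
+    }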
+ +Cc: stable@kernel.org +Signed-off-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman + +(cherry picked from commit d4330ef2fb2236a1e3a176f0f68360f4c0a8661b) +--- + arch/x86/include/asm/kvm_host.h | 2 ++ + arch/x86/kvm/svm.c | 5 +++++ + arch/x86/kvm/vmx.c | 5 +++++ + arch/x86/kvm/x86.c | 3 +++ + 4 files changed, 15 insertions(+) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -533,6 +533,8 @@ struct kvm_x86_ops { + u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + bool (*gb_page_enable)(void); + ++ void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); ++ + const struct trace_print_flags *exit_reasons_str; + }; + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -2885,6 +2885,10 @@ static u64 svm_get_mt_mask(struct kvm_vc + return 0; + } + ++static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++} ++ + static const struct trace_print_flags svm_exit_reasons_str[] = { + { SVM_EXIT_READ_CR0, "read_cr0" }, + { SVM_EXIT_READ_CR3, "read_cr3" }, +@@ -3009,6 +3013,7 @@ static struct kvm_x86_ops svm_x86_ops = + + .exit_reasons_str = svm_exit_reasons_str, + .gb_page_enable = svm_gb_page_enable, ++ .set_supported_cpuid = svm_set_supported_cpuid, + }; + + static int __init svm_init(void) +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3993,6 +3993,10 @@ static bool vmx_gb_page_enable(void) + return false; + } + ++static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++} ++ + static struct kvm_x86_ops vmx_x86_ops = { + .cpu_has_kvm_support = cpu_has_kvm_support, + .disabled_by_bios = vmx_disabled_by_bios, +@@ -4057,6 +4061,7 @@ static struct kvm_x86_ops vmx_x86_ops = + + .exit_reasons_str = vmx_exit_reasons_str, + .gb_page_enable = vmx_gb_page_enable, ++ .set_supported_cpuid = vmx_set_supported_cpuid, + }; + + static int __init vmx_init(void) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1688,6 +1688,9 @@ static void do_cpuid_ent(struct kvm_cpui + entry->ecx &= kvm_supported_word6_x86_features; + break; + } ++ ++ kvm_x86_ops->set_supported_cpuid(function, entry); ++ + put_cpu(); + } + diff --git a/queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch b/queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch new file mode 100644 index 00000000000..f6971bda124 --- /dev/null +++ b/queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch @@ -0,0 +1,40 @@ +From 09e6feff68dc71b22881d30e1ff44f04c474f399 Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Thu, 22 Apr 2010 12:33:12 +0200 +Subject: KVM: SVM: Report emulated SVM features to userspace + +From: Joerg Roedel + +This patch implements the reporting of the emulated SVM +features to userspace instead of the real hardware +capabilities. Every real hardware capability needs emulation +in nested svm so the old behavior was broken. 
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit c2c63a493924e09a1984d1374a0e60dfd54fc0b0)
+---
+ arch/x86/kvm/svm.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2887,6 +2887,16 @@ static u64 svm_get_mt_mask(struct kvm_vc
+
+ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+ {
++ switch (func) {
++ case 0x8000000A:
++ entry->eax = 1; /* SVM revision 1 */
++ entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
++ ASID emulation to nested SVM */
++ entry->ecx = 0; /* Reserved */
++ entry->edx = 0; /* Do not support any additional features */
++
++ break;
++ }
+ }
+
+ static const struct trace_print_flags svm_exit_reasons_str[] = {
diff --git a/queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch b/queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch
new file mode 100644
index 00000000000..02b91e2f4f8
--- /dev/null
+++ b/queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch
@@ -0,0 +1,102 @@
+From 6b0692eb1808e5adffe1e7478475554fad1ea38a Mon Sep 17 00:00:00 2001
+From: Glauber Costa
+Date: Tue, 11 May 2010 12:17:40 -0400
+Subject: x86, paravirt: Add a global synchronization point for pvclock
+
+From: Glauber Costa
+
+In recent stress tests, it was found that pvclock-based systems
+could seriously warp in smp systems. Using ingo's time-warp-test.c,
+I could trigger a scenario as bad as 1.5mi warps a minute in some systems.
+(to be fair, it wasn't that bad in most of them). Investigating further, I
+found out that such warps were caused by the very offset-based calculation
+pvclock is based on.
+
+This happens even on some machines that report constant_tsc in their tsc
+flags, especially on multi-socket ones.
+
+Two reads of the same kernel timestamp at approx the same time will likely
+have tsc timestamped on different occasions too. This means the delta we
+calculate is unpredictable at best, and can probably be smaller in a cpu
+that is legitimately reading the clock on a later occasion.
+
+Some adjustments on the host could make this window less likely to happen,
+but still, it pretty much poses as an intrinsic problem of the mechanism.
+
+A while ago, I thought about using a shared variable anyway, to hold the
+clock's last state, but gave up due to the high contention locking was
+likely to introduce, possibly rendering the thing useless on big machines.
+I argue, however, that locking is not necessary.
+
+We do a read-and-return sequence in pvclock, and between read and return,
+the global value can have changed. However, it can only have changed
+by means of an addition of a positive value. So if we detect that our
+clock timestamp is less than the current global, we know that we need to
+return a higher one, even though it is not exactly the one we compared to.
+
+OTOH, if we detect we're greater than the current time source, we atomically
+replace the value with our new readings. This does cause contention on big
+boxes (but big here means *BIG*), but it seems like a good trade off, since
+it provides us with a time source guaranteed to be stable wrt time warps.
+
+After this patch is applied, I don't see a single warp in time during 5 days
+of execution, in any of the machines I saw them before.
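+
+The scheme above is just a forward-only global maintained with a
+compare-and-swap loop; a minimal user-space sketch of the same idea
+(illustrative only, not the kernel code):
+
+    #include <stdatomic.h>
+    #include <stdint.h>
+
+    static _Atomic uint64_t last_value;
+
+    uint64_t monotonic(uint64_t ret)
+    {
+        uint64_t last = atomic_load(&last_value);
+
+        do {
+            if (ret < last)
+                return last;    /* someone saw a later time: never go back */
+        } while (!atomic_compare_exchange_weak(&last_value, &last, ret));
+
+        return ret;
+    }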
+ +Signed-off-by: Glauber Costa +Acked-by: Zachary Amsden +CC: Jeremy Fitzhardinge +CC: Avi Kivity +CC: Marcelo Tosatti +CC: Zachary Amsden +Signed-off-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 489fb490dbf8dab0249ad82b56688ae3842a79e8) +--- + arch/x86/kernel/pvclock.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +--- a/arch/x86/kernel/pvclock.c ++++ b/arch/x86/kernel/pvclock.c +@@ -109,11 +109,14 @@ unsigned long pvclock_tsc_khz(struct pvc + return pv_tsc_khz; + } + ++static atomic64_t last_value = ATOMIC64_INIT(0); ++ + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) + { + struct pvclock_shadow_time shadow; + unsigned version; + cycle_t ret, offset; ++ u64 last; + + do { + version = pvclock_get_time_values(&shadow, src); +@@ -123,6 +126,27 @@ cycle_t pvclock_clocksource_read(struct + barrier(); + } while (version != src->version); + ++ /* ++ * Assumption here is that last_value, a global accumulator, always goes ++ * forward. If we are less than that, we should not be much smaller. ++ * We assume there is an error marging we're inside, and then the correction ++ * does not sacrifice accuracy. ++ * ++ * For reads: global may have changed between test and return, ++ * but this means someone else updated poked the clock at a later time. ++ * We just need to make sure we are not seeing a backwards event. ++ * ++ * For updates: last_value = ret is not enough, since two vcpus could be ++ * updating at the same time, and one of them could be slightly behind, ++ * making the assumption that last_value always go forward fail to hold. ++ */ ++ last = atomic64_read(&last_value); ++ do { ++ if (ret < last) ++ return last; ++ last = atomic64_cmpxchg(&last_value, last, ret); ++ } while (unlikely(last != ret)); ++ + return ret; + } + diff --git a/queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch b/queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch new file mode 100644 index 00000000000..5bb85fb7fe8 --- /dev/null +++ b/queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch @@ -0,0 +1,31 @@ +From d00d043d62d0201da2935e542ae7fe41d245be3b Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Wed, 12 May 2010 00:28:44 +0300 +Subject: KVM: Don't allow lmsw to clear cr0.pe + +From: Avi Kivity + +The current lmsw implementation allows the guest to clear cr0.pe, contrary +to the manual, which breaks EMM386.EXE. + +Fix by ORing the old cr0.pe with lmsw's operand. 
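+
+A worked example of the one-line mask change below: with cr0.pe = 1
+and an lmsw operand of 0, the old (cr0 & ~0x0ful) | (msw & 0x0f)
+cleared bit 0 and dropped protected mode, while the new
+(cr0 & ~0x0eul) | (msw & 0x0f) preserves the old pe bit; since the
+operand is only ORed in, lmsw can still set pe but never clear it.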
+ +Signed-off-by: Avi Kivity +Signed-off-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit f78e917688edbf1f14c318d2e50dc8e7dad20445) +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -432,7 +432,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); + + void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) + { +- kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); ++ kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0eul) | (msw & 0x0f)); + } + EXPORT_SYMBOL_GPL(kvm_lmsw); + diff --git a/queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch b/queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch new file mode 100644 index 00000000000..5fd5900ce47 --- /dev/null +++ b/queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch @@ -0,0 +1,34 @@ +From 2effde8fa003ee7b472505bddfc24c8d62344ace Mon Sep 17 00:00:00 2001 +From: Sheng Yang +Date: Wed, 12 May 2010 16:40:40 +0800 +Subject: KVM: x86: Check LMA bit before set_efer + +From: Sheng Yang + +kvm_x86_ops->set_efer() would execute vcpu->arch.efer = efer, so the +checking of LMA bit didn't work. + +Signed-off-by: Sheng Yang +Signed-off-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit a3d204e28579427609c3d15d2310127ebaa47d94) +--- + arch/x86/kvm/x86.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -603,11 +603,11 @@ static void set_efer(struct kvm_vcpu *vc + } + } + +- kvm_x86_ops->set_efer(vcpu, efer); +- + efer &= ~EFER_LMA; + efer |= vcpu->arch.shadow_efer & EFER_LMA; + ++ kvm_x86_ops->set_efer(vcpu, efer); ++ + vcpu->arch.shadow_efer = efer; + + vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; diff --git a/queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch b/queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch new file mode 100644 index 00000000000..f9093deb1cb --- /dev/null +++ b/queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch @@ -0,0 +1,52 @@ +From dbad9722e24d96866696ca728032b3a09b8eb78e Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Wed, 12 May 2010 11:48:18 +0300 +Subject: KVM: MMU: Segregate shadow pages with different cr0.wp + +From: Avi Kivity + +When cr0.wp=0, we may shadow a gpte having u/s=1 and r/w=0 with an spte +having u/s=0 and r/w=1. This allows excessive access if the guest sets +cr0.wp=1 and accesses through this spte. + +Fix by making cr0.wp part of the base role; we'll have different sptes for +the two cases and the problem disappears. 
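+
+Concretely: with cr0.wp=0 a guest pte with u/s=1, r/w=0 may be
+shadowed by an spte with u/s=0, r/w=1 so the guest kernel can write
+it. If the guest then sets cr0.wp=1 and the same shadow page were
+reused, that r/w=1 spte would still allow kernel writes the
+architecture forbids. Keying the shadow page role on cr0.wp gives
+each setting its own sptes.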
+ +Signed-off-by: Avi Kivity +Signed-off-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 3dbe141595faa48a067add3e47bba3205b79d33c) +--- + arch/x86/include/asm/kvm_host.h | 1 + + arch/x86/kvm/mmu.c | 3 ++- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -193,6 +193,7 @@ union kvm_mmu_page_role { + unsigned invalid:1; + unsigned cr4_pge:1; + unsigned nxe:1; ++ unsigned cr0_wp:1; + }; + }; + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -227,7 +227,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask + } + EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + +-static int is_write_protection(struct kvm_vcpu *vcpu) ++static bool is_write_protection(struct kvm_vcpu *vcpu) + { + return vcpu->arch.cr0 & X86_CR0_WP; + } +@@ -2448,6 +2448,7 @@ static int init_kvm_softmmu(struct kvm_v + r = paging32_init_context(vcpu); + + vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; ++ vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); + + return r; + } diff --git a/queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch b/queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch new file mode 100644 index 00000000000..95fd28092af --- /dev/null +++ b/queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch @@ -0,0 +1,123 @@ +From fd52c9ad75e418e2a38aa0e662e88cd8b95b74be Mon Sep 17 00:00:00 2001 +From: Shane Wang +Date: Thu, 29 Apr 2010 12:09:01 -0400 +Subject: KVM: VMX: enable VMXON check with SMX enabled (Intel TXT) + +From: Shane Wang + +Per document, for feature control MSR: + + Bit 1 enables VMXON in SMX operation. If the bit is clear, execution + of VMXON in SMX operation causes a general-protection exception. + Bit 2 enables VMXON outside SMX operation. If the bit is clear, execution + of VMXON outside SMX operation causes a general-protection exception. + +This patch is to enable this kind of check with SMX for VMXON in KVM. + +Signed-off-by: Shane Wang +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit cafd66595d92591e4bd25c3904e004fc6f897e2d) +--- + arch/x86/include/asm/msr-index.h | 5 +++-- + arch/x86/kernel/tboot.c | 1 + + arch/x86/kvm/vmx.c | 32 +++++++++++++++++++++----------- + include/linux/tboot.h | 1 + + 4 files changed, 26 insertions(+), 13 deletions(-) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -199,8 +199,9 @@ + #define MSR_IA32_EBL_CR_POWERON 0x0000002a + #define MSR_IA32_FEATURE_CONTROL 0x0000003a + +-#define FEATURE_CONTROL_LOCKED (1<<0) +-#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) ++#define FEATURE_CONTROL_LOCKED (1<<0) ++#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) ++#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) + + #define MSR_IA32_APICBASE 0x0000001b + #define MSR_IA32_APICBASE_BSP (1<<8) +--- a/arch/x86/kernel/tboot.c ++++ b/arch/x86/kernel/tboot.c +@@ -46,6 +46,7 @@ + + /* Global pointer to shared data; NULL means no measured launch. 
*/ + struct tboot *tboot __read_mostly; ++EXPORT_SYMBOL(tboot); + + /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ + #define AP_WAIT_TIMEOUT 1 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include "kvm_cache_regs.h" + #include "x86.h" + +@@ -1125,9 +1126,16 @@ static __init int vmx_disabled_by_bios(v + u64 msr; + + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); +- return (msr & (FEATURE_CONTROL_LOCKED | +- FEATURE_CONTROL_VMXON_ENABLED)) +- == FEATURE_CONTROL_LOCKED; ++ if (msr & FEATURE_CONTROL_LOCKED) { ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) ++ && tboot_enabled()) ++ return 1; ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) ++ && !tboot_enabled()) ++ return 1; ++ } ++ ++ return 0; + /* locked but not enabled */ + } + +@@ -1135,21 +1143,23 @@ static int hardware_enable(void *garbage + { + int cpu = raw_smp_processor_id(); + u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); +- u64 old; ++ u64 old, test_bits; + + if (read_cr4() & X86_CR4_VMXE) + return -EBUSY; + + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); + rdmsrl(MSR_IA32_FEATURE_CONTROL, old); +- if ((old & (FEATURE_CONTROL_LOCKED | +- FEATURE_CONTROL_VMXON_ENABLED)) +- != (FEATURE_CONTROL_LOCKED | +- FEATURE_CONTROL_VMXON_ENABLED)) ++ ++ test_bits = FEATURE_CONTROL_LOCKED; ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ if (tboot_enabled()) ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; ++ ++ if ((old & test_bits) != test_bits) { + /* enable and lock */ +- wrmsrl(MSR_IA32_FEATURE_CONTROL, old | +- FEATURE_CONTROL_LOCKED | +- FEATURE_CONTROL_VMXON_ENABLED); ++ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); ++ } + write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ + asm volatile (ASM_VMX_VMXON_RAX + : : "a"(&phys_addr), "m"(phys_addr) +--- a/include/linux/tboot.h ++++ b/include/linux/tboot.h +@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void); + + #else + ++#define tboot_enabled() 0 + #define tboot_probe() do { } while (0) + #define tboot_shutdown(shutdown_type) do { } while (0) + #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ diff --git a/queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch b/queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch new file mode 100644 index 00000000000..c9f26f0f695 --- /dev/null +++ b/queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch @@ -0,0 +1,63 @@ +From b7e2778692b503bc7a8fc362b1147d8df542f9c6 Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Tue, 4 May 2010 12:58:32 +0300 +Subject: KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots + +From: Avi Kivity + +On svm, kvm_read_pdptr() may require reading guest memory, which can sleep. + +Push the spinlock into mmu_alloc_roots(), and only take it after we've read +the pdptr. 
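+
+The shape of the reordering, in short (illustrative sketch, not the
+full hunks; kvm_pdptr_read() is the pdptr accessor assumed here):
+
+    pdptr = kvm_pdptr_read(vcpu, i);    /* may fault and sleep */
+
+    spin_lock(&vcpu->kvm->mmu_lock);
+    sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+                          PT32_ROOT_LEVEL, direct, ACC_ALL, NULL);
+    ++sp->root_count;
+    spin_unlock(&vcpu->kvm->mmu_lock);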
+ +Tested-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 8facbbff071ff2b19268d3732e31badc60471e21) +--- + arch/x86/kvm/mmu.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -2097,11 +2097,14 @@ static int mmu_alloc_roots(struct kvm_vc + direct = 1; + if (mmu_check_root(vcpu, root_gfn)) + return 1; ++ ++ spin_lock(&vcpu->kvm->mmu_lock); + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, + PT64_ROOT_LEVEL, direct, + ACC_ALL, NULL); + root = __pa(sp->spt); + ++sp->root_count; ++ spin_unlock(&vcpu->kvm->mmu_lock); + vcpu->arch.mmu.root_hpa = root; + return 0; + } +@@ -2123,11 +2126,15 @@ static int mmu_alloc_roots(struct kvm_vc + root_gfn = 0; + if (mmu_check_root(vcpu, root_gfn)) + return 1; ++ ++ spin_lock(&vcpu->kvm->mmu_lock); + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, + PT32_ROOT_LEVEL, direct, + ACC_ALL, NULL); + root = __pa(sp->spt); + ++sp->root_count; ++ spin_unlock(&vcpu->kvm->mmu_lock); ++ + vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; + } + vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); +@@ -2488,7 +2495,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) + goto out; + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_free_some_pages(vcpu); ++ spin_unlock(&vcpu->kvm->mmu_lock); + r = mmu_alloc_roots(vcpu); ++ spin_lock(&vcpu->kvm->mmu_lock); + mmu_sync_roots(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); + if (r) diff --git a/queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch b/queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch new file mode 100644 index 00000000000..f14fbcca956 --- /dev/null +++ b/queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch @@ -0,0 +1,46 @@ +From a2cfe4423331f25aaf816cce92ce6e2544f74966 Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Tue, 4 May 2010 15:00:37 +0300 +Subject: KVM: Fix wallclock version writing race + +From: Avi Kivity + +Wallclock writing uses an unprotected global variable to hold the version; +this can cause one guest to interfere with another if both write their +wallclock at the same time. 
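+
+The version field follows the usual seqlock convention: odd while an
+update is in flight, even once it is complete, so readers retry until
+they see a stable even value. A minimal reader-side sketch
+(illustrative only):
+
+    do {
+        version = wc->version;
+        smp_rmb();
+        sec = wc->sec;
+        nsec = wc->nsec;
+        smp_rmb();
+    } while ((version & 1) || (version != wc->version));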
+ +Acked-by: Glauber Costa +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 9ed3c444ab8987c7b219173a2f7807e3f71e234e) +--- + arch/x86/kvm/x86.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -641,14 +641,22 @@ static int do_set_msr(struct kvm_vcpu *v + + static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) + { +- static int version; ++ int version; ++ int r; + struct pvclock_wall_clock wc; + struct timespec boot; + + if (!wall_clock) + return; + +- version++; ++ r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); ++ if (r) ++ return; ++ ++ if (version & 1) ++ ++version; /* first time write, random junk */ ++ ++ ++version; + + kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + diff --git a/queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch b/queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch new file mode 100644 index 00000000000..1116efe9f9c --- /dev/null +++ b/queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch @@ -0,0 +1,107 @@ +From c0fc62a92b17c61146f20141a2ddc6c0bcdc548b Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Thu, 13 May 2010 11:05:49 +0300 +Subject: KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls + +From: Avi Kivity + +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 98001d8d017cea1ee0f9f35c6227bbd63ef5005b) +--- + arch/powerpc/kvm/book3s.c | 10 ++++++++++ + arch/powerpc/kvm/booke.c | 15 ++++++++++++++- + 2 files changed, 24 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kvm/book3s.c ++++ b/arch/powerpc/kvm/book3s.c +@@ -766,6 +766,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + ++ vcpu_load(vcpu); ++ + sregs->pvr = vcpu->arch.pvr; + + sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; +@@ -784,6 +786,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct + sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; + } + } ++ ++ vcpu_put(vcpu); ++ + return 0; + } + +@@ -793,6 +798,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); + int i; + ++ vcpu_load(vcpu); ++ + kvmppc_set_pvr(vcpu, sregs->pvr); + + vcpu3s->sdr1 = sregs->u.s.sdr1; +@@ -819,6 +826,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct + + /* Flush the MMU after messing with the segments */ + kvmppc_mmu_pte_flush(vcpu, 0, 0); ++ ++ vcpu_put(vcpu); ++ + return 0; + } + +--- a/arch/powerpc/kvm/booke.c ++++ b/arch/powerpc/kvm/booke.c +@@ -443,6 +443,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct + { + int i; + ++ vcpu_load(vcpu); ++ + regs->pc = vcpu->arch.pc; + regs->cr = vcpu->arch.cr; + regs->ctr = vcpu->arch.ctr; +@@ -463,6 +465,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + regs->gpr[i] = vcpu->arch.gpr[i]; + ++ vcpu_put(vcpu); ++ + return 0; + } + +@@ -470,6 +474,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct + { + int i; + ++ vcpu_load(vcpu); ++ + vcpu->arch.pc = regs->pc; + vcpu->arch.cr = regs->cr; + vcpu->arch.ctr = regs->ctr; +@@ -489,6 +495,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) + vcpu->arch.gpr[i] = regs->gpr[i]; + ++ vcpu_put(vcpu); ++ + return 0; + } + +@@ -517,7 +525,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct k + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) + { +- return 
kvmppc_core_vcpu_translate(vcpu, tr); ++ int r; ++ ++ vcpu_load(vcpu); ++ r = kvmppc_core_vcpu_translate(vcpu, tr); ++ vcpu_put(vcpu); ++ return r; + } + + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) diff --git a/queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch b/queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch new file mode 100644 index 00000000000..2c44f63b938 --- /dev/null +++ b/queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch @@ -0,0 +1,58 @@ +From 769481950f87db77b640daec6241727570c63622 Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Thu, 13 May 2010 11:50:19 +0300 +Subject: KVM: x86: Add missing locking to arch specific vcpu ioctls + +From: Avi Kivity + +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 8fbf065d625617bbbf6b72d5f78f84ad13c8b547) +--- + arch/x86/kvm/x86.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1550,6 +1550,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str + { + int r; + ++ vcpu_load(vcpu); + r = -E2BIG; + if (cpuid->nent < vcpu->arch.cpuid_nent) + goto out; +@@ -1561,6 +1562,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str + + out: + cpuid->nent = vcpu->arch.cpuid_nent; ++ vcpu_put(vcpu); + return r; + } + +@@ -1813,6 +1815,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce( + int r; + unsigned bank_num = mcg_cap & 0xff, bank; + ++ vcpu_load(vcpu); + r = -EINVAL; + if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) + goto out; +@@ -1827,6 +1830,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce( + for (bank = 0; bank < bank_num; bank++) + vcpu->arch.mce_banks[bank*4] = ~(u64)0; + out: ++ vcpu_put(vcpu); + return r; + } + +@@ -2094,7 +2098,9 @@ long kvm_arch_vcpu_ioctl(struct file *fi + r = -EFAULT; + if (copy_from_user(&mce, argp, sizeof mce)) + goto out; ++ vcpu_load(vcpu); + r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); ++ vcpu_put(vcpu); + break; + } + case KVM_GET_VCPU_EVENTS: { diff --git a/queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch b/queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch new file mode 100644 index 00000000000..aba508aae5f --- /dev/null +++ b/queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch @@ -0,0 +1,91 @@ +From 5acedf13f81c250bf394ce7561bf404792b44558 Mon Sep 17 00:00:00 2001 +From: Roedel, Joerg +Date: Thu, 6 May 2010 11:38:43 +0200 +Subject: KVM: x86: Inject #GP with the right rip on efer writes + +From: Roedel, Joerg + +This patch fixes a bug in the KVM efer-msr write path. If a +guest writes to a reserved efer bit the set_efer function +injects the #GP directly. The architecture dependent wrmsr +function does not see this, assumes success and advances the +rip. This results in a #GP in the guest with the wrong rip. +This patch fixes this by reporting efer write errors back to +the architectural wrmsr function. 
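+
+The intercept handler can then inject the fault while rip still
+points at the wrmsr; a minimal sketch of a caller on top of this
+change (illustrative only):
+
+    if (kvm_set_msr(&svm->vcpu, ecx, data)) {
+        kvm_inject_gp(&svm->vcpu, 0);    /* rip not advanced */
+    } else {
+        svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
+        skip_emulated_instruction(&svm->vcpu);
+    }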
+ +Signed-off-by: Joerg Roedel +Signed-off-by: Avi Kivity +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit b69e8caef5b190af48c525f6d715e7b7728a77f6) +--- + arch/x86/kvm/x86.c | 31 ++++++++++++------------------- + 1 file changed, 12 insertions(+), 19 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -570,37 +570,29 @@ static u32 emulated_msrs[] = { + MSR_IA32_MISC_ENABLE, + }; + +-static void set_efer(struct kvm_vcpu *vcpu, u64 efer) ++static int set_efer(struct kvm_vcpu *vcpu, u64 efer) + { +- if (efer & efer_reserved_bits) { +- kvm_inject_gp(vcpu, 0); +- return; +- } ++ if (efer & efer_reserved_bits) ++ return 1; + + if (is_paging(vcpu) +- && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { +- kvm_inject_gp(vcpu, 0); +- return; +- } ++ && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) ++ return 1; + + if (efer & EFER_FFXSR) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); +- if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { +- kvm_inject_gp(vcpu, 0); +- return; +- } ++ if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) ++ return 1; + } + + if (efer & EFER_SVME) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); +- if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { +- kvm_inject_gp(vcpu, 0); +- return; +- } ++ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) ++ return 1; + } + + efer &= ~EFER_LMA; +@@ -612,6 +604,8 @@ static void set_efer(struct kvm_vcpu *vc + + vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; + kvm_mmu_reset_context(vcpu); ++ ++ return 0; + } + + void kvm_enable_efer_bits(u64 mask) +@@ -946,8 +940,7 @@ int kvm_set_msr_common(struct kvm_vcpu * + { + switch (msr) { + case MSR_EFER: +- set_efer(vcpu, data); +- break; ++ return set_efer(vcpu, data); + case MSR_K7_HWCR: + data &= ~(u64)0x40; /* ignore flush filter disable */ + if (data != 0) { diff --git a/queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch b/queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch new file mode 100644 index 00000000000..ffc783645f7 --- /dev/null +++ b/queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch @@ -0,0 +1,38 @@ +From 2d7753bec146c9e0030c6b52520ad052a9c2a45e Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Wed, 5 May 2010 16:04:45 +0200 +Subject: KVM: SVM: Don't allow nested guest to VMMCALL into host + +From: Joerg Roedel + +This patch disables the possibility for a l2-guest to do a +VMMCALL directly into the host. This would happen if the +l1-hypervisor doesn't intercept VMMCALL and the l2-guest +executes this instruction. 
+
+Signed-off-by: Joerg Roedel
+Signed-off-by: Avi Kivity
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit 0d945bd9351199744c1e89d57a70615b6ee9f394)
+---
+ arch/x86/kvm/svm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1829,8 +1829,13 @@ static bool nested_svm_vmrun(struct vcpu
+ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+ }
+
+- /* We don't want a nested guest to be more powerful than the guest,
+- so all intercepts are ORed */
++ /* We don't want to see VMMCALLs from a nested guest */
++ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
++
++ /*
++ * We don't want a nested guest to be more powerful than the guest, so
++ * all intercepts are ORed
++ */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
+ nested_vmcb->control.intercept_cr_write;
diff --git a/queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch b/queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch
new file mode 100644
index 00000000000..e7e0af4592e
--- /dev/null
+++ b/queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch
@@ -0,0 +1,58 @@
+From 2f26afba46f0ebf155cf9be746496a0304a5b7cf Mon Sep 17 00:00:00 2001
+From: Shi Weihua
+Date: Tue, 18 May 2010 00:50:32 +0000
+Subject: Btrfs: should add a permission check for setfacl
+
+From: Shi Weihua
+
+commit 2f26afba46f0ebf155cf9be746496a0304a5b7cf upstream.
+
+On btrfs, do the following
+------------------
+# su user1
+# cd btrfs-part/
+# touch aaa
+# getfacl aaa
+ # file: aaa
+ # owner: user1
+ # group: user1
+ user::rw-
+ group::rw-
+ other::r--
+# su user2
+# cd btrfs-part/
+# setfacl -m u::rwx aaa
+# getfacl aaa
+ # file: aaa
+ # owner: user1
+ # group: user1
+ user::rwx <- setfacl succeeded
+ group::rw-
+ other::r--
+------------------
+but we should prohibit user2 from changing user1's acl.
+In fact, on ext3 and other fs, a message occurs:
+ setfacl: aaa: Operation not permitted
+
+This patch fixes it.
+
+Signed-off-by: Shi Weihua
+Signed-off-by: Chris Mason
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/btrfs/acl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/acl.c
++++ b/fs/btrfs/acl.c
+@@ -159,6 +159,9 @@ static int btrfs_xattr_acl_set(struct de
+ int ret;
+ struct posix_acl *acl = NULL;
+
++ if (!is_owner_or_cap(dentry->d_inode))
++ return -EPERM;
++
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (acl == NULL) {
diff --git a/queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch b/queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch
new file mode 100644
index 00000000000..68496fc2e6c
--- /dev/null
+++ b/queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch
@@ -0,0 +1,133 @@
+From fa588e0c57048b3d4bfcd772d80dc0615f83fd35 Mon Sep 17 00:00:00 2001
+From: Steve French
+Date: Thu, 22 Apr 2010 19:21:55 +0000
+Subject: CIFS: Allow null nd (as nfs server uses) on create
+
+From: Steve French
+
+commit fa588e0c57048b3d4bfcd772d80dc0615f83fd35 upstream.
+
+While creating a file on a server which supports unix extensions
+such as Samba, if a file is being created which does not supply
+nameidata (i.e. nd is null), the cifs client can oops when calling
+cifs_posix_open.
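+
+The guard pattern used by the fix, in short (taken from the hunks
+below): the mount point may now be null, so the superblock is passed
+separately:
+
+    rc = cifs_posix_open(full_path, &newinode,
+                         nd ? nd->path.mnt : NULL,
+                         inode->i_sb, mode, oflags,
+                         &oplock, &fileHandle, xid);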
+ +Signed-off-by: Shirish Pargaonkar +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/cifsproto.h | 6 ++++-- + fs/cifs/dir.c | 20 ++++++++++++-------- + fs/cifs/file.c | 11 +++++++---- + 3 files changed, 23 insertions(+), 14 deletions(-) + +--- a/fs/cifs/cifsproto.h ++++ b/fs/cifs/cifsproto.h +@@ -95,8 +95,10 @@ extern struct cifsFileInfo *cifs_new_fil + __u16 fileHandle, struct file *file, + struct vfsmount *mnt, unsigned int oflags); + extern int cifs_posix_open(char *full_path, struct inode **pinode, +- struct vfsmount *mnt, int mode, int oflags, +- __u32 *poplock, __u16 *pnetfid, int xid); ++ struct vfsmount *mnt, ++ struct super_block *sb, ++ int mode, int oflags, ++ __u32 *poplock, __u16 *pnetfid, int xid); + extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, + FILE_UNIX_BASIC_INFO *info, + struct cifs_sb_info *cifs_sb); +--- a/fs/cifs/dir.c ++++ b/fs/cifs/dir.c +@@ -183,13 +183,14 @@ cifs_new_fileinfo(struct inode *newinode + } + + int cifs_posix_open(char *full_path, struct inode **pinode, +- struct vfsmount *mnt, int mode, int oflags, +- __u32 *poplock, __u16 *pnetfid, int xid) ++ struct vfsmount *mnt, struct super_block *sb, ++ int mode, int oflags, ++ __u32 *poplock, __u16 *pnetfid, int xid) + { + int rc; + FILE_UNIX_BASIC_INFO *presp_data; + __u32 posix_flags = 0; +- struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); ++ struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_fattr fattr; + + cFYI(1, ("posix open %s", full_path)); +@@ -242,7 +243,7 @@ int cifs_posix_open(char *full_path, str + + /* get new inode and set it up */ + if (*pinode == NULL) { +- *pinode = cifs_iget(mnt->mnt_sb, &fattr); ++ *pinode = cifs_iget(sb, &fattr); + if (!*pinode) { + rc = -ENOMEM; + goto posix_open_ret; +@@ -251,7 +252,8 @@ int cifs_posix_open(char *full_path, str + cifs_fattr_to_inode(*pinode, &fattr); + } + +- cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); ++ if (mnt) ++ cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); + + posix_open_ret: + kfree(presp_data); +@@ -315,13 +317,14 @@ cifs_create(struct inode *inode, struct + if (nd && (nd->flags & LOOKUP_OPEN)) + oflags = nd->intent.open.flags; + else +- oflags = FMODE_READ; ++ oflags = FMODE_READ | SMB_O_CREAT; + + if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { +- rc = cifs_posix_open(full_path, &newinode, nd->path.mnt, +- mode, oflags, &oplock, &fileHandle, xid); ++ rc = cifs_posix_open(full_path, &newinode, ++ nd ? nd->path.mnt : NULL, ++ inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); + /* EIO could indicate that (posix open) operation is not + supported, despite what server claimed in capability + negotation. 
EREMOTE indicates DFS junction, which is not +@@ -678,6 +681,7 @@ cifs_lookup(struct inode *parent_dir_ino + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && + (nd->intent.open.flags & O_CREAT)) { + rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, ++ parent_dir_inode->i_sb, + nd->intent.open.create_mode, + nd->intent.open.flags, &oplock, + &fileHandle, xid); +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -297,10 +297,12 @@ int cifs_open(struct inode *inode, struc + (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { + int oflags = (int) cifs_posix_convert_flags(file->f_flags); ++ oflags |= SMB_O_CREAT; + /* can not refresh inode info since size could be stale */ + rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, +- cifs_sb->mnt_file_mode /* ignored */, +- oflags, &oplock, &netfid, xid); ++ inode->i_sb, ++ cifs_sb->mnt_file_mode /* ignored */, ++ oflags, &oplock, &netfid, xid); + if (rc == 0) { + cFYI(1, ("posix open succeeded")); + /* no need for special case handling of setting mode +@@ -512,8 +514,9 @@ reopen_error_exit: + int oflags = (int) cifs_posix_convert_flags(file->f_flags); + /* can not refresh inode info since size could be stale */ + rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, +- cifs_sb->mnt_file_mode /* ignored */, +- oflags, &oplock, &netfid, xid); ++ inode->i_sb, ++ cifs_sb->mnt_file_mode /* ignored */, ++ oflags, &oplock, &netfid, xid); + if (rc == 0) { + cFYI(1, ("posix reopen succeeded")); + goto reopen_success; diff --git a/queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch b/queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch new file mode 100644 index 00000000000..ef38cd36be0 --- /dev/null +++ b/queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch @@ -0,0 +1,64 @@ +From bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f Mon Sep 17 00:00:00 2001 +From: Alan Jenkins +Date: Mon, 22 Feb 2010 16:03:58 +0000 +Subject: eeepc-laptop: check wireless hotplug events + +From: Alan Jenkins + +commit bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f upstream. + +Before we mark the wireless device as unplugged, check PCI config space +to see whether the wireless device is really disabled (and vice versa). +This works around newer models which don't want the hotplug code, where +we end up disabling the wired network device. + +My old 701 still works correctly with this. I can also simulate an +afflicted model by changing the hardcoded PCI bus/slot number in the +driver, and it seems to work nicely (although it is a bit noisy). + +In future this type of hotplug support will be implemented by the PCI +core. The existing blacklist and the new warning message will be +removed at that point. 
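+
+The probe itself is cheap: read the vendor/device ID dword of slot 0 on
+the wireless bus and treat all-ones as "no device". A sketch of the check,
+condensed from the hunk below:
+
+	u32 l;
+	bool absent;
+
+	if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l))
+		return;			/* config space unreadable, bail out */
+	absent = (l == 0xffffffff);	/* aborted read comes back all-ones */
+
+	if (blocked != absent)
+		return;			/* BIOS and PCI disagree: skip hotplug */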
+ +Signed-off-by: Alan Jenkins +Signed-off-by: Corentin Chary +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/platform/x86/eeepc-laptop.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/drivers/platform/x86/eeepc-laptop.c ++++ b/drivers/platform/x86/eeepc-laptop.c +@@ -578,6 +578,8 @@ static void eeepc_rfkill_hotplug(struct + struct pci_dev *dev; + struct pci_bus *bus; + bool blocked = eeepc_wlan_rfkill_blocked(eeepc); ++ bool absent; ++ u32 l; + + if (eeepc->wlan_rfkill) + rfkill_set_sw_state(eeepc->wlan_rfkill, blocked); +@@ -591,6 +593,22 @@ static void eeepc_rfkill_hotplug(struct + goto out_unlock; + } + ++ if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) { ++ pr_err("Unable to read PCI config space?\n"); ++ goto out_unlock; ++ } ++ absent = (l == 0xffffffff); ++ ++ if (blocked != absent) { ++ pr_warning("BIOS says wireless lan is %s, " ++ "but the pci device is %s\n", ++ blocked ? "blocked" : "unblocked", ++ absent ? "absent" : "present"); ++ pr_warning("skipped wireless hotplug as probably " ++ "inappropriate for this model\n"); ++ goto out_unlock; ++ } ++ + if (!blocked) { + dev = pci_get_slot(bus, 0); + if (dev) { diff --git a/queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch b/queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch new file mode 100644 index 00000000000..d8da3214c52 --- /dev/null +++ b/queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch @@ -0,0 +1,49 @@ +From 42007efd569f1cf3bfb9a61da60ef6c2179508ca Mon Sep 17 00:00:00 2001 +From: Eric Sandeen +Date: Sun, 16 May 2010 01:00:00 -0400 +Subject: ext4: check s_log_groups_per_flex in online resize code + +From: Eric Sandeen + +commit 42007efd569f1cf3bfb9a61da60ef6c2179508ca upstream. + +If groups_per_flex < 2, sbi->s_flex_groups[] doesn't get filled out, +and every other access to this first tests s_log_groups_per_flex; +same thing needs to happen in resize or we'll wander off into +a null pointer when doing an online resize of the file system. 
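+
+The resize path simply needs the same guard that every other user of
+s_flex_groups[] already applies. In sketch form (the real hunk is below):
+
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+	    sbi->s_log_groups_per_flex) {
+		/* only now is s_flex_groups[] guaranteed to be populated */
+		flex_group = ext4_flex_group(sbi, input->group);
+		/* ...update the flex group counters as before... */
+	}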
+ +Thanks to Christoph Biedl, who came up with the trivial testcase: + +# truncate --size 128M fsfile +# mkfs.ext3 -F fsfile +# tune2fs -O extents,uninit_bg,dir_index,flex_bg,huge_file,dir_nlink,extra_isize fsfile +# e2fsck -yDf -C0 fsfile +# truncate --size 132M fsfile +# losetup /dev/loop0 fsfile +# mount /dev/loop0 mnt +# resize2fs -p /dev/loop0 + + https://bugzilla.kernel.org/show_bug.cgi?id=13549 + +Reported-by: Alessandro Polverini +Test-case-by: Christoph Biedl +Signed-off-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -930,7 +930,8 @@ int ext4_group_add(struct super_block *s + percpu_counter_add(&sbi->s_freeinodes_counter, + EXT4_INODES_PER_GROUP(sb)); + +- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { ++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && ++ sbi->s_log_groups_per_flex) { + ext4_group_t flex_group; + flex_group = ext4_flex_group(sbi, input->group); + atomic_add(input->free_blocks_count, diff --git a/queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch b/queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch new file mode 100644 index 00000000000..cf9e3555539 --- /dev/null +++ b/queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch @@ -0,0 +1,34 @@ +From 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Wed, 2 Jun 2010 22:04:39 -0400 +Subject: ext4: Make sure the MOVE_EXT ioctl can't overwrite append-only files + +From: Theodore Ts'o + +commit 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 upstream. + +Dan Roseberg has reported a problem with the MOVE_EXT ioctl. If the +donor file is an append-only file, we should not allow the operation +to proceed, lest we end up overwriting the contents of an append-only +file. + +Signed-off-by: "Theodore Ts'o" +Cc: Dan Rosenberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/move_extent.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -964,6 +964,9 @@ mext_check_arguments(struct inode *orig_ + return -EINVAL; + } + ++ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) ++ return -EPERM; ++ + /* Ext4 move extent does not support swapfile */ + if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " diff --git a/queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch b/queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch new file mode 100644 index 00000000000..60f94873539 --- /dev/null +++ b/queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch @@ -0,0 +1,45 @@ +From 7df0e0397b9a18358573274db9fdab991941062f Mon Sep 17 00:00:00 2001 +From: Steven Whitehouse +Date: Mon, 24 May 2010 14:36:48 +0100 +Subject: GFS2: Fix permissions checking for setflags ioctl() + +From: Steven Whitehouse + +commit 7df0e0397b9a18358573274db9fdab991941062f upstream. + +We should be checking for the ownership of the file for which +flags are being set, rather than just for write access. 
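+
+The check wanted here is the usual owner-or-capable test. In this kernel
+that is the is_owner_or_cap() helper, roughly equivalent to the following
+sketch:
+
+	/* only the file's owner, or CAP_FOWNER, may change its flags */
+	if (current_fsuid() != inode->i_uid && !capable(CAP_FOWNER))
+		return -EACCES;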
+ +Reported-by: Dan Rosenberg +Signed-off-by: Steven Whitehouse +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/file.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -218,6 +218,11 @@ static int do_gfs2_set_flags(struct file + if (error) + goto out_drop_write; + ++ error = -EACCES; ++ if (!is_owner_or_cap(inode)) ++ goto out; ++ ++ error = 0; + flags = ip->i_diskflags; + new_flags = (flags & ~mask) | (reqflags & mask); + if ((new_flags ^ flags) == 0) +@@ -275,8 +280,10 @@ static int gfs2_set_flags(struct file *f + { + struct inode *inode = filp->f_path.dentry->d_inode; + u32 fsflags, gfsflags; ++ + if (get_user(fsflags, ptr)) + return -EFAULT; ++ + gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); + if (!S_ISDIR(inode->i_mode)) { + if (gfsflags & GFS2_DIF_INHERIT_JDATA) diff --git a/queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch b/queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch new file mode 100644 index 00000000000..f423fa54f85 --- /dev/null +++ b/queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch @@ -0,0 +1,55 @@ +From ef110b24e28f36620f63dab94708a17c7e267358 Mon Sep 17 00:00:00 2001 +From: Dmitry Torokhov +Date: Thu, 13 May 2010 00:42:23 -0700 +Subject: Input: psmouse - reset all types of mice before reconnecting + +From: Dmitry Torokhov + +commit ef110b24e28f36620f63dab94708a17c7e267358 upstream. + +Synaptics hardware requires resetting device after suspend to ram +in order for the device to be operational. The reset lives in +synaptics-specific reconnect handler, but it is not being invoked +if synaptics support is disabled and the device is handled as a +standard PS/2 device (bare or IntelliMouse protocol). + +Let's add reset into generic reconnect handler as well. 
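+
+With this change the generic reconnect path does what driver-specific
+handlers already do: reset, re-probe, then verify the protocol type is
+unchanged. A condensed sketch of the flow (full hunk below):
+
+	psmouse_reset(psmouse);		/* wake the hardware after resume */
+
+	if (psmouse_probe(psmouse) < 0)
+		goto out;		/* basic PS/2 probe failed */
+
+	type = psmouse_extensions(psmouse, psmouse_max_proto, false);
+	if (psmouse->type != type)
+		goto out;		/* a different device came back */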
+ +Signed-off-by: Dmitry Torokhov +Cc: Tim Gardner +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/input/mouse/psmouse-base.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/input/mouse/psmouse-base.c ++++ b/drivers/input/mouse/psmouse-base.c +@@ -1382,6 +1382,7 @@ static int psmouse_reconnect(struct seri + struct psmouse *psmouse = serio_get_drvdata(serio); + struct psmouse *parent = NULL; + struct serio_driver *drv = serio->drv; ++ unsigned char type; + int rc = -1; + + if (!drv || !psmouse) { +@@ -1401,10 +1402,15 @@ static int psmouse_reconnect(struct seri + if (psmouse->reconnect) { + if (psmouse->reconnect(psmouse)) + goto out; +- } else if (psmouse_probe(psmouse) < 0 || +- psmouse->type != psmouse_extensions(psmouse, +- psmouse_max_proto, false)) { +- goto out; ++ } else { ++ psmouse_reset(psmouse); ++ ++ if (psmouse_probe(psmouse) < 0) ++ goto out; ++ ++ type = psmouse_extensions(psmouse, psmouse_max_proto, false); ++ if (psmouse->type != type) ++ goto out; + } + + /* ok, the device type (and capabilities) match the old one, diff --git a/queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch b/queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch new file mode 100644 index 00000000000..80a2f839450 --- /dev/null +++ b/queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch @@ -0,0 +1,191 @@ +From cea7daa3589d6b550546a8c8963599f7c1a3ae5c Mon Sep 17 00:00:00 2001 +From: Toshiyuki Okajima +Date: Fri, 30 Apr 2010 14:32:13 +0100 +Subject: KEYS: find_keyring_by_name() can gain access to a freed keyring + +From: Toshiyuki Okajima + +commit cea7daa3589d6b550546a8c8963599f7c1a3ae5c upstream. + +find_keyring_by_name() can gain access to a keyring that has had its reference +count reduced to zero, and is thus ready to be freed. This then allows the +dead keyring to be brought back into use whilst it is being destroyed. + +The following timeline illustrates the process: + +|(cleaner) (user) +| +| free_user(user) sys_keyctl() +| | | +| key_put(user->session_keyring) keyctl_get_keyring_ID() +| || //=> keyring->usage = 0 | +| |schedule_work(&key_cleanup_task) lookup_user_key() +| || | +| kmem_cache_free(,user) | +| . |[KEY_SPEC_USER_KEYRING] +| . install_user_keyrings() +| . || +| key_cleanup() [<= worker_thread()] || +| | || +| [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)] +| | || +| atomic_read() == 0 || +| |{ rb_ease(&key->serial_node,) } || +| | || +| [spin_unlock(&key_serial_lock)] |find_keyring_by_name() +| | ||| +| keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)] +| || ||| +| |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage) +| |. ||| *** GET freeing keyring *** +| |. ||[read_unlock(&keyring_name_lock)] +| || || +| |list_del() |[mutex_unlock(&key_user_k..mutex)] +| || | +| |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned ** +| | . +| kmem_cache_free(,keyring) . +| . +| atomic_dec(&keyring->usage) +v *** DESTROYED *** +TIME + +If CONFIG_SLUB_DEBUG=y then we may see the following message generated: + + ============================================================================= + BUG key_jar: Poison overwritten + ----------------------------------------------------------------------------- + + INFO: 0xffff880197a7e200-0xffff880197a7e200. 
First byte 0x6a instead of 0x6b + INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086 + INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10 + INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3 + INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300 + + Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ + Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk + +Alternatively, we may see a system panic happen, such as: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 + IP: [] kmem_cache_alloc+0x5b/0xe9 + PGD 6b2b4067 PUD 6a80d067 PMD 0 + Oops: 0000 [#1] SMP + last sysfs file: /sys/kernel/kexec_crash_loaded + CPU 1 + ... + Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY + RIP: 0010:[] [] kmem_cache_alloc+0x5b/0xe9 + RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002 + RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b + RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430 + RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000 + R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0 + R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce + FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0) + Stack: + 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001 + 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce + 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3 + Call Trace: + [] ? get_empty_filp+0x70/0x12f + [] ? do_filp_open+0x145/0x590 + [] ? tlb_finish_mmu+0x2a/0x33 + [] ? unmap_region+0xd3/0xe2 + [] ? virt_to_head_page+0x9/0x2d + [] ? alloc_fd+0x69/0x10e + [] ? do_sys_open+0x56/0xfc + [] ? system_call_fastpath+0x16/0x1b + Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef + RIP [] kmem_cache_alloc+0x5b/0xe9 + +This problem is that find_keyring_by_name does not confirm that the keyring is +valid before accepting it. + +Skipping keyrings that have been reduced to a zero count seems the way to go. +To this end, use atomic_inc_not_zero() to increment the usage count and skip +the candidate keyring if that returns false. + +The following script _may_ cause the bug to happen, but there's no guarantee +as the window of opportunity is small: + + #!/bin/sh + LOOP=100000 + USER=dummy_user + /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; } + for ((i=0; i /dev/null" $USER + done + (( add == 1 )) && /usr/sbin/userdel -r $USER + exit + +Note that the nominated user must not be in use. + +An alternative way of testing this may be: + + for ((i=0; i<100000; i++)) + do + keyctl session foo /bin/true || break + done >&/dev/null + +as that uses a keyring named "foo" rather than relying on the user and +user-session named keyrings. 
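+
+The fix is the standard "never resurrect a zero refcount" idiom: a lookup
+racing with teardown may only take its reference via atomic_inc_not_zero()
+and must skip the object when that fails. In sketch form:
+
+	/* under keyring_name_lock; usage may already have dropped to zero */
+	if (!atomic_inc_not_zero(&keyring->usage))
+		continue;	/* keyring is being destroyed, keep scanning */
+	goto out;		/* we now hold a valid reference */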
+ +Reported-by: Toshiyuki Okajima +Signed-off-by: David Howells +Tested-by: Toshiyuki Okajima +Acked-by: Serge Hallyn +Signed-off-by: James Morris +Cc: Ben Hutchings +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/keyring.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -524,9 +524,8 @@ struct key *find_keyring_by_name(const c + struct key *keyring; + int bucket; + +- keyring = ERR_PTR(-EINVAL); + if (!name) +- goto error; ++ return ERR_PTR(-EINVAL); + + bucket = keyring_hash(name); + +@@ -553,17 +552,18 @@ struct key *find_keyring_by_name(const c + KEY_SEARCH) < 0) + continue; + +- /* we've got a match */ +- atomic_inc(&keyring->usage); +- read_unlock(&keyring_name_lock); +- goto error; ++ /* we've got a match but we might end up racing with ++ * key_cleanup() if the keyring is currently 'dead' ++ * (ie. it has a zero usage count) */ ++ if (!atomic_inc_not_zero(&keyring->usage)) ++ continue; ++ goto out; + } + } + +- read_unlock(&keyring_name_lock); + keyring = ERR_PTR(-ENOKEY); +- +- error: ++out: ++ read_unlock(&keyring_name_lock); + return keyring; + + } /* end find_keyring_by_name() */ diff --git a/queue-2.6.33/keys-return-more-accurate-error-codes.patch b/queue-2.6.33/keys-return-more-accurate-error-codes.patch new file mode 100644 index 00000000000..3bde51e12d1 --- /dev/null +++ b/queue-2.6.33/keys-return-more-accurate-error-codes.patch @@ -0,0 +1,50 @@ +From 4d09ec0f705cf88a12add029c058b53f288cfaa2 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 17 May 2010 14:42:35 +0100 +Subject: KEYS: Return more accurate error codes + +From: Dan Carpenter + +commit 4d09ec0f705cf88a12add029c058b53f288cfaa2 upstream. + +We were using the wrong variable here so the error codes weren't being returned +properly. The original code returns -ENOKEY. + +Signed-off-by: Dan Carpenter +Signed-off-by: David Howells +Signed-off-by: James Morris +Signed-off-by: Greg Kroah-Hartman + +--- + security/keys/process_keys.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -509,7 +509,7 @@ try_again: + + ret = install_thread_keyring(); + if (ret < 0) { +- key = ERR_PTR(ret); ++ key_ref = ERR_PTR(ret); + goto error; + } + goto reget_creds; +@@ -527,7 +527,7 @@ try_again: + + ret = install_process_keyring(); + if (ret < 0) { +- key = ERR_PTR(ret); ++ key_ref = ERR_PTR(ret); + goto error; + } + goto reget_creds; +@@ -586,7 +586,7 @@ try_again: + + case KEY_SPEC_GROUP_KEYRING: + /* group keyrings are not yet supported */ +- key = ERR_PTR(-EINVAL); ++ key_ref = ERR_PTR(-EINVAL); + goto error; + + case KEY_SPEC_REQKEY_AUTH_KEY: diff --git a/queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch b/queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch new file mode 100644 index 00000000000..1920ee7d183 --- /dev/null +++ b/queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch @@ -0,0 +1,81 @@ +From 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 Mon Sep 17 00:00:00 2001 +From: James Chapman +Date: Tue, 16 Mar 2010 06:46:31 +0000 +Subject: l2tp: Fix oops in pppol2tp_xmit + +From: James Chapman + +commit 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 upstream. + +When transmitting L2TP frames, we derive the outgoing interface's UDP +checksum hardware assist capabilities from the tunnel dst dev. This +can sometimes be NULL, especially when routing protocols are used and +routing changes occur. 
This patch just checks for NULL dst or dev +pointers when checking for netdev hardware assist features. + +BUG: unable to handle kernel NULL pointer dereference at 0000000c +IP: [] pppol2tp_xmit+0x341/0x4da [pppol2tp] +*pde = 00000000 +Oops: 0000 [#1] SMP +last sysfs file: /sys/class/net/lo/operstate +Modules linked in: pppol2tp pppox ppp_generic slhc ipv6 dummy loop snd_hda_codec_atihdmi snd_hda_intel snd_hda_codec snd_pcm snd_timer snd soundcore snd_page_alloc evdev psmouse serio_raw processor button i2c_piix4 i2c_core ati_agp agpgart pcspkr ext3 jbd mbcache sd_mod ide_pci_generic atiixp ide_core ahci ata_generic floppy ehci_hcd ohci_hcd libata e1000e scsi_mod usbcore nls_base thermal fan thermal_sys [last unloaded: scsi_wait_scan] + +Pid: 0, comm: swapper Not tainted (2.6.32.8 #1) +EIP: 0060:[] EFLAGS: 00010297 CPU: 3 +EIP is at pppol2tp_xmit+0x341/0x4da [pppol2tp] +EAX: 00000000 EBX: f64d1680 ECX: 000005b9 EDX: 00000000 +ESI: f6b91850 EDI: f64d16ac EBP: f6a0c4c0 ESP: f70a9cac + DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 +Process swapper (pid: 0, ti=f70a8000 task=f70a31c0 task.ti=f70a8000) +Stack: + 000005a9 000005b9 f734c400 f66652c0 f7352e00 f67dc800 00000000 f6b91800 +<0> 000005a3 f70ef6c4 f67dcda9 000005a3 f89b192e 00000246 000005a3 f64d1680 +<0> f63633e0 f6363320 f64d1680 f65a7320 f65a7364 f65856c0 f64d1680 f679f02f +Call Trace: + [] ? ppp_push+0x459/0x50e [ppp_generic] + [] ? ppp_xmit_process+0x3b6/0x430 [ppp_generic] + [] ? ppp_start_xmit+0x10d/0x120 [ppp_generic] + [] ? dev_hard_start_xmit+0x21f/0x2b2 + [] ? sch_direct_xmit+0x48/0x10e + [] ? dev_queue_xmit+0x263/0x3a6 + [] ? ip_finish_output+0x1f7/0x221 + [] ? ip_forward_finish+0x2e/0x30 + [] ? ip_rcv_finish+0x295/0x2a9 + [] ? netif_receive_skb+0x3e9/0x404 + [] ? e1000_clean_rx_irq+0x253/0x2fc [e1000e] + [] ? e1000_clean+0x63/0x1fc [e1000e] + [] ? sched_clock_local+0x15/0x11b + [] ? net_rx_action+0x96/0x195 + [] ? __do_softirq+0xaa/0x151 + [] ? do_softirq+0x31/0x3c + [] ? irq_exit+0x26/0x58 + [] ? do_IRQ+0x78/0x89 + [] ? common_interrupt+0x29/0x30 + [] ? native_safe_halt+0x2/0x3 + [] ? default_idle+0x55/0x75 + [] ? c1e_idle+0xd2/0xd5 + [] ? cpu_idle+0x46/0x62 +Code: 8d 45 08 f0 ff 45 08 89 6b 08 c7 43 68 7e fb 9c f8 8a 45 24 83 e0 0c 3c 04 75 09 80 63 64 f3 e9 b4 00 00 00 8b 43 18 8b 4c 24 04 <8b> 40 0c 8d 79 11 f6 40 44 0e 8a 43 64 75 51 6a 00 8b 4c 24 08 +EIP: [] pppol2tp_xmit+0x341/0x4da [pppol2tp] SS:ESP 0068:f70a9cac +CR2: 000000000000000c + +Signed-off-by: James Chapman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/pppol2tp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/pppol2tp.c ++++ b/drivers/net/pppol2tp.c +@@ -977,7 +977,8 @@ static int pppol2tp_sendmsg(struct kiocb + /* Calculate UDP checksum if configured to do so */ + if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT) + skb->ip_summed = CHECKSUM_NONE; +- else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { ++ else if ((skb_dst(skb) && skb_dst(skb)->dev) && ++ (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) { + skb->ip_summed = CHECKSUM_COMPLETE; + csum = skb_checksum(skb, 0, udp_len, 0); + uh->check = csum_tcpudp_magic(inet->inet_saddr, diff --git a/queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch b/queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch new file mode 100644 index 00000000000..3080f86c577 --- /dev/null +++ b/queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch @@ -0,0 +1,34 @@ +From 550f0d922286556c7ea43974bb7921effb5a5278 Mon Sep 17 00:00:00 2001 +From: Helge Deller +Date: Mon, 3 May 2010 20:44:21 +0000 +Subject: parisc: clear floating point exception flag on SIGFPE signal + +From: Helge Deller + +commit 550f0d922286556c7ea43974bb7921effb5a5278 upstream. + +Clear the floating point exception flag before returning to +user space. This is needed, else the libc trampoline handler +may hit the same SIGFPE again while building up a trampoline +to a signal handler. + +Fixes debian bug #559406. + +Signed-off-by: Helge Deller +Signed-off-by: Kyle McMartin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/parisc/math-emu/decode_exc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/parisc/math-emu/decode_exc.c ++++ b/arch/parisc/math-emu/decode_exc.c +@@ -342,6 +342,7 @@ decode_fpu(unsigned int Fpu_register[], + return SIGNALCODE(SIGFPE, FPE_FLTINV); + case DIVISIONBYZEROEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); ++ Clear_excp_register(exception_index); + return SIGNALCODE(SIGFPE, FPE_FLTDIV); + case INEXACTEXCEPTION: + update_trap_counts(Fpu_register, aflags, bflags, trap_counts); diff --git a/queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch b/queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch new file mode 100644 index 00000000000..fc0bca02887 --- /dev/null +++ b/queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch @@ -0,0 +1,68 @@ +From 6377a7ae1ab82859edccdbc8eaea63782efb134d Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Fri, 19 Mar 2010 16:59:19 -0700 +Subject: [SCSI] qla2xxx: Disable MSI on qla24xx chips other than QLA2432. + +From: Ben Hutchings + +commit 6377a7ae1ab82859edccdbc8eaea63782efb134d upstream. + +On specific platforms, MSI is unreliable on some of the QLA24xx chips, resulting +in fatal I/O errors under load, as reported in +and by some RHEL customers. + +Signed-off-by: Giridhar Malavali +Signed-off-by: James Bottomley +Cc: Ben Hutchings +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/qla2xxx/qla_isr.c | 28 +++++++++++++--------------- + 1 file changed, 13 insertions(+), 15 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -2169,30 +2169,28 @@ qla2x00_request_irqs(struct qla_hw_data + + /* If possible, enable MSI-X. 
*/ + if (!IS_QLA2432(ha) && !IS_QLA2532(ha) && +- !IS_QLA8432(ha) && !IS_QLA8001(ha)) +- goto skip_msix; ++ !IS_QLA8432(ha) && !IS_QLA8001(ha)) ++ goto skip_msi; ++ ++ if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP && ++ (ha->pdev->subsystem_device == 0x7040 || ++ ha->pdev->subsystem_device == 0x7041 || ++ ha->pdev->subsystem_device == 0x1705)) { ++ DEBUG2(qla_printk(KERN_WARNING, ha, ++ "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X,0x%X).\n", ++ ha->pdev->subsystem_vendor, ++ ha->pdev->subsystem_device)); ++ goto skip_msi; ++ } + + if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX || + !QLA_MSIX_FW_MODE_1(ha->fw_attributes))) { + DEBUG2(qla_printk(KERN_WARNING, ha, + "MSI-X: Unsupported ISP2432 (0x%X, 0x%X).\n", + ha->pdev->revision, ha->fw_attributes)); +- + goto skip_msix; + } + +- if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP && +- (ha->pdev->subsystem_device == 0x7040 || +- ha->pdev->subsystem_device == 0x7041 || +- ha->pdev->subsystem_device == 0x1705)) { +- DEBUG2(qla_printk(KERN_WARNING, ha, +- "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X, 0x%X).\n", +- ha->pdev->subsystem_vendor, +- ha->pdev->subsystem_device)); +- +- goto skip_msi; +- } +- + ret = qla24xx_enable_msix(ha, rsp); + if (!ret) { + DEBUG2(qla_printk(KERN_INFO, ha, diff --git a/queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch b/queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch new file mode 100644 index 00000000000..30eb1cbea17 --- /dev/null +++ b/queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch @@ -0,0 +1,223 @@ +From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Wed, 28 Apr 2010 10:30:59 +0000 +Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4) + +From: Neil Horman + +commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 upstream. + +Ok, version 4 + +Change Notes: +1) Minor cleanups, from Vlads notes + +Summary: + +Hey- + Recently, it was reported to me that the kernel could oops in the +following way: + +<5> kernel BUG at net/core/skbuff.c:91! 
+<5> invalid operand: 0000 [#1] +<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter +ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U) +vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5 +ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss +snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore +pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi +mptbase sd_mod scsi_mod +<5> CPU: 0 +<5> EIP: 0060:[] Not tainted VLI +<5> EFLAGS: 00010216 (2.6.9-89.0.25.EL) +<5> EIP is at skb_over_panic+0x1f/0x2d +<5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44 +<5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40 +<5> ds: 007b es: 007b ss: 0068 +<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0) +<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180 +e0c2947d +<5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004 +df653490 +<5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e +00000004 +<5> Call Trace: +<5> [] sctp_addto_chunk+0xb0/0x128 [sctp] +<5> [] sctp_addto_chunk+0xb5/0x128 [sctp] +<5> [] sctp_init_cause+0x3f/0x47 [sctp] +<5> [] sctp_process_unk_param+0xac/0xb8 [sctp] +<5> [] sctp_verify_init+0xcc/0x134 [sctp] +<5> [] sctp_sf_do_5_1B_init+0x83/0x28e [sctp] +<5> [] sctp_do_sm+0x41/0x77 [sctp] +<5> [] cache_grow+0x140/0x233 +<5> [] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp] +<5> [] sctp_inq_push+0xe/0x10 [sctp] +<5> [] sctp_rcv+0x454/0x509 [sctp] +<5> [] ipt_hook+0x17/0x1c [iptable_filter] +<5> [] nf_iterate+0x40/0x81 +<5> [] ip_local_deliver_finish+0x0/0x151 +<5> [] ip_local_deliver_finish+0xc6/0x151 +<5> [] nf_hook_slow+0x83/0xb5 +<5> [] ip_local_deliver+0x1a2/0x1a9 +<5> [] ip_local_deliver_finish+0x0/0x151 +<5> [] ip_rcv+0x334/0x3b4 +<5> [] netif_receive_skb+0x320/0x35b +<5> [] init_stall_timer+0x67/0x6a [uhci_hcd] +<5> [] process_backlog+0x6c/0xd9 +<5> [] net_rx_action+0xfe/0x1f8 +<5> [] __do_softirq+0x35/0x79 +<5> [] handle_IRQ_event+0x0/0x4f +<5> [] do_softirq+0x46/0x4d + +Its an skb_over_panic BUG halt that results from processing an init chunk in +which too many of its variable length parameters are in some way malformed. + +The problem is in sctp_process_unk_param: +if (NULL == *errp) + *errp = sctp_make_op_error_space(asoc, chunk, + ntohs(chunk->chunk_hdr->length)); + + if (*errp) { + sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length))); + sctp_addto_chunk(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); + +When we allocate an error chunk, we assume that the worst case scenario requires +that we have chunk_hdr->length data allocated, which would be correct nominally, +given that we call sctp_addto_chunk for the violating parameter. Unfortunately, +we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error +chunk, so the worst case situation in which all parameters are in violation +requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data. + +The result of this error is that a deliberately malformed packet sent to a +listening host can cause a remote DOS, described in CVE-2010-1173: +http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173 + +I've tested the below fix and confirmed that it fixes the issue. We move to a +strategy whereby we allocate a fixed size error chunk and ignore errors we don't +have space to report. 
Tested by me successfully + +Signed-off-by: Neil Horman +Acked-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/net/sctp/structs.h | 1 + net/sctp/sm_make_chunk.c | 62 +++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 58 insertions(+), 5 deletions(-) + +--- a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -778,6 +778,7 @@ int sctp_user_addto_chunk(struct sctp_ch + struct iovec *data); + void sctp_chunk_free(struct sctp_chunk *); + void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data); ++void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data); + struct sctp_chunk *sctp_chunkify(struct sk_buff *, + const struct sctp_association *, + struct sock *); +--- a/net/sctp/sm_make_chunk.c ++++ b/net/sctp/sm_make_chunk.c +@@ -107,7 +107,7 @@ static const struct sctp_paramhdr prsctp + cpu_to_be16(sizeof(struct sctp_paramhdr)), + }; + +-/* A helper to initialize to initialize an op error inside a ++/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. + */ +@@ -124,6 +124,29 @@ void sctp_init_cause(struct sctp_chunk + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); + } + ++/* A helper to initialize an op error inside a ++ * provided chunk, as most cause codes will be embedded inside an ++ * abort chunk. Differs from sctp_init_cause in that it won't oops ++ * if there isn't enough space in the op error chunk ++ */ ++int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, ++ size_t paylen) ++{ ++ sctp_errhdr_t err; ++ __u16 len; ++ ++ /* Cause code constants are now defined in network order. */ ++ err.cause = cause_code; ++ len = sizeof(sctp_errhdr_t) + paylen; ++ err.length = htons(len); ++ ++ if (skb_tailroom(chunk->skb) > len) ++ return -ENOSPC; ++ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, ++ sizeof(sctp_errhdr_t), ++ &err); ++ return 0; ++} + /* 3.3.2 Initiation (INIT) (1) + * + * This chunk is used to initiate a SCTP association between two +@@ -1131,6 +1154,24 @@ nodata: + return retval; + } + ++/* Create an Operation Error chunk of a fixed size, ++ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) ++ * This is a helper function to allocate an error chunk for ++ * for those invalid parameter codes in which we may not want ++ * to report all the errors, if the incomming chunk is large ++ */ ++static inline struct sctp_chunk *sctp_make_op_error_fixed( ++ const struct sctp_association *asoc, ++ const struct sctp_chunk *chunk) ++{ ++ size_t size = asoc ? asoc->pathmtu : 0; ++ ++ if (!size) ++ size = SCTP_DEFAULT_MAXSEGMENT; ++ ++ return sctp_make_op_error_space(asoc, chunk, size); ++} ++ + /* Create an Operation Error chunk. */ + struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, +@@ -1373,6 +1414,18 @@ void *sctp_addto_chunk(struct sctp_chunk + return target; + } + ++/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient ++ * space in the chunk ++ */ ++void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, ++ int len, const void *data) ++{ ++ if (skb_tailroom(chunk->skb) > len) ++ return sctp_addto_chunk(chunk, len, data); ++ else ++ return NULL; ++} ++ + /* Append bytes from user space to the end of a chunk. Will panic if + * chunk is not big enough. + * Returns a kernel err value. 
+@@ -1976,13 +2029,12 @@ static sctp_ierror_t sctp_process_unk_pa + * returning multiple unknown parameters. + */ + if (NULL == *errp) +- *errp = sctp_make_op_error_space(asoc, chunk, +- ntohs(chunk->chunk_hdr->length)); ++ *errp = sctp_make_op_error_fixed(asoc, chunk); + + if (*errp) { +- sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, ++ sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, + WORD_ROUND(ntohs(param.p->length))); +- sctp_addto_chunk(*errp, ++ sctp_addto_chunk_fixed(*errp, + WORD_ROUND(ntohs(param.p->length)), + param.v); + } else { diff --git a/queue-2.6.33/series b/queue-2.6.33/series index 6a5dd3e0302..3529c5b1ef5 100644 --- a/queue-2.6.33/series +++ b/queue-2.6.33/series @@ -125,3 +125,39 @@ iwlwifi-recalculate-average-tpt-if-not-current.patch perf-fix-signed-comparison-in-perf_adjust_period.patch tracing-fix-null-pointer-deref-with-send_sig_forced.patch wl1251-fix-a-memory-leak-in-probe.patch +ext4-check-s_log_groups_per_flex-in-online-resize-code.patch +ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch +gfs2-fix-permissions-checking-for-setflags-ioctl.patch +sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch +cifs-allow-null-nd-as-nfs-server-uses-on-create.patch +vfs-add-nofollow-flag-to-umount-2.patch +l2tp-fix-oops-in-pppol2tp_xmit.patch +btrfs-should-add-a-permission-check-for-setfacl.patch +eeepc-laptop-check-wireless-hotplug-events.patch +tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch +input-psmouse-reset-all-types-of-mice-before-reconnecting.patch +0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch +0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch +0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch +0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch +0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch +0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch +0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch +0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch +0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch +0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch +0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch +0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch +0013-KVM-x86-Check-LMA-bit-before-set_efer.patch +0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch +0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch +0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch +0017-KVM-Fix-wallclock-version-writing-race.patch +0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch +0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch +0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch +0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch +parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch +keys-return-more-accurate-error-codes.patch +keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch +qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch diff --git a/queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch b/queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch new file mode 100644 index 00000000000..bc6e367fc7b --- /dev/null +++ b/queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch @@ -0,0 +1,324 @@ +From 7e53bd42d14c75192b99674c40fcc359392da59d Mon Sep 
17 00:00:00 2001 +From: Lai Jiangshan +Date: Wed, 6 Jan 2010 20:08:50 +0800 +Subject: tracing: Consolidate protection of reader access to the ring buffer + +From: Lai Jiangshan + +commit 7e53bd42d14c75192b99674c40fcc359392da59d upstream. + +At the beginning, access to the ring buffer was fully serialized +by trace_types_lock. Patch d7350c3f4569 gives more freedom to readers, +and patch b04cc6b1f6 adds code to protect trace_pipe and cpu#/trace_pipe. + +But actually it is not enough, ring buffer readers are not always +read-only, they may consume data. + +This patch makes accesses to trace, trace_pipe, trace_pipe_raw +cpu#/trace, cpu#/trace_pipe and cpu#/trace_pipe_raw serialized. +And removes tracing_reader_cpumask which is used to protect trace_pipe. + +Details: + +Ring buffer serializes readers, but it is low level protection. +The validity of the events (which returns by ring_buffer_peek() ..etc) +are not protected by ring buffer. + +The content of events may become garbage if we allow another process to consume +these events concurrently: + A) the page of the consumed events may become a normal page + (not reader page) in ring buffer, and this page will be rewritten + by the events producer. + B) The page of the consumed events may become a page for splice_read, + and this page will be returned to system. + +This patch adds trace_access_lock() and trace_access_unlock() primitives. + +These primitives allow multi process access to different cpu ring buffers +concurrently. + +These primitives don't distinguish read-only and read-consume access. +Multi read-only access is also serialized. + +And we don't use these primitives when we open files, +we only use them when we read files. + +Signed-off-by: Lai Jiangshan +LKML-Reference: <4B447D52.1050602@cn.fujitsu.com> +Signed-off-by: Steven Rostedt +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 136 ++++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 97 insertions(+), 39 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(voi + + static cpumask_var_t __read_mostly tracing_buffer_mask; + +-/* Define which cpu buffers are currently read in trace_pipe */ +-static cpumask_var_t tracing_reader_cpumask; +- + #define for_each_tracing_cpu(cpu) \ + for_each_cpu(cpu, tracing_buffer_mask) + +@@ -243,12 +241,91 @@ static struct tracer *current_trace __r + + /* + * trace_types_lock is used to protect the trace_types list. +- * This lock is also used to keep user access serialized. +- * Accesses from userspace will grab this lock while userspace +- * activities happen inside the kernel. + */ + static DEFINE_MUTEX(trace_types_lock); + ++/* ++ * serialize the access of the ring buffer ++ * ++ * ring buffer serializes readers, but it is low level protection. ++ * The validity of the events (which returns by ring_buffer_peek() ..etc) ++ * are not protected by ring buffer. ++ * ++ * The content of events may become garbage if we allow other process consumes ++ * these events concurrently: ++ * A) the page of the consumed events may become a normal page ++ * (not reader page) in ring buffer, and this page will be rewrited ++ * by events producer. ++ * B) The page of the consumed events may become a page for splice_read, ++ * and this page will be returned to system. 
++ * ++ * These primitives allow multi process access to different cpu ring buffer ++ * concurrently. ++ * ++ * These primitives don't distinguish read-only and read-consume access. ++ * Multi read-only access are also serialized. ++ */ ++ ++#ifdef CONFIG_SMP ++static DECLARE_RWSEM(all_cpu_access_lock); ++static DEFINE_PER_CPU(struct mutex, cpu_access_lock); ++ ++static inline void trace_access_lock(int cpu) ++{ ++ if (cpu == TRACE_PIPE_ALL_CPU) { ++ /* gain it for accessing the whole ring buffer. */ ++ down_write(&all_cpu_access_lock); ++ } else { ++ /* gain it for accessing a cpu ring buffer. */ ++ ++ /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ ++ down_read(&all_cpu_access_lock); ++ ++ /* Secondly block other access to this @cpu ring buffer. */ ++ mutex_lock(&per_cpu(cpu_access_lock, cpu)); ++ } ++} ++ ++static inline void trace_access_unlock(int cpu) ++{ ++ if (cpu == TRACE_PIPE_ALL_CPU) { ++ up_write(&all_cpu_access_lock); ++ } else { ++ mutex_unlock(&per_cpu(cpu_access_lock, cpu)); ++ up_read(&all_cpu_access_lock); ++ } ++} ++ ++static inline void trace_access_lock_init(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ mutex_init(&per_cpu(cpu_access_lock, cpu)); ++} ++ ++#else ++ ++static DEFINE_MUTEX(access_lock); ++ ++static inline void trace_access_lock(int cpu) ++{ ++ (void)cpu; ++ mutex_lock(&access_lock); ++} ++ ++static inline void trace_access_unlock(int cpu) ++{ ++ (void)cpu; ++ mutex_unlock(&access_lock); ++} ++ ++static inline void trace_access_lock_init(void) ++{ ++} ++ ++#endif ++ + /* trace_wait is a waitqueue for tasks blocked on trace_poll */ + static DECLARE_WAIT_QUEUE_HEAD(trace_wait); + +@@ -1601,12 +1678,6 @@ static void tracing_iter_reset(struct tr + } + + /* +- * No necessary locking here. The worst thing which can +- * happen is loosing events consumed at the same time +- * by a trace_pipe reader. +- * Other than that, we don't risk to crash the ring buffer +- * because it serializes the readers. +- * + * The current tracer is copied to avoid a global locking + * all around. 
+ */ +@@ -1662,12 +1733,16 @@ static void *s_start(struct seq_file *m, + } + + trace_event_read_lock(); ++ trace_access_lock(cpu_file); + return p; + } + + static void s_stop(struct seq_file *m, void *p) + { ++ struct trace_iterator *iter = m->private; ++ + atomic_dec(&trace_record_cmdline_disabled); ++ trace_access_unlock(iter->cpu_file); + trace_event_read_unlock(); + } + +@@ -2858,22 +2933,6 @@ static int tracing_open_pipe(struct inod + + mutex_lock(&trace_types_lock); + +- /* We only allow one reader per cpu */ +- if (cpu_file == TRACE_PIPE_ALL_CPU) { +- if (!cpumask_empty(tracing_reader_cpumask)) { +- ret = -EBUSY; +- goto out; +- } +- cpumask_setall(tracing_reader_cpumask); +- } else { +- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) +- cpumask_set_cpu(cpu_file, tracing_reader_cpumask); +- else { +- ret = -EBUSY; +- goto out; +- } +- } +- + /* create a buffer to store the information to pass to userspace */ + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { +@@ -2929,12 +2988,6 @@ static int tracing_release_pipe(struct i + + mutex_lock(&trace_types_lock); + +- if (iter->cpu_file == TRACE_PIPE_ALL_CPU) +- cpumask_clear(tracing_reader_cpumask); +- else +- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); +- +- + if (iter->trace->pipe_close) + iter->trace->pipe_close(iter); + +@@ -3096,6 +3149,7 @@ waitagain: + iter->pos = -1; + + trace_event_read_lock(); ++ trace_access_lock(iter->cpu_file); + while (find_next_entry_inc(iter) != NULL) { + enum print_line_t ret; + int len = iter->seq.len; +@@ -3112,6 +3166,7 @@ waitagain: + if (iter->seq.len >= cnt) + break; + } ++ trace_access_unlock(iter->cpu_file); + trace_event_read_unlock(); + + /* Now copy what we have to the user */ +@@ -3237,6 +3292,7 @@ static ssize_t tracing_splice_read_pipe( + } + + trace_event_read_lock(); ++ trace_access_lock(iter->cpu_file); + + /* Fill as many pages as possible. */ + for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { +@@ -3260,6 +3316,7 @@ static ssize_t tracing_splice_read_pipe( + trace_seq_init(&iter->seq); + } + ++ trace_access_unlock(iter->cpu_file); + trace_event_read_unlock(); + mutex_unlock(&iter->mutex); + +@@ -3561,10 +3618,12 @@ tracing_buffers_read(struct file *filp, + + info->read = 0; + ++ trace_access_lock(info->cpu); + ret = ring_buffer_read_page(info->tr->buffer, + &info->spare, + count, + info->cpu, 0); ++ trace_access_unlock(info->cpu); + if (ret < 0) + return 0; + +@@ -3692,6 +3751,7 @@ tracing_buffers_splice_read(struct file + len &= PAGE_MASK; + } + ++ trace_access_lock(info->cpu); + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); + + for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { +@@ -3739,6 +3799,7 @@ tracing_buffers_splice_read(struct file + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); + } + ++ trace_access_unlock(info->cpu); + spd.nr_pages = i; + + /* did we read anything? 
*/ +@@ -4175,6 +4236,8 @@ static __init int tracer_init_debugfs(vo + struct dentry *d_tracer; + int cpu; + ++ trace_access_lock_init(); ++ + d_tracer = tracing_init_dentry(); + + trace_create_file("tracing_enabled", 0644, d_tracer, +@@ -4409,9 +4472,6 @@ __init static int tracer_alloc_buffers(v + if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) + goto out_free_buffer_mask; + +- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) +- goto out_free_tracing_cpumask; +- + /* To save memory, keep the ring buffer size to its minimum */ + if (ring_buffer_expanded) + ring_buf_size = trace_buf_size; +@@ -4469,8 +4529,6 @@ __init static int tracer_alloc_buffers(v + return 0; + + out_free_cpumask: +- free_cpumask_var(tracing_reader_cpumask); +-out_free_tracing_cpumask: + free_cpumask_var(tracing_cpumask); + out_free_buffer_mask: + free_cpumask_var(tracing_buffer_mask); diff --git a/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch b/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch new file mode 100644 index 00000000000..64c5cd695c0 --- /dev/null +++ b/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch @@ -0,0 +1,57 @@ +From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 10 Feb 2010 12:15:53 +0100 +Subject: vfs: add NOFOLLOW flag to umount(2) + +From: Miklos Szeredi + +commit db1f05bb85d7966b9176e293f3ceead1cb8b5d79 upstream. + +Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent +symlink attacks in unprivileged unmounts (fuse, samba, ncpfs). + +Additionally, return -EINVAL if an unknown flag is used (and specify +an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for +the caller to determine if a flag is supported or not. + +CC: Eugene Teo +CC: Michael Kerrisk +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 9 ++++++++- + include/linux/fs.h | 2 ++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -1121,8 +1121,15 @@ SYSCALL_DEFINE2(umount, char __user *, n + { + struct path path; + int retval; ++ int lookup_flags = 0; + +- retval = user_path(name, &path); ++ if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) ++ return -EINVAL; ++ ++ if (!(flags & UMOUNT_NOFOLLOW)) ++ lookup_flags |= LOOKUP_FOLLOW; ++ ++ retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); + if (retval) + goto out; + retval = -EINVAL; +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1308,6 +1308,8 @@ extern int send_sigurg(struct fown_struc + #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ + #define MNT_DETACH 0x00000002 /* Just detach from the tree */ + #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ ++#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ ++#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ + + extern struct list_head super_blocks; + extern spinlock_t sb_lock;
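
For callers, the new flag is passed to umount2(2); because unknown flags now
draw -EINVAL, userspace can also detect kernel support by probing with an
unused flag bit. Note that kernels predating this patch silently ignore
unknown umount flags, so success alone does not prove the flag was honored.
A minimal, illustrative user of the flag (the constant mirrors the fs.h hunk
above; actually unmounting requires appropriate privilege):

	#include <stdio.h>
	#include <errno.h>
	#include <sys/mount.h>

	#ifndef UMOUNT_NOFOLLOW
	#define UMOUNT_NOFOLLOW 0x00000008	/* don't follow symlink on umount */
	#endif

	int main(int argc, char **argv)
	{
		if (argc != 2) {
			fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
			return 1;
		}
		/* refuse to operate through a symlinked mountpoint */
		if (umount2(argv[1], UMOUNT_NOFOLLOW) != 0) {
			perror("umount2");
			return 1;
		}
		return 0;
	}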