--- /dev/null
+From 60cddf3700fe0760425aebe1d0d0850a15faf50e Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:00 +0100
+Subject: KVM: SVM: Don't use kmap_atomic in nested_svm_map
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Use of kmap_atomic disables preemption, but if we run in
+shadow-shadow mode the vmrun emulation executes kvm_set_cr3,
+which might sleep or fault. So use kmap instead for
+nested_svm_map.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit 7597f129d8b6799da7a264e6d6f7401668d3a36d)
+---
+ arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1398,7 +1398,7 @@ static inline int nested_svm_intr(struct
+ return 0;
+ }
+
+-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
++static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+ {
+ struct page *page;
+
+@@ -1406,7 +1406,9 @@ static void *nested_svm_map(struct vcpu_
+ if (is_error_page(page))
+ goto error;
+
+- return kmap_atomic(page, idx);
++ *_page = page;
++
++ return kmap(page);
+
+ error:
+ kvm_release_page_clean(page);
+@@ -1415,16 +1417,9 @@ error:
+ return NULL;
+ }
+
+-static void nested_svm_unmap(void *addr, enum km_type idx)
++static void nested_svm_unmap(struct page *page)
+ {
+- struct page *page;
+-
+- if (!addr)
+- return;
+-
+- page = kmap_atomic_to_page(addr);
+-
+- kunmap_atomic(addr, idx);
++ kunmap(page);
+ kvm_release_page_dirty(page);
+ }
+
+@@ -1432,6 +1427,7 @@ static bool nested_svm_exit_handled_msr(
+ {
+ u32 param = svm->vmcb->control.exit_info_1 & 1;
+ u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
++ struct page *page;
+ bool ret = false;
+ u32 t0, t1;
+ u8 *msrpm;
+@@ -1439,7 +1435,7 @@ static bool nested_svm_exit_handled_msr(
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return false;
+
+- msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
++ msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+
+ if (!msrpm)
+ goto out;
+@@ -1467,7 +1463,7 @@ static bool nested_svm_exit_handled_msr(
+ ret = msrpm[t1] & ((1 << param) << t0);
+
+ out:
+- nested_svm_unmap(msrpm, KM_USER0);
++ nested_svm_unmap(page);
+
+ return ret;
+ }
+@@ -1590,6 +1586,7 @@ static int nested_svm_vmexit(struct vcpu
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
++ struct page *page;
+
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+@@ -1597,7 +1594,7 @@ static int nested_svm_vmexit(struct vcpu
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
+
+- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
++ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
+ if (!nested_vmcb)
+ return 1;
+
+@@ -1687,7 +1684,7 @@ static int nested_svm_vmexit(struct vcpu
+ /* Exit nested SVM mode */
+ svm->nested.vmcb = 0;
+
+- nested_svm_unmap(nested_vmcb, KM_USER0);
++ nested_svm_unmap(page);
+
+ kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_load(&svm->vcpu);
+@@ -1698,9 +1695,10 @@ static int nested_svm_vmexit(struct vcpu
+ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
+ {
+ u32 *nested_msrpm;
++ struct page *page;
+ int i;
+
+- nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
++ nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+ if (!nested_msrpm)
+ return false;
+
+@@ -1709,7 +1707,7 @@ static bool nested_svm_vmrun_msrpm(struc
+
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+
+- nested_svm_unmap(nested_msrpm, KM_USER0);
++ nested_svm_unmap(page);
+
+ return true;
+ }
+@@ -1719,8 +1717,9 @@ static bool nested_svm_vmrun(struct vcpu
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
++ struct page *page;
+
+- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ if (!nested_vmcb)
+ return false;
+
+@@ -1832,7 +1831,7 @@ static bool nested_svm_vmrun(struct vcpu
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+- nested_svm_unmap(nested_vmcb, KM_USER0);
++ nested_svm_unmap(page);
+
+ enable_gif(svm);
+
+@@ -1858,6 +1857,7 @@ static void nested_svm_vmloadsave(struct
+ static int vmload_interception(struct vcpu_svm *svm)
+ {
+ struct vmcb *nested_vmcb;
++ struct page *page;
+
+ if (nested_svm_check_permissions(svm))
+ return 1;
+@@ -1865,12 +1865,12 @@ static int vmload_interception(struct vc
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ if (!nested_vmcb)
+ return 1;
+
+ nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
+- nested_svm_unmap(nested_vmcb, KM_USER0);
++ nested_svm_unmap(page);
+
+ return 1;
+ }
+@@ -1878,6 +1878,7 @@ static int vmload_interception(struct vc
+ static int vmsave_interception(struct vcpu_svm *svm)
+ {
+ struct vmcb *nested_vmcb;
++ struct page *page;
+
+ if (nested_svm_check_permissions(svm))
+ return 1;
+@@ -1885,12 +1886,12 @@ static int vmsave_interception(struct vc
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+ if (!nested_vmcb)
+ return 1;
+
+ nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
+- nested_svm_unmap(nested_vmcb, KM_USER0);
++ nested_svm_unmap(page);
+
+ return 1;
+ }
--- /dev/null
+From 397cb347161b605d7bdff4240d0d267bf48f4ae2 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:02 +0100
+Subject: KVM: SVM: Fix schedule-while-atomic on nested exception handling
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Move the actual vmexit routine out of code that runs with
+irqs and preemption disabled.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit b8e88bc8ffba5fe53fb8d8a0a4be3bbcffeebe56)
+---
+ arch/x86/kvm/svm.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -128,6 +128,7 @@ static void svm_flush_tlb(struct kvm_vcp
+ static void svm_complete_interrupts(struct vcpu_svm *svm);
+
+ static int nested_svm_exit_handled(struct vcpu_svm *svm);
++static int nested_svm_intercept(struct vcpu_svm *svm);
+ static int nested_svm_vmexit(struct vcpu_svm *svm);
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+@@ -1359,6 +1360,8 @@ static int nested_svm_check_permissions(
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code)
+ {
++ int vmexit;
++
+ if (!is_nested(svm))
+ return 0;
+
+@@ -1367,7 +1370,11 @@ static int nested_svm_check_exception(st
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+- return nested_svm_exit_handled(svm);
++ vmexit = nested_svm_intercept(svm);
++ if (vmexit == NESTED_EXIT_DONE)
++ svm->nested.exit_required = true;
++
++ return vmexit;
+ }
+
+ static inline int nested_svm_intr(struct vcpu_svm *svm)
+@@ -1496,7 +1503,7 @@ static int nested_svm_exit_special(struc
+ /*
+ * If this function returns true, this #vmexit was already handled
+ */
+-static int nested_svm_exit_handled(struct vcpu_svm *svm)
++static int nested_svm_intercept(struct vcpu_svm *svm)
+ {
+ u32 exit_code = svm->vmcb->control.exit_code;
+ int vmexit = NESTED_EXIT_HOST;
+@@ -1542,9 +1549,17 @@ static int nested_svm_exit_handled(struc
+ }
+ }
+
+- if (vmexit == NESTED_EXIT_DONE) {
++ return vmexit;
++}
++
++static int nested_svm_exit_handled(struct vcpu_svm *svm)
++{
++ int vmexit;
++
++ vmexit = nested_svm_intercept(svm);
++
++ if (vmexit == NESTED_EXIT_DONE)
+ nested_svm_vmexit(svm);
+- }
+
+ return vmexit;
+ }
--- /dev/null
+From d137ecd6791eb7c4553b3bd06a5e58309639e9d9 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:03 +0100
+Subject: KVM: SVM: Sync all control registers on nested vmexit
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Currently the vmexit emulation does not sync control
+registers where the access is typically intercepted by the
+nested hypervisor. But we cannot count on those intercepts,
+so sync these registers too and make the code
+architecturally more correct.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit cdbbdc1210223879450555fee04c29ebf116576b)
+---
+ arch/x86/kvm/svm.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1622,9 +1622,13 @@ static int nested_svm_vmexit(struct vcpu
+ nested_vmcb->save.ds = vmcb->save.ds;
+ nested_vmcb->save.gdtr = vmcb->save.gdtr;
+ nested_vmcb->save.idtr = vmcb->save.idtr;
++ nested_vmcb->save.cr0 = svm->vcpu.arch.cr0;
+ if (npt_enabled)
+ nested_vmcb->save.cr3 = vmcb->save.cr3;
++ else
++ nested_vmcb->save.cr3 = svm->vcpu.arch.cr3;
+ nested_vmcb->save.cr2 = vmcb->save.cr2;
++ nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
+ nested_vmcb->save.rflags = vmcb->save.rflags;
+ nested_vmcb->save.rip = vmcb->save.rip;
+ nested_vmcb->save.rsp = vmcb->save.rsp;
--- /dev/null
+From 536abd6ee4d98ef086686406124c83281d462c11 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:05 +0100
+Subject: KVM: SVM: Fix nested msr intercept handling
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+The nested_svm_exit_handled_msr() function maps only one
+page of the guest's msr permission bitmap. This patch changes
+the code to use kvm_read_guest to fix the bug.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 4c7da8cb43c09e71a405b5aeaa58a1dbac3c39e9)
+---
+ arch/x86/kvm/svm.c | 13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1434,19 +1434,13 @@ static bool nested_svm_exit_handled_msr(
+ {
+ u32 param = svm->vmcb->control.exit_info_1 & 1;
+ u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+- struct page *page;
+ bool ret = false;
+ u32 t0, t1;
+- u8 *msrpm;
++ u8 val;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return false;
+
+- msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+-
+- if (!msrpm)
+- goto out;
+-
+ switch (msr) {
+ case 0 ... 0x1fff:
+ t0 = (msr * 2) % 8;
+@@ -1467,11 +1461,10 @@ static bool nested_svm_exit_handled_msr(
+ goto out;
+ }
+
+- ret = msrpm[t1] & ((1 << param) << t0);
++ if (!kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + t1, &val, 1))
++ ret = val & ((1 << param) << t0);
+
+ out:
+- nested_svm_unmap(page);
+-
+ return ret;
+ }
+
--- /dev/null
+From 53ea99c19f1754706cca7265172ed6fb091a8e03 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:06 +0100
+Subject: KVM: SVM: Don't sync nested cr8 to lapic and back
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch makes syncing of the guest tpr to the lapic
+conditional on !nested. Otherwise a nested guest using the
+TPR could freeze the guest.
+Another important change this patch introduces is that the
+cr8 intercept bits are no longer ORed at vmrun emulation if
+the guest sets VINTR_MASKING in its VMCB. The reason is that
+nested cr8 accesses always need to be handled by the nested
+hypervisor because they change the shadow version of the
+tpr.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 88ab24adc7142506c8583ac36a34fa388300b750)
+---
+ arch/x86/kvm/svm.c | 46 +++++++++++++++++++++++++++++++---------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1805,21 +1805,6 @@ static bool nested_svm_vmrun(struct vcpu
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+- /* We don't want a nested guest to be more powerful than the guest,
+- so all intercepts are ORed */
+- svm->vmcb->control.intercept_cr_read |=
+- nested_vmcb->control.intercept_cr_read;
+- svm->vmcb->control.intercept_cr_write |=
+- nested_vmcb->control.intercept_cr_write;
+- svm->vmcb->control.intercept_dr_read |=
+- nested_vmcb->control.intercept_dr_read;
+- svm->vmcb->control.intercept_dr_write |=
+- nested_vmcb->control.intercept_dr_write;
+- svm->vmcb->control.intercept_exceptions |=
+- nested_vmcb->control.intercept_exceptions;
+-
+- svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+-
+ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+ /* cache intercepts */
+@@ -1837,6 +1822,28 @@ static bool nested_svm_vmrun(struct vcpu
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
++ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
++ /* We only want the cr8 intercept bits of the guest */
++ svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
++ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
++ }
++
++ /* We don't want a nested guest to be more powerful than the guest,
++ so all intercepts are ORed */
++ svm->vmcb->control.intercept_cr_read |=
++ nested_vmcb->control.intercept_cr_read;
++ svm->vmcb->control.intercept_cr_write |=
++ nested_vmcb->control.intercept_cr_write;
++ svm->vmcb->control.intercept_dr_read |=
++ nested_vmcb->control.intercept_dr_read;
++ svm->vmcb->control.intercept_dr_write |=
++ nested_vmcb->control.intercept_dr_write;
++ svm->vmcb->control.intercept_exceptions |=
++ nested_vmcb->control.intercept_exceptions;
++
++ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
++
++ svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+@@ -2500,6 +2507,9 @@ static void update_cr8_intercept(struct
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ if (irr == -1)
+ return;
+
+@@ -2603,6 +2613,9 @@ static inline void sync_cr8_to_lapic(str
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+ int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+ kvm_set_cr8(vcpu, cr8);
+@@ -2614,6 +2627,9 @@ static inline void sync_lapic_to_cr8(str
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 cr8;
+
++ if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++ return;
++
+ cr8 = kvm_get_cr8(vcpu);
+ svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+ svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
--- /dev/null
+From 5721224671983e9fb964e668712da5ee2f508fda Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:01 +0100
+Subject: KVM: SVM: Fix wrong interrupt injection in enable_irq_windows
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+The nested_svm_intr() function does not execute the vmexit
+anymore. Therefore we may still be in the nested state after
+that function ran. This patch changes the nested_svm_intr()
+function to return whether the irq window could be enabled.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8fe546547cf6857a9d984bfe2f2194910f3fc5d0)
+---
+ arch/x86/kvm/svm.c | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1377,16 +1377,17 @@ static int nested_svm_check_exception(st
+ return vmexit;
+ }
+
+-static inline int nested_svm_intr(struct vcpu_svm *svm)
++/* This function returns true if it is save to enable the irq window */
++static inline bool nested_svm_intr(struct vcpu_svm *svm)
+ {
+ if (!is_nested(svm))
+- return 0;
++ return true;
+
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+- return 0;
++ return true;
+
+ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+- return 0;
++ return false;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+@@ -1399,10 +1400,10 @@ static inline int nested_svm_intr(struct
+ */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+- return 1;
++ return false;
+ }
+
+- return 0;
++ return true;
+ }
+
+ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+@@ -2567,13 +2568,11 @@ static void enable_irq_window(struct kvm
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+- nested_svm_intr(svm);
+-
+ /* In case GIF=0 we can't rely on the CPU to tell us when
+ * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+ * The next time we get that intercept, this function will be
+ * called again though and we'll get the vintr intercept. */
+- if (gif_set(svm)) {
++ if (gif_set(svm) && nested_svm_intr(svm)) {
+ svm_set_vintr(svm);
+ svm_inject_irq(svm, 0x0);
+ }
--- /dev/null
+From cd87b5b7c290bea9e5d5473abe05fe7b145d0e33 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+Date: Tue, 9 Mar 2010 14:37:53 +0800
+Subject: KVM: s390: Fix possible memory leak in kvm_arch_vcpu_create()
+
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+
+This patch fixes a possible memory leak in kvm_arch_vcpu_create()
+under s390, which would happen when kvm_arch_vcpu_create() fails.
+
+Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
+Acked-by: Carsten Otte <cotte@de.ibm.com>
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 7b06bf2ffa15e119c7439ed0b024d44f66d7b605)
+---
+ arch/s390/kvm/kvm-s390.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -339,11 +339,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+
+ rc = kvm_vcpu_init(vcpu, kvm, id);
+ if (rc)
+- goto out_free_cpu;
++ goto out_free_sie_block;
+ VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+ vcpu->arch.sie_block);
+
+ return vcpu;
++out_free_sie_block:
++ free_page((unsigned long)(vcpu->arch.sie_block));
+ out_free_cpu:
+ kfree(vcpu);
+ out_nomem:
--- /dev/null
+From 07d4434372555d3ed2d333692b8919cc9cabf4d7 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+Date: Tue, 9 Mar 2010 14:13:43 +0800
+Subject: KVM: PPC: Do not create debugfs if fail to create vcpu
+
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+
+If we fail to create the vcpu, we should not create the debugfs
+entry for it.
+
+Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
+Acked-by: Alexander Graf <agraf@suse.de>
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 06056bfb944a0302a8f22eb45f09123de7fb417b)
+---
+ arch/powerpc/kvm/powerpc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -181,7 +181,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+ {
+ struct kvm_vcpu *vcpu;
+ vcpu = kvmppc_core_vcpu_create(kvm, id);
+- kvmppc_create_vcpu_debugfs(vcpu, id);
++ if (!IS_ERR(vcpu))
++ kvmppc_create_vcpu_debugfs(vcpu, id);
+ return vcpu;
+ }
+
--- /dev/null
+From a2365272251916c9c2e646ee8f63f589981e7b42 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Thu, 22 Apr 2010 12:33:11 +0200
+Subject: KVM: x86: Add callback to let modules decide over some supported cpuid bits
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch adds the set_supported_cpuid callback to
+kvm_x86_ops. It will be used in do_cpuid_ent to delegate the
+decision about some supported cpuid bits to the
+architecture modules.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit d4330ef2fb2236a1e3a176f0f68360f4c0a8661b)
+---
+ arch/x86/include/asm/kvm_host.h | 2 ++
+ arch/x86/kvm/svm.c | 5 +++++
+ arch/x86/kvm/vmx.c | 5 +++++
+ arch/x86/kvm/x86.c | 3 +++
+ 4 files changed, 15 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -533,6 +533,8 @@ struct kvm_x86_ops {
+ u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+ bool (*gb_page_enable)(void);
+
++ void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
++
+ const struct trace_print_flags *exit_reasons_str;
+ };
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2885,6 +2885,10 @@ static u64 svm_get_mt_mask(struct kvm_vc
+ return 0;
+ }
+
++static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
++{
++}
++
+ static const struct trace_print_flags svm_exit_reasons_str[] = {
+ { SVM_EXIT_READ_CR0, "read_cr0" },
+ { SVM_EXIT_READ_CR3, "read_cr3" },
+@@ -3009,6 +3013,7 @@ static struct kvm_x86_ops svm_x86_ops =
+
+ .exit_reasons_str = svm_exit_reasons_str,
+ .gb_page_enable = svm_gb_page_enable,
++ .set_supported_cpuid = svm_set_supported_cpuid,
+ };
+
+ static int __init svm_init(void)
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3993,6 +3993,10 @@ static bool vmx_gb_page_enable(void)
+ return false;
+ }
+
++static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
++{
++}
++
+ static struct kvm_x86_ops vmx_x86_ops = {
+ .cpu_has_kvm_support = cpu_has_kvm_support,
+ .disabled_by_bios = vmx_disabled_by_bios,
+@@ -4057,6 +4061,7 @@ static struct kvm_x86_ops vmx_x86_ops =
+
+ .exit_reasons_str = vmx_exit_reasons_str,
+ .gb_page_enable = vmx_gb_page_enable,
++ .set_supported_cpuid = vmx_set_supported_cpuid,
+ };
+
+ static int __init vmx_init(void)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1688,6 +1688,9 @@ static void do_cpuid_ent(struct kvm_cpui
+ entry->ecx &= kvm_supported_word6_x86_features;
+ break;
+ }
++
++ kvm_x86_ops->set_supported_cpuid(function, entry);
++
+ put_cpu();
+ }
+
--- /dev/null
+From 09e6feff68dc71b22881d30e1ff44f04c474f399 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Thu, 22 Apr 2010 12:33:12 +0200
+Subject: KVM: SVM: Report emulated SVM features to userspace
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch implements the reporting of the emulated SVM
+features to userspace instead of the real hardware
+capabilities. Every real hardware capability needs emulation
+in nested SVM, so the old behavior was broken.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit c2c63a493924e09a1984d1374a0e60dfd54fc0b0)
+---
+ arch/x86/kvm/svm.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2887,6 +2887,16 @@ static u64 svm_get_mt_mask(struct kvm_vc
+
+ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+ {
++ switch (func) {
++ case 0x8000000A:
++ entry->eax = 1; /* SVM revision 1 */
++ entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
++ ASID emulation to nested SVM */
++ entry->ecx = 0; /* Reserved */
++ entry->edx = 0; /* Do not support any additional features */
++
++ break;
++ }
+ }
+
+ static const struct trace_print_flags svm_exit_reasons_str[] = {
--- /dev/null
+From 6b0692eb1808e5adffe1e7478475554fad1ea38a Mon Sep 17 00:00:00 2001
+From: Glauber Costa <glommer@redhat.com>
+Date: Tue, 11 May 2010 12:17:40 -0400
+Subject: x86, paravirt: Add a global synchronization point for pvclock
+
+From: Glauber Costa <glommer@redhat.com>
+
+In recent stress tests, it was found that pvclock-based systems
+could seriously warp in smp systems. Using ingo's time-warp-test.c,
+I could trigger a scenario as bad as 1.5 million warps a minute in some systems.
+(to be fair, it wasn't that bad in most of them). Investigating further, I
+found out that such warps were caused by the very offset-based calculation
+pvclock is based on.
+
+This happens even on some machines that report constant_tsc in their tsc
+flags, especially on multi-socket ones.
+
+Two reads of the same kernel timestamp at approximately the same time will
+likely have been tsc-timestamped on different occasions too. This means the
+delta we calculate is unpredictable at best, and can probably come out smaller
+on a cpu that is legitimately reading the clock at a later occasion.
+
+Some adjustments on the host could make this window less likely to happen,
+but still, it is pretty much an intrinsic problem of the mechanism.
+
+A while ago, I thought about using a shared variable anyway, to hold the
+clock's last state, but gave up due to the high contention locking was likely
+to introduce, possibly rendering the thing useless on big machines. I argue,
+however, that locking is not necessary.
+
+We do a read-and-return sequence in pvclock, and between read and return,
+the global value can have changed. However, it can only have changed
+by means of an addition of a positive value. So if we detect that our
+clock timestamp is less than the current global, we know that we need to
+return a higher one, even though it is not exactly the one we compared to.
+
+OTOH, if we detect we're greater than the current time source, we atomically
+replace the value with our new reading. This does cause contention on big
+boxes (but big here means *BIG*), but it seems like a good trade-off, since
+it provides us with a time source guaranteed to be stable wrt time warps.
+
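+In pseudo-C, the clamp this patch adds at the end of
+pvclock_clocksource_read() boils down to the following simplified
+sketch (names as in the hunk below, ret being the locally computed
+clock value):
+
+    last = atomic64_read(&last_value);
+    do {
+        if (ret < last)
+            return last;    /* global already ahead: never go backwards */
+        last = atomic64_cmpxchg(&last_value, last, ret);
+    } while (unlikely(last != ret));    /* loop until last_value >= ret */
+
+    return ret;
+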
+After this patch is applied, I don't see a single warp in time during 5 days
+of execution, on any of the machines where I saw them before.
+
+Signed-off-by: Glauber Costa <glommer@redhat.com>
+Acked-by: Zachary Amsden <zamsden@redhat.com>
+CC: Jeremy Fitzhardinge <jeremy@goop.org>
+CC: Avi Kivity <avi@redhat.com>
+CC: Marcelo Tosatti <mtosatti@redhat.com>
+CC: Zachary Amsden <zamsden@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 489fb490dbf8dab0249ad82b56688ae3842a79e8)
+---
+ arch/x86/kernel/pvclock.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/arch/x86/kernel/pvclock.c
++++ b/arch/x86/kernel/pvclock.c
+@@ -109,11 +109,14 @@ unsigned long pvclock_tsc_khz(struct pvc
+ return pv_tsc_khz;
+ }
+
++static atomic64_t last_value = ATOMIC64_INIT(0);
++
+ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+ {
+ struct pvclock_shadow_time shadow;
+ unsigned version;
+ cycle_t ret, offset;
++ u64 last;
+
+ do {
+ version = pvclock_get_time_values(&shadow, src);
+@@ -123,6 +126,27 @@ cycle_t pvclock_clocksource_read(struct
+ barrier();
+ } while (version != src->version);
+
++ /*
++ * Assumption here is that last_value, a global accumulator, always goes
++ * forward. If we are less than that, we should not be much smaller.
++ * We assume there is an error marging we're inside, and then the correction
++ * does not sacrifice accuracy.
++ *
++ * For reads: global may have changed between test and return,
++ * but this means someone else updated poked the clock at a later time.
++ * We just need to make sure we are not seeing a backwards event.
++ *
++ * For updates: last_value = ret is not enough, since two vcpus could be
++ * updating at the same time, and one of them could be slightly behind,
++ * making the assumption that last_value always go forward fail to hold.
++ */
++ last = atomic64_read(&last_value);
++ do {
++ if (ret < last)
++ return last;
++ last = atomic64_cmpxchg(&last_value, last, ret);
++ } while (unlikely(last != ret));
++
+ return ret;
+ }
+
--- /dev/null
+From d00d043d62d0201da2935e542ae7fe41d245be3b Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Wed, 12 May 2010 00:28:44 +0300
+Subject: KVM: Don't allow lmsw to clear cr0.pe
+
+From: Avi Kivity <avi@redhat.com>
+
+The current lmsw implementation allows the guest to clear cr0.pe, contrary
+to the manual, which breaks EMM386.EXE.
+
+Fix by ORing the old cr0.pe with lmsw's operand.
+
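+For illustration (values picked arbitrarily), take a guest in protected
+mode with cr0 = 0x11 (PE set) executing lmsw with an operand of 0x0:
+
+    old: (cr0 & ~0x0ful) | (msw & 0x0f) = 0x10   /* PE cleared - breaks the guest */
+    new: (cr0 & ~0x0eul) | (msw & 0x0f) = 0x11   /* PE preserved                  */
+
+Bit 0 of the operand is still ORed in, so lmsw can still set cr0.pe;
+it just can no longer clear it, as the manual requires.
+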
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit f78e917688edbf1f14c318d2e50dc8e7dad20445)
+---
+ arch/x86/kvm/x86.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -432,7 +432,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
+
+ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+ {
+- kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
++ kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0eul) | (msw & 0x0f));
+ }
+ EXPORT_SYMBOL_GPL(kvm_lmsw);
+
--- /dev/null
+From 2effde8fa003ee7b472505bddfc24c8d62344ace Mon Sep 17 00:00:00 2001
+From: Sheng Yang <sheng@linux.intel.com>
+Date: Wed, 12 May 2010 16:40:40 +0800
+Subject: KVM: x86: Check LMA bit before set_efer
+
+From: Sheng Yang <sheng@linux.intel.com>
+
+kvm_x86_ops->set_efer() would execute vcpu->arch.efer = efer, so the
+check of the LMA bit didn't work.
+
+Signed-off-by: Sheng Yang <sheng@linux.intel.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit a3d204e28579427609c3d15d2310127ebaa47d94)
+---
+ arch/x86/kvm/x86.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -603,11 +603,11 @@ static void set_efer(struct kvm_vcpu *vc
+ }
+ }
+
+- kvm_x86_ops->set_efer(vcpu, efer);
+-
+ efer &= ~EFER_LMA;
+ efer |= vcpu->arch.shadow_efer & EFER_LMA;
+
++ kvm_x86_ops->set_efer(vcpu, efer);
++
+ vcpu->arch.shadow_efer = efer;
+
+ vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
--- /dev/null
+From dbad9722e24d96866696ca728032b3a09b8eb78e Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Wed, 12 May 2010 11:48:18 +0300
+Subject: KVM: MMU: Segregate shadow pages with different cr0.wp
+
+From: Avi Kivity <avi@redhat.com>
+
+When cr0.wp=0, we may shadow a gpte having u/s=1 and r/w=0 with an spte
+having u/s=0 and r/w=1. This allows excessive access if the guest sets
+cr0.wp=1 and accesses through this spte.
+
+Fix by making cr0.wp part of the base role; we'll have different sptes for
+the two cases and the problem disappears.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 3dbe141595faa48a067add3e47bba3205b79d33c)
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/mmu.c | 3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -193,6 +193,7 @@ union kvm_mmu_page_role {
+ unsigned invalid:1;
+ unsigned cr4_pge:1;
+ unsigned nxe:1;
++ unsigned cr0_wp:1;
+ };
+ };
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -227,7 +227,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
+-static int is_write_protection(struct kvm_vcpu *vcpu)
++static bool is_write_protection(struct kvm_vcpu *vcpu)
+ {
+ return vcpu->arch.cr0 & X86_CR0_WP;
+ }
+@@ -2448,6 +2448,7 @@ static int init_kvm_softmmu(struct kvm_v
+ r = paging32_init_context(vcpu);
+
+ vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
++ vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
+
+ return r;
+ }
--- /dev/null
+From fd52c9ad75e418e2a38aa0e662e88cd8b95b74be Mon Sep 17 00:00:00 2001
+From: Shane Wang <shane.wang@intel.com>
+Date: Thu, 29 Apr 2010 12:09:01 -0400
+Subject: KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)
+
+From: Shane Wang <shane.wang@intel.com>
+
+Per the documentation, for the feature control MSR:
+
+ Bit 1 enables VMXON in SMX operation. If the bit is clear, execution
+ of VMXON in SMX operation causes a general-protection exception.
+ Bit 2 enables VMXON outside SMX operation. If the bit is clear, execution
+ of VMXON outside SMX operation causes a general-protection exception.
+
+This patch enables this kind of check with SMX for VMXON in KVM.
+
+Signed-off-by: Shane Wang <shane.wang@intel.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit cafd66595d92591e4bd25c3904e004fc6f897e2d)
+---
+ arch/x86/include/asm/msr-index.h | 5 +++--
+ arch/x86/kernel/tboot.c | 1 +
+ arch/x86/kvm/vmx.c | 32 +++++++++++++++++++++-----------
+ include/linux/tboot.h | 1 +
+ 4 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -199,8 +199,9 @@
+ #define MSR_IA32_EBL_CR_POWERON 0x0000002a
+ #define MSR_IA32_FEATURE_CONTROL 0x0000003a
+
+-#define FEATURE_CONTROL_LOCKED (1<<0)
+-#define FEATURE_CONTROL_VMXON_ENABLED (1<<2)
++#define FEATURE_CONTROL_LOCKED (1<<0)
++#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
++#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+
+ #define MSR_IA32_APICBASE 0x0000001b
+ #define MSR_IA32_APICBASE_BSP (1<<8)
+--- a/arch/x86/kernel/tboot.c
++++ b/arch/x86/kernel/tboot.c
+@@ -46,6 +46,7 @@
+
+ /* Global pointer to shared data; NULL means no measured launch. */
+ struct tboot *tboot __read_mostly;
++EXPORT_SYMBOL(tboot);
+
+ /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
+ #define AP_WAIT_TIMEOUT 1
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -26,6 +26,7 @@
+ #include <linux/sched.h>
+ #include <linux/moduleparam.h>
+ #include <linux/ftrace_event.h>
++#include <linux/tboot.h>
+ #include "kvm_cache_regs.h"
+ #include "x86.h"
+
+@@ -1125,9 +1126,16 @@ static __init int vmx_disabled_by_bios(v
+ u64 msr;
+
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+- return (msr & (FEATURE_CONTROL_LOCKED |
+- FEATURE_CONTROL_VMXON_ENABLED))
+- == FEATURE_CONTROL_LOCKED;
++ if (msr & FEATURE_CONTROL_LOCKED) {
++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
++ && tboot_enabled())
++ return 1;
++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
++ && !tboot_enabled())
++ return 1;
++ }
++
++ return 0;
+ /* locked but not enabled */
+ }
+
+@@ -1135,21 +1143,23 @@ static int hardware_enable(void *garbage
+ {
+ int cpu = raw_smp_processor_id();
+ u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+- u64 old;
++ u64 old, test_bits;
+
+ if (read_cr4() & X86_CR4_VMXE)
+ return -EBUSY;
+
+ INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
+ rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
+- if ((old & (FEATURE_CONTROL_LOCKED |
+- FEATURE_CONTROL_VMXON_ENABLED))
+- != (FEATURE_CONTROL_LOCKED |
+- FEATURE_CONTROL_VMXON_ENABLED))
++
++ test_bits = FEATURE_CONTROL_LOCKED;
++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
++ if (tboot_enabled())
++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
++
++ if ((old & test_bits) != test_bits) {
+ /* enable and lock */
+- wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
+- FEATURE_CONTROL_LOCKED |
+- FEATURE_CONTROL_VMXON_ENABLED);
++ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
++ }
+ write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
+ asm volatile (ASM_VMX_VMXON_RAX
+ : : "a"(&phys_addr), "m"(phys_addr)
+--- a/include/linux/tboot.h
++++ b/include/linux/tboot.h
+@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void);
+
+ #else
+
++#define tboot_enabled() 0
+ #define tboot_probe() do { } while (0)
+ #define tboot_shutdown(shutdown_type) do { } while (0)
+ #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \
--- /dev/null
+From b7e2778692b503bc7a8fc362b1147d8df542f9c6 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Tue, 4 May 2010 12:58:32 +0300
+Subject: KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots
+
+From: Avi Kivity <avi@redhat.com>
+
+On svm, kvm_read_pdptr() may require reading guest memory, which can sleep.
+
+Push the spinlock into mmu_alloc_roots(), and only take it after we've read
+the pdptr.
+
+Tested-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8facbbff071ff2b19268d3732e31badc60471e21)
+---
+ arch/x86/kvm/mmu.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2097,11 +2097,14 @@ static int mmu_alloc_roots(struct kvm_vc
+ direct = 1;
+ if (mmu_check_root(vcpu, root_gfn))
+ return 1;
++
++ spin_lock(&vcpu->kvm->mmu_lock);
+ sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ PT64_ROOT_LEVEL, direct,
+ ACC_ALL, NULL);
+ root = __pa(sp->spt);
+ ++sp->root_count;
++ spin_unlock(&vcpu->kvm->mmu_lock);
+ vcpu->arch.mmu.root_hpa = root;
+ return 0;
+ }
+@@ -2123,11 +2126,15 @@ static int mmu_alloc_roots(struct kvm_vc
+ root_gfn = 0;
+ if (mmu_check_root(vcpu, root_gfn))
+ return 1;
++
++ spin_lock(&vcpu->kvm->mmu_lock);
+ sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+ PT32_ROOT_LEVEL, direct,
+ ACC_ALL, NULL);
+ root = __pa(sp->spt);
+ ++sp->root_count;
++ spin_unlock(&vcpu->kvm->mmu_lock);
++
+ vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ }
+ vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+@@ -2488,7 +2495,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
+ goto out;
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
++ spin_unlock(&vcpu->kvm->mmu_lock);
+ r = mmu_alloc_roots(vcpu);
++ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_sync_roots(vcpu);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ if (r)
--- /dev/null
+From a2cfe4423331f25aaf816cce92ce6e2544f74966 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Tue, 4 May 2010 15:00:37 +0300
+Subject: KVM: Fix wallclock version writing race
+
+From: Avi Kivity <avi@redhat.com>
+
+Wallclock writing uses an unprotected global variable to hold the version;
+this can cause one guest to interfere with another if both write their
+wallclock at the same time.
+
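+The wall clock version field follows the usual pvclock convention: the
+guest rereads while the version is odd or changes between reads, so the
+host must publish an odd value while an update is in flight. A rough
+sketch of the writer after this patch (error handling omitted; the tail
+of the function is assumed unchanged by this hunk):
+
+    kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
+    if (version & 1)
+        ++version;      /* leftover odd value: round up to even */
+    ++version;          /* odd: update in progress */
+    kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+    /* ... fill in and write the pvclock_wall_clock payload ... */
+    ++version;          /* even again: data is consistent */
+    kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+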
+Acked-by: Glauber Costa <glommer@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 9ed3c444ab8987c7b219173a2f7807e3f71e234e)
+---
+ arch/x86/kvm/x86.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -641,14 +641,22 @@ static int do_set_msr(struct kvm_vcpu *v
+
+ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+ {
+- static int version;
++ int version;
++ int r;
+ struct pvclock_wall_clock wc;
+ struct timespec boot;
+
+ if (!wall_clock)
+ return;
+
+- version++;
++ r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
++ if (r)
++ return;
++
++ if (version & 1)
++ ++version; /* first time write, random junk */
++
++ ++version;
+
+ kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
--- /dev/null
+From c0fc62a92b17c61146f20141a2ddc6c0bcdc548b Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 13 May 2010 11:05:49 +0300
+Subject: KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls
+
+From: Avi Kivity <avi@redhat.com>
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 98001d8d017cea1ee0f9f35c6227bbd63ef5005b)
+---
+ arch/powerpc/kvm/book3s.c | 10 ++++++++++
+ arch/powerpc/kvm/booke.c | 15 ++++++++++++++-
+ 2 files changed, 24 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s.c
++++ b/arch/powerpc/kvm/book3s.c
+@@ -766,6 +766,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
++ vcpu_load(vcpu);
++
+ sregs->pvr = vcpu->arch.pvr;
+
+ sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+@@ -784,6 +786,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct
+ sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+ }
+ }
++
++ vcpu_put(vcpu);
++
+ return 0;
+ }
+
+@@ -793,6 +798,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
+ struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+ int i;
+
++ vcpu_load(vcpu);
++
+ kvmppc_set_pvr(vcpu, sregs->pvr);
+
+ vcpu3s->sdr1 = sregs->u.s.sdr1;
+@@ -819,6 +826,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
+
+ /* Flush the MMU after messing with the segments */
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
++
++ vcpu_put(vcpu);
++
+ return 0;
+ }
+
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -443,6 +443,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct
+ {
+ int i;
+
++ vcpu_load(vcpu);
++
+ regs->pc = vcpu->arch.pc;
+ regs->cr = vcpu->arch.cr;
+ regs->ctr = vcpu->arch.ctr;
+@@ -463,6 +465,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ regs->gpr[i] = vcpu->arch.gpr[i];
+
++ vcpu_put(vcpu);
++
+ return 0;
+ }
+
+@@ -470,6 +474,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+ {
+ int i;
+
++ vcpu_load(vcpu);
++
+ vcpu->arch.pc = regs->pc;
+ vcpu->arch.cr = regs->cr;
+ vcpu->arch.ctr = regs->ctr;
+@@ -489,6 +495,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+ vcpu->arch.gpr[i] = regs->gpr[i];
+
++ vcpu_put(vcpu);
++
+ return 0;
+ }
+
+@@ -517,7 +525,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct k
+ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+ {
+- return kvmppc_core_vcpu_translate(vcpu, tr);
++ int r;
++
++ vcpu_load(vcpu);
++ r = kvmppc_core_vcpu_translate(vcpu, tr);
++ vcpu_put(vcpu);
++ return r;
+ }
+
+ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
--- /dev/null
+From 769481950f87db77b640daec6241727570c63622 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 13 May 2010 11:50:19 +0300
+Subject: KVM: x86: Add missing locking to arch specific vcpu ioctls
+
+From: Avi Kivity <avi@redhat.com>
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8fbf065d625617bbbf6b72d5f78f84ad13c8b547)
+---
+ arch/x86/kvm/x86.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1550,6 +1550,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str
+ {
+ int r;
+
++ vcpu_load(vcpu);
+ r = -E2BIG;
+ if (cpuid->nent < vcpu->arch.cpuid_nent)
+ goto out;
+@@ -1561,6 +1562,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str
+
+ out:
+ cpuid->nent = vcpu->arch.cpuid_nent;
++ vcpu_put(vcpu);
+ return r;
+ }
+
+@@ -1813,6 +1815,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(
+ int r;
+ unsigned bank_num = mcg_cap & 0xff, bank;
+
++ vcpu_load(vcpu);
+ r = -EINVAL;
+ if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+ goto out;
+@@ -1827,6 +1830,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(
+ for (bank = 0; bank < bank_num; bank++)
+ vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+ out:
++ vcpu_put(vcpu);
+ return r;
+ }
+
+@@ -2094,7 +2098,9 @@ long kvm_arch_vcpu_ioctl(struct file *fi
+ r = -EFAULT;
+ if (copy_from_user(&mce, argp, sizeof mce))
+ goto out;
++ vcpu_load(vcpu);
+ r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
++ vcpu_put(vcpu);
+ break;
+ }
+ case KVM_GET_VCPU_EVENTS: {
--- /dev/null
+From 5acedf13f81c250bf394ce7561bf404792b44558 Mon Sep 17 00:00:00 2001
+From: Roedel, Joerg <Joerg.Roedel@amd.com>
+Date: Thu, 6 May 2010 11:38:43 +0200
+Subject: KVM: x86: Inject #GP with the right rip on efer writes
+
+From: Roedel, Joerg <Joerg.Roedel@amd.com>
+
+This patch fixes a bug in the KVM efer-msr write path. If a
+guest writes to a reserved efer bit, the set_efer function
+injects the #GP directly. The architecture-dependent wrmsr
+function does not see this, assumes success and advances the
+rip. This results in a #GP in the guest with the wrong rip.
+This patch fixes this by reporting efer write errors back to
+the architectural wrmsr function.
+
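+For reference, the vendor wrmsr intercept handlers already treat a
+non-zero return as "inject #GP, do not advance rip", roughly as in the
+sketch below (existing code outside this diff; details differ between
+svm and vmx):
+
+    if (kvm_set_msr(vcpu, ecx, data))
+        kvm_inject_gp(vcpu, 0);             /* rip left untouched */
+    else
+        skip_emulated_instruction(vcpu);
+
+Returning the error from set_efer() therefore gives the guest a #GP
+pointing at the faulting wrmsr instead of the instruction after it.
+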
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit b69e8caef5b190af48c525f6d715e7b7728a77f6)
+---
+ arch/x86/kvm/x86.c | 31 ++++++++++++-------------------
+ 1 file changed, 12 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -570,37 +570,29 @@ static u32 emulated_msrs[] = {
+ MSR_IA32_MISC_ENABLE,
+ };
+
+-static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
++static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+- if (efer & efer_reserved_bits) {
+- kvm_inject_gp(vcpu, 0);
+- return;
+- }
++ if (efer & efer_reserved_bits)
++ return 1;
+
+ if (is_paging(vcpu)
+- && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+- kvm_inject_gp(vcpu, 0);
+- return;
+- }
++ && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME))
++ return 1;
+
+ if (efer & EFER_FFXSR) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+- if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+- kvm_inject_gp(vcpu, 0);
+- return;
+- }
++ if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
++ return 1;
+ }
+
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+- if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+- kvm_inject_gp(vcpu, 0);
+- return;
+- }
++ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
++ return 1;
+ }
+
+ efer &= ~EFER_LMA;
+@@ -612,6 +604,8 @@ static void set_efer(struct kvm_vcpu *vc
+
+ vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
+ kvm_mmu_reset_context(vcpu);
++
++ return 0;
+ }
+
+ void kvm_enable_efer_bits(u64 mask)
+@@ -946,8 +940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
+ {
+ switch (msr) {
+ case MSR_EFER:
+- set_efer(vcpu, data);
+- break;
++ return set_efer(vcpu, data);
+ case MSR_K7_HWCR:
+ data &= ~(u64)0x40; /* ignore flush filter disable */
+ if (data != 0) {
--- /dev/null
+From 2d7753bec146c9e0030c6b52520ad052a9c2a45e Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Wed, 5 May 2010 16:04:45 +0200
+Subject: KVM: SVM: Don't allow nested guest to VMMCALL into host
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch disables the possibility for an l2-guest to do a
+VMMCALL directly into the host. This would happen if the
+l1-hypervisor doesn't intercept VMMCALL and the l2-guest
+executes this instruction.
+
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 0d945bd9351199744c1e89d57a70615b6ee9f394)
+---
+ arch/x86/kvm/svm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1829,8 +1829,13 @@ static bool nested_svm_vmrun(struct vcpu
+ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+ }
+
+- /* We don't want a nested guest to be more powerful than the guest,
+- so all intercepts are ORed */
++ /* We don't want to see VMMCALLs from a nested guest */
++ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
++
++ /*
++ * We don't want a nested guest to be more powerful than the guest, so
++ * all intercepts are ORed
++ */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
--- /dev/null
+From 2f26afba46f0ebf155cf9be746496a0304a5b7cf Mon Sep 17 00:00:00 2001
+From: Shi Weihua <shiwh@cn.fujitsu.com>
+Date: Tue, 18 May 2010 00:50:32 +0000
+Subject: Btrfs: should add a permission check for setfacl
+
+From: Shi Weihua <shiwh@cn.fujitsu.com>
+
+commit 2f26afba46f0ebf155cf9be746496a0304a5b7cf upstream.
+
+On btrfs, do the following
+------------------
+# su user1
+# cd btrfs-part/
+# touch aaa
+# getfacl aaa
+ # file: aaa
+ # owner: user1
+ # group: user1
+ user::rw-
+ group::rw-
+ other::r--
+# su user2
+# cd btrfs-part/
+# setfacl -m u::rwx aaa
+# getfacl aaa
+ # file: aaa
+ # owner: user1
+ # group: user1
+ user::rwx <- setfacl succeeded
+ group::rw-
+ other::r--
+------------------
+but we should prohibit user2 from changing user1's acl.
+In fact, on ext3 and other fs, a message occurs:
+ setfacl: aaa: Operation not permitted
+
+This patch fixes it.
+
+Signed-off-by: Shi Weihua <shiwh@cn.fujitsu.com>
+Signed-off-by: Chris Mason <chris.mason@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/btrfs/acl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/acl.c
++++ b/fs/btrfs/acl.c
+@@ -159,6 +159,9 @@ static int btrfs_xattr_acl_set(struct de
+ int ret;
+ struct posix_acl *acl = NULL;
+
++ if (!is_owner_or_cap(dentry->d_inode))
++ return -EPERM;
++
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (acl == NULL) {
--- /dev/null
+From fa588e0c57048b3d4bfcd772d80dc0615f83fd35 Mon Sep 17 00:00:00 2001
+From: Steve French <sfrench@us.ibm.com>
+Date: Thu, 22 Apr 2010 19:21:55 +0000
+Subject: CIFS: Allow null nd (as nfs server uses) on create
+
+From: Steve French <sfrench@us.ibm.com>
+
+commit fa588e0c57048b3d4bfcd772d80dc0615f83fd35 upstream.
+
+While creating a file on a server which supports unix extensions,
+such as Samba, if the caller does not supply nameidata (i.e. nd is
+null), the cifs client can oops when calling cifs_posix_open.
+
+Signed-off-by: Shirish Pargaonkar <shirishp@us.ibm.com>
+Signed-off-by: Steve French <sfrench@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/cifs/cifsproto.h | 6 ++++--
+ fs/cifs/dir.c | 20 ++++++++++++--------
+ fs/cifs/file.c | 11 +++++++----
+ 3 files changed, 23 insertions(+), 14 deletions(-)
+
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -95,8 +95,10 @@ extern struct cifsFileInfo *cifs_new_fil
+ __u16 fileHandle, struct file *file,
+ struct vfsmount *mnt, unsigned int oflags);
+ extern int cifs_posix_open(char *full_path, struct inode **pinode,
+- struct vfsmount *mnt, int mode, int oflags,
+- __u32 *poplock, __u16 *pnetfid, int xid);
++ struct vfsmount *mnt,
++ struct super_block *sb,
++ int mode, int oflags,
++ __u32 *poplock, __u16 *pnetfid, int xid);
+ extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
+ FILE_UNIX_BASIC_INFO *info,
+ struct cifs_sb_info *cifs_sb);
+--- a/fs/cifs/dir.c
++++ b/fs/cifs/dir.c
+@@ -183,13 +183,14 @@ cifs_new_fileinfo(struct inode *newinode
+ }
+
+ int cifs_posix_open(char *full_path, struct inode **pinode,
+- struct vfsmount *mnt, int mode, int oflags,
+- __u32 *poplock, __u16 *pnetfid, int xid)
++ struct vfsmount *mnt, struct super_block *sb,
++ int mode, int oflags,
++ __u32 *poplock, __u16 *pnetfid, int xid)
+ {
+ int rc;
+ FILE_UNIX_BASIC_INFO *presp_data;
+ __u32 posix_flags = 0;
+- struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
++ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_fattr fattr;
+
+ cFYI(1, ("posix open %s", full_path));
+@@ -242,7 +243,7 @@ int cifs_posix_open(char *full_path, str
+
+ /* get new inode and set it up */
+ if (*pinode == NULL) {
+- *pinode = cifs_iget(mnt->mnt_sb, &fattr);
++ *pinode = cifs_iget(sb, &fattr);
+ if (!*pinode) {
+ rc = -ENOMEM;
+ goto posix_open_ret;
+@@ -251,7 +252,8 @@ int cifs_posix_open(char *full_path, str
+ cifs_fattr_to_inode(*pinode, &fattr);
+ }
+
+- cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
++ if (mnt)
++ cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
+
+ posix_open_ret:
+ kfree(presp_data);
+@@ -315,13 +317,14 @@ cifs_create(struct inode *inode, struct
+ if (nd && (nd->flags & LOOKUP_OPEN))
+ oflags = nd->intent.open.flags;
+ else
+- oflags = FMODE_READ;
++ oflags = FMODE_READ | SMB_O_CREAT;
+
+ if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+ (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+ le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+- rc = cifs_posix_open(full_path, &newinode, nd->path.mnt,
+- mode, oflags, &oplock, &fileHandle, xid);
++ rc = cifs_posix_open(full_path, &newinode,
++ nd ? nd->path.mnt : NULL,
++ inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
+ /* EIO could indicate that (posix open) operation is not
+ supported, despite what server claimed in capability
+ negotation. EREMOTE indicates DFS junction, which is not
+@@ -678,6 +681,7 @@ cifs_lookup(struct inode *parent_dir_ino
+ (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
+ (nd->intent.open.flags & O_CREAT)) {
+ rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
++ parent_dir_inode->i_sb,
+ nd->intent.open.create_mode,
+ nd->intent.open.flags, &oplock,
+ &fileHandle, xid);
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -297,10 +297,12 @@ int cifs_open(struct inode *inode, struc
+ (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+ le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+ int oflags = (int) cifs_posix_convert_flags(file->f_flags);
++ oflags |= SMB_O_CREAT;
+ /* can not refresh inode info since size could be stale */
+ rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
+- cifs_sb->mnt_file_mode /* ignored */,
+- oflags, &oplock, &netfid, xid);
++ inode->i_sb,
++ cifs_sb->mnt_file_mode /* ignored */,
++ oflags, &oplock, &netfid, xid);
+ if (rc == 0) {
+ cFYI(1, ("posix open succeeded"));
+ /* no need for special case handling of setting mode
+@@ -512,8 +514,9 @@ reopen_error_exit:
+ int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+ /* can not refresh inode info since size could be stale */
+ rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
+- cifs_sb->mnt_file_mode /* ignored */,
+- oflags, &oplock, &netfid, xid);
++ inode->i_sb,
++ cifs_sb->mnt_file_mode /* ignored */,
++ oflags, &oplock, &netfid, xid);
+ if (rc == 0) {
+ cFYI(1, ("posix reopen succeeded"));
+ goto reopen_success;
--- /dev/null
+From bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f Mon Sep 17 00:00:00 2001
+From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+Date: Mon, 22 Feb 2010 16:03:58 +0000
+Subject: eeepc-laptop: check wireless hotplug events
+
+From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+
+commit bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f upstream.
+
+Before we mark the wireless device as unplugged, check PCI config space
+to see whether the wireless device is really disabled (and vice versa).
+This works around newer models which don't want the hotplug code, where
+we end up disabling the wired network device.
+
+My old 701 still works correctly with this. I can also simulate an
+afflicted model by changing the hardcoded PCI bus/slot number in the
+driver, and it seems to work nicely (although it is a bit noisy).
+
+In future this type of hotplug support will be implemented by the PCI
+core. The existing blacklist and the new warning message will be
+removed at that point.
+
+Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+Signed-off-by: Corentin Chary <corentincj@iksaif.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/platform/x86/eeepc-laptop.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/platform/x86/eeepc-laptop.c
++++ b/drivers/platform/x86/eeepc-laptop.c
+@@ -578,6 +578,8 @@ static void eeepc_rfkill_hotplug(struct
+ struct pci_dev *dev;
+ struct pci_bus *bus;
+ bool blocked = eeepc_wlan_rfkill_blocked(eeepc);
++ bool absent;
++ u32 l;
+
+ if (eeepc->wlan_rfkill)
+ rfkill_set_sw_state(eeepc->wlan_rfkill, blocked);
+@@ -591,6 +593,22 @@ static void eeepc_rfkill_hotplug(struct
+ goto out_unlock;
+ }
+
++ if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
++ pr_err("Unable to read PCI config space?\n");
++ goto out_unlock;
++ }
++ absent = (l == 0xffffffff);
++
++ if (blocked != absent) {
++ pr_warning("BIOS says wireless lan is %s, "
++ "but the pci device is %s\n",
++ blocked ? "blocked" : "unblocked",
++ absent ? "absent" : "present");
++ pr_warning("skipped wireless hotplug as probably "
++ "inappropriate for this model\n");
++ goto out_unlock;
++ }
++
+ if (!blocked) {
+ dev = pci_get_slot(bus, 0);
+ if (dev) {
--- /dev/null
+From 42007efd569f1cf3bfb9a61da60ef6c2179508ca Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Sun, 16 May 2010 01:00:00 -0400
+Subject: ext4: check s_log_groups_per_flex in online resize code
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 42007efd569f1cf3bfb9a61da60ef6c2179508ca upstream.
+
+If groups_per_flex < 2, sbi->s_flex_groups[] doesn't get filled out,
+and every other access to this first tests s_log_groups_per_flex;
+same thing needs to happen in resize or we'll wander off into
+a null pointer when doing an online resize of the file system.
+
+Thanks to Christoph Biedl, who came up with the trivial testcase:
+
+# truncate --size 128M fsfile
+# mkfs.ext3 -F fsfile
+# tune2fs -O extents,uninit_bg,dir_index,flex_bg,huge_file,dir_nlink,extra_isize fsfile
+# e2fsck -yDf -C0 fsfile
+# truncate --size 132M fsfile
+# losetup /dev/loop0 fsfile
+# mount /dev/loop0 mnt
+# resize2fs -p /dev/loop0
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=13549
+
+Reported-by: Alessandro Polverini <alex@nibbles.it>
+Test-case-by: Christoph Biedl <bugzilla.kernel.bpeb@manchmal.in-ulm.de>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/resize.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -930,7 +930,8 @@ int ext4_group_add(struct super_block *s
+ percpu_counter_add(&sbi->s_freeinodes_counter,
+ EXT4_INODES_PER_GROUP(sb));
+
+- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
++ sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group;
+ flex_group = ext4_flex_group(sbi, input->group);
+ atomic_add(input->free_blocks_count,
--- /dev/null
+From 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 2 Jun 2010 22:04:39 -0400
+Subject: ext4: Make sure the MOVE_EXT ioctl can't overwrite append-only files
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 upstream.
+
+Dan Rosenberg has reported a problem with the MOVE_EXT ioctl. If the
+donor file is an append-only file, we should not allow the operation
+to proceed, lest we end up overwriting the contents of an append-only
+file.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/move_extent.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -964,6 +964,9 @@ mext_check_arguments(struct inode *orig_
+ return -EINVAL;
+ }
+
++ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
++ return -EPERM;
++
+ /* Ext4 move extent does not support swapfile */
+ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+ ext4_debug("ext4 move extent: The argument files should "
--- /dev/null
+From 7df0e0397b9a18358573274db9fdab991941062f Mon Sep 17 00:00:00 2001
+From: Steven Whitehouse <swhiteho@redhat.com>
+Date: Mon, 24 May 2010 14:36:48 +0100
+Subject: GFS2: Fix permissions checking for setflags ioctl()
+
+From: Steven Whitehouse <swhiteho@redhat.com>
+
+commit 7df0e0397b9a18358573274db9fdab991941062f upstream.
+
+We should be checking for the ownership of the file for which
+flags are being set, rather than just for write access.
+
+Reported-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/gfs2/file.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/gfs2/file.c
++++ b/fs/gfs2/file.c
+@@ -218,6 +218,11 @@ static int do_gfs2_set_flags(struct file
+ if (error)
+ goto out_drop_write;
+
++ error = -EACCES;
++ if (!is_owner_or_cap(inode))
++ goto out;
++
++ error = 0;
+ flags = ip->i_diskflags;
+ new_flags = (flags & ~mask) | (reqflags & mask);
+ if ((new_flags ^ flags) == 0)
+@@ -275,8 +280,10 @@ static int gfs2_set_flags(struct file *f
+ {
+ struct inode *inode = filp->f_path.dentry->d_inode;
+ u32 fsflags, gfsflags;
++
+ if (get_user(fsflags, ptr))
+ return -EFAULT;
++
+ gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
+ if (!S_ISDIR(inode->i_mode)) {
+ if (gfsflags & GFS2_DIF_INHERIT_JDATA)
--- /dev/null
+From ef110b24e28f36620f63dab94708a17c7e267358 Mon Sep 17 00:00:00 2001
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Date: Thu, 13 May 2010 00:42:23 -0700
+Subject: Input: psmouse - reset all types of mice before reconnecting
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit ef110b24e28f36620f63dab94708a17c7e267358 upstream.
+
+Synaptics hardware requires resetting the device after suspend to RAM
+in order for the device to be operational. The reset lives in
+synaptics-specific reconnect handler, but it is not being invoked
+if synaptics support is disabled and the device is handled as a
+standard PS/2 device (bare or IntelliMouse protocol).
+
+Let's add reset into generic reconnect handler as well.
+
+Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
+Cc: Tim Gardner <tim.gardner@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/input/mouse/psmouse-base.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/input/mouse/psmouse-base.c
++++ b/drivers/input/mouse/psmouse-base.c
+@@ -1382,6 +1382,7 @@ static int psmouse_reconnect(struct seri
+ struct psmouse *psmouse = serio_get_drvdata(serio);
+ struct psmouse *parent = NULL;
+ struct serio_driver *drv = serio->drv;
++ unsigned char type;
+ int rc = -1;
+
+ if (!drv || !psmouse) {
+@@ -1401,10 +1402,15 @@ static int psmouse_reconnect(struct seri
+ if (psmouse->reconnect) {
+ if (psmouse->reconnect(psmouse))
+ goto out;
+- } else if (psmouse_probe(psmouse) < 0 ||
+- psmouse->type != psmouse_extensions(psmouse,
+- psmouse_max_proto, false)) {
+- goto out;
++ } else {
++ psmouse_reset(psmouse);
++
++ if (psmouse_probe(psmouse) < 0)
++ goto out;
++
++ type = psmouse_extensions(psmouse, psmouse_max_proto, false);
++ if (psmouse->type != type)
++ goto out;
+ }
+
+ /* ok, the device type (and capabilities) match the old one,
--- /dev/null
+From cea7daa3589d6b550546a8c8963599f7c1a3ae5c Mon Sep 17 00:00:00 2001
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Date: Fri, 30 Apr 2010 14:32:13 +0100
+Subject: KEYS: find_keyring_by_name() can gain access to a freed keyring
+
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+
+commit cea7daa3589d6b550546a8c8963599f7c1a3ae5c upstream.
+
+find_keyring_by_name() can gain access to a keyring that has had its reference
+count reduced to zero, and is thus ready to be freed. This then allows the
+dead keyring to be brought back into use whilst it is being destroyed.
+
+The following timeline illustrates the process:
+
+|(cleaner) (user)
+|
+| free_user(user) sys_keyctl()
+| | |
+| key_put(user->session_keyring) keyctl_get_keyring_ID()
+| || //=> keyring->usage = 0 |
+| |schedule_work(&key_cleanup_task) lookup_user_key()
+| || |
+| kmem_cache_free(,user) |
+| . |[KEY_SPEC_USER_KEYRING]
+| . install_user_keyrings()
+| . ||
+| key_cleanup() [<= worker_thread()] ||
+| | ||
+| [spin_lock(&key_serial_lock)] |[mutex_lock(&key_user_keyr..mutex)]
+| | ||
+| atomic_read() == 0 ||
+| |{ rb_ease(&key->serial_node,) } ||
+| | ||
+| [spin_unlock(&key_serial_lock)] |find_keyring_by_name()
+| | |||
+| keyring_destroy(keyring) ||[read_lock(&keyring_name_lock)]
+| || |||
+| |[write_lock(&keyring_name_lock)] ||atomic_inc(&keyring->usage)
+| |. ||| *** GET freeing keyring ***
+| |. ||[read_unlock(&keyring_name_lock)]
+| || ||
+| |list_del() |[mutex_unlock(&key_user_k..mutex)]
+| || |
+| |[write_unlock(&keyring_name_lock)] ** INVALID keyring is returned **
+| | .
+| kmem_cache_free(,keyring) .
+| .
+| atomic_dec(&keyring->usage)
+v *** DESTROYED ***
+TIME
+
+If CONFIG_SLUB_DEBUG=y then we may see the following message generated:
+
+ =============================================================================
+ BUG key_jar: Poison overwritten
+ -----------------------------------------------------------------------------
+
+ INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b
+ INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086
+ INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10
+ INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3
+ INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300
+
+ Bytes b4 0xffff880197a7e1f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
+ Object 0xffff880197a7e200: 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk
+
+Alternatively, we may see a system panic happen, such as:
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+ IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+ PGD 6b2b4067 PUD 6a80d067 PMD 0
+ Oops: 0000 [#1] SMP
+ last sysfs file: /sys/kernel/kexec_crash_loaded
+ CPU 1
+ ...
+ Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY
+ RIP: 0010:[<ffffffff810e61a3>] [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+ RSP: 0018:ffff88006af3bd98 EFLAGS: 00010002
+ RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b
+ RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430
+ RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000
+ R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0
+ R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce
+ FS: 00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+ Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0)
+ Stack:
+ 0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001
+ 0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce
+ 0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3
+ Call Trace:
+ [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f
+ [<ffffffff810face3>] ? do_filp_open+0x145/0x590
+ [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33
+ [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2
+ [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d
+ [<ffffffff81103916>] ? alloc_fd+0x69/0x10e
+ [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc
+ [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b
+ Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef
+ RIP [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+
+The problem is that find_keyring_by_name() does not confirm that the keyring
+is still valid before accepting it.
+
+Skipping keyrings that have been reduced to a zero count seems the way to go.
+To this end, use atomic_inc_not_zero() to increment the usage count and skip
+the candidate keyring if that returns false.
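+
+In outline, the lookup then becomes (an illustrative sketch with a
+made-up object type; the real change is in the hunk below):
+
+	read_lock(&name_lock);
+	list_for_each_entry(obj, &bucket[hash], link) {
+		if (strcmp(obj->name, name) != 0)
+			continue;
+		/* only take a reference we can prove is still live */
+		if (!atomic_inc_not_zero(&obj->usage))
+			continue;	/* already dying, keep searching */
+		goto found;
+	}
+	obj = NULL;
+found:
+	read_unlock(&name_lock);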
+
+The following script _may_ cause the bug to happen, but there's no guarantee
+as the window of opportunity is small:
+
+ #!/bin/sh
+ LOOP=100000
+ USER=dummy_user
+ /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; }
+ for ((i=0; i<LOOP; i++))
+ do
+ /bin/su -c "echo '$i' > /dev/null" $USER
+ done
+ (( add == 1 )) && /usr/sbin/userdel -r $USER
+ exit
+
+Note that the nominated user must not be in use.
+
+An alternative way of testing this may be:
+
+ for ((i=0; i<100000; i++))
+ do
+ keyctl session foo /bin/true || break
+ done >&/dev/null
+
+as that uses a keyring named "foo" rather than relying on the user and
+user-session named keyrings.
+
+Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Acked-by: Serge Hallyn <serue@us.ibm.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/keyring.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -524,9 +524,8 @@ struct key *find_keyring_by_name(const c
+ struct key *keyring;
+ int bucket;
+
+- keyring = ERR_PTR(-EINVAL);
+ if (!name)
+- goto error;
++ return ERR_PTR(-EINVAL);
+
+ bucket = keyring_hash(name);
+
+@@ -553,17 +552,18 @@ struct key *find_keyring_by_name(const c
+ KEY_SEARCH) < 0)
+ continue;
+
+- /* we've got a match */
+- atomic_inc(&keyring->usage);
+- read_unlock(&keyring_name_lock);
+- goto error;
++ /* we've got a match but we might end up racing with
++ * key_cleanup() if the keyring is currently 'dead'
++ * (ie. it has a zero usage count) */
++ if (!atomic_inc_not_zero(&keyring->usage))
++ continue;
++ goto out;
+ }
+ }
+
+- read_unlock(&keyring_name_lock);
+ keyring = ERR_PTR(-ENOKEY);
+-
+- error:
++out:
++ read_unlock(&keyring_name_lock);
+ return keyring;
+
+ } /* end find_keyring_by_name() */
--- /dev/null
+From 4d09ec0f705cf88a12add029c058b53f288cfaa2 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <error27@gmail.com>
+Date: Mon, 17 May 2010 14:42:35 +0100
+Subject: KEYS: Return more accurate error codes
+
+From: Dan Carpenter <error27@gmail.com>
+
+commit 4d09ec0f705cf88a12add029c058b53f288cfaa2 upstream.
+
+We were using the wrong variable here so the error codes weren't being returned
+properly. The original code returns -ENOKEY.
+
+Signed-off-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/process_keys.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -509,7 +509,7 @@ try_again:
+
+ ret = install_thread_keyring();
+ if (ret < 0) {
+- key = ERR_PTR(ret);
++ key_ref = ERR_PTR(ret);
+ goto error;
+ }
+ goto reget_creds;
+@@ -527,7 +527,7 @@ try_again:
+
+ ret = install_process_keyring();
+ if (ret < 0) {
+- key = ERR_PTR(ret);
++ key_ref = ERR_PTR(ret);
+ goto error;
+ }
+ goto reget_creds;
+@@ -586,7 +586,7 @@ try_again:
+
+ case KEY_SPEC_GROUP_KEYRING:
+ /* group keyrings are not yet supported */
+- key = ERR_PTR(-EINVAL);
++ key_ref = ERR_PTR(-EINVAL);
+ goto error;
+
+ case KEY_SPEC_REQKEY_AUTH_KEY:
--- /dev/null
+From 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 Mon Sep 17 00:00:00 2001
+From: James Chapman <jchapman@katalix.com>
+Date: Tue, 16 Mar 2010 06:46:31 +0000
+Subject: l2tp: Fix oops in pppol2tp_xmit
+
+From: James Chapman <jchapman@katalix.com>
+
+commit 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 upstream.
+
+When transmitting L2TP frames, we derive the outgoing interface's UDP
+checksum hardware assist capabilities from the tunnel dst dev. This
+can sometimes be NULL, especially when routing protocols are used and
+routing changes occur. This patch just checks for NULL dst or dev
+pointers when checking for netdev hardware assist features.
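+
+The capability test therefore has to tolerate a missing dst or device,
+roughly (this mirrors the one-line change in the hunk below):
+
+	if (skb_dst(skb) && skb_dst(skb)->dev &&
+	    !(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+		/* no hardware assist: checksum the datagram in software */
+		...
+	}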
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000c
+IP: [<f89d074c>] pppol2tp_xmit+0x341/0x4da [pppol2tp]
+*pde = 00000000
+Oops: 0000 [#1] SMP
+last sysfs file: /sys/class/net/lo/operstate
+Modules linked in: pppol2tp pppox ppp_generic slhc ipv6 dummy loop snd_hda_codec_atihdmi snd_hda_intel snd_hda_codec snd_pcm snd_timer snd soundcore snd_page_alloc evdev psmouse serio_raw processor button i2c_piix4 i2c_core ati_agp agpgart pcspkr ext3 jbd mbcache sd_mod ide_pci_generic atiixp ide_core ahci ata_generic floppy ehci_hcd ohci_hcd libata e1000e scsi_mod usbcore nls_base thermal fan thermal_sys [last unloaded: scsi_wait_scan]
+
+Pid: 0, comm: swapper Not tainted (2.6.32.8 #1)
+EIP: 0060:[<f89d074c>] EFLAGS: 00010297 CPU: 3
+EIP is at pppol2tp_xmit+0x341/0x4da [pppol2tp]
+EAX: 00000000 EBX: f64d1680 ECX: 000005b9 EDX: 00000000
+ESI: f6b91850 EDI: f64d16ac EBP: f6a0c4c0 ESP: f70a9cac
+ DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
+Process swapper (pid: 0, ti=f70a8000 task=f70a31c0 task.ti=f70a8000)
+Stack:
+ 000005a9 000005b9 f734c400 f66652c0 f7352e00 f67dc800 00000000 f6b91800
+<0> 000005a3 f70ef6c4 f67dcda9 000005a3 f89b192e 00000246 000005a3 f64d1680
+<0> f63633e0 f6363320 f64d1680 f65a7320 f65a7364 f65856c0 f64d1680 f679f02f
+Call Trace:
+ [<f89b192e>] ? ppp_push+0x459/0x50e [ppp_generic]
+ [<f89b217f>] ? ppp_xmit_process+0x3b6/0x430 [ppp_generic]
+ [<f89b2306>] ? ppp_start_xmit+0x10d/0x120 [ppp_generic]
+ [<c11c15cb>] ? dev_hard_start_xmit+0x21f/0x2b2
+ [<c11d0947>] ? sch_direct_xmit+0x48/0x10e
+ [<c11c19a0>] ? dev_queue_xmit+0x263/0x3a6
+ [<c11e2a9f>] ? ip_finish_output+0x1f7/0x221
+ [<c11df682>] ? ip_forward_finish+0x2e/0x30
+ [<c11de645>] ? ip_rcv_finish+0x295/0x2a9
+ [<c11c0b19>] ? netif_receive_skb+0x3e9/0x404
+ [<f814b791>] ? e1000_clean_rx_irq+0x253/0x2fc [e1000e]
+ [<f814cb7a>] ? e1000_clean+0x63/0x1fc [e1000e]
+ [<c1047eff>] ? sched_clock_local+0x15/0x11b
+ [<c11c1095>] ? net_rx_action+0x96/0x195
+ [<c1035750>] ? __do_softirq+0xaa/0x151
+ [<c1035828>] ? do_softirq+0x31/0x3c
+ [<c10358fe>] ? irq_exit+0x26/0x58
+ [<c1004b21>] ? do_IRQ+0x78/0x89
+ [<c1003729>] ? common_interrupt+0x29/0x30
+ [<c101ac28>] ? native_safe_halt+0x2/0x3
+ [<c1008c54>] ? default_idle+0x55/0x75
+ [<c1009045>] ? c1e_idle+0xd2/0xd5
+ [<c100233c>] ? cpu_idle+0x46/0x62
+Code: 8d 45 08 f0 ff 45 08 89 6b 08 c7 43 68 7e fb 9c f8 8a 45 24 83 e0 0c 3c 04 75 09 80 63 64 f3 e9 b4 00 00 00 8b 43 18 8b 4c 24 04 <8b> 40 0c 8d 79 11 f6 40 44 0e 8a 43 64 75 51 6a 00 8b 4c 24 08
+EIP: [<f89d074c>] pppol2tp_xmit+0x341/0x4da [pppol2tp] SS:ESP 0068:f70a9cac
+CR2: 000000000000000c
+
+Signed-off-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/pppol2tp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/pppol2tp.c
++++ b/drivers/net/pppol2tp.c
+@@ -977,7 +977,8 @@ static int pppol2tp_sendmsg(struct kiocb
+ /* Calculate UDP checksum if configured to do so */
+ if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
+ skb->ip_summed = CHECKSUM_NONE;
+- else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
++ else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
++ (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ csum = skb_checksum(skb, 0, udp_len, 0);
+ uh->check = csum_tcpudp_magic(inet->inet_saddr,
--- /dev/null
+From 550f0d922286556c7ea43974bb7921effb5a5278 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 3 May 2010 20:44:21 +0000
+Subject: parisc: clear floating point exception flag on SIGFPE signal
+
+From: Helge Deller <deller@gmx.de>
+
+commit 550f0d922286556c7ea43974bb7921effb5a5278 upstream.
+
+Clear the floating point exception flag before returning to
+user space. This is needed, else the libc trampoline handler
+may hit the same SIGFPE again while building up a trampoline
+to a signal handler.
+
+Fixes debian bug #559406.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/parisc/math-emu/decode_exc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/parisc/math-emu/decode_exc.c
++++ b/arch/parisc/math-emu/decode_exc.c
+@@ -342,6 +342,7 @@ decode_fpu(unsigned int Fpu_register[],
+ return SIGNALCODE(SIGFPE, FPE_FLTINV);
+ case DIVISIONBYZEROEXCEPTION:
+ update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
++ Clear_excp_register(exception_index);
+ return SIGNALCODE(SIGFPE, FPE_FLTDIV);
+ case INEXACTEXCEPTION:
+ update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
--- /dev/null
+From 6377a7ae1ab82859edccdbc8eaea63782efb134d Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Fri, 19 Mar 2010 16:59:19 -0700
+Subject: [SCSI] qla2xxx: Disable MSI on qla24xx chips other than QLA2432.
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 6377a7ae1ab82859edccdbc8eaea63782efb134d upstream.
+
+On specific platforms, MSI is unreliable on some of the QLA24xx chips, resulting
+in fatal I/O errors under load, as reported in <http://bugs.debian.org/572322>
+and by some RHEL customers.
+
+Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/qla2xxx/qla_isr.c | 28 +++++++++++++---------------
+ 1 file changed, 13 insertions(+), 15 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -2169,30 +2169,28 @@ qla2x00_request_irqs(struct qla_hw_data
+
+ /* If possible, enable MSI-X. */
+ if (!IS_QLA2432(ha) && !IS_QLA2532(ha) &&
+- !IS_QLA8432(ha) && !IS_QLA8001(ha))
+- goto skip_msix;
++ !IS_QLA8432(ha) && !IS_QLA8001(ha))
++ goto skip_msi;
++
++ if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP &&
++ (ha->pdev->subsystem_device == 0x7040 ||
++ ha->pdev->subsystem_device == 0x7041 ||
++ ha->pdev->subsystem_device == 0x1705)) {
++ DEBUG2(qla_printk(KERN_WARNING, ha,
++ "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X,0x%X).\n",
++ ha->pdev->subsystem_vendor,
++ ha->pdev->subsystem_device));
++ goto skip_msi;
++ }
+
+ if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX ||
+ !QLA_MSIX_FW_MODE_1(ha->fw_attributes))) {
+ DEBUG2(qla_printk(KERN_WARNING, ha,
+ "MSI-X: Unsupported ISP2432 (0x%X, 0x%X).\n",
+ ha->pdev->revision, ha->fw_attributes));
+-
+ goto skip_msix;
+ }
+
+- if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP &&
+- (ha->pdev->subsystem_device == 0x7040 ||
+- ha->pdev->subsystem_device == 0x7041 ||
+- ha->pdev->subsystem_device == 0x1705)) {
+- DEBUG2(qla_printk(KERN_WARNING, ha,
+- "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X, 0x%X).\n",
+- ha->pdev->subsystem_vendor,
+- ha->pdev->subsystem_device));
+-
+- goto skip_msi;
+- }
+-
+ ret = qla24xx_enable_msix(ha, rsp);
+ if (!ret) {
+ DEBUG2(qla_printk(KERN_INFO, ha,
--- /dev/null
+From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Wed, 28 Apr 2010 10:30:59 +0000
+Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 upstream.
+
+Ok, version 4
+
+Change Notes:
+1) Minor cleanups, from Vlad's notes
+
+Summary:
+
+Hey-
+ Recently, it was reported to me that the kernel could oops in the
+following way:
+
+<5> kernel BUG at net/core/skbuff.c:91!
+<5> invalid operand: 0000 [#1]
+<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter
+ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U)
+vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5
+ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss
+snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore
+pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi
+mptbase sd_mod scsi_mod
+<5> CPU: 0
+<5> EIP: 0060:[<c02bff27>] Not tainted VLI
+<5> EFLAGS: 00010216 (2.6.9-89.0.25.EL)
+<5> EIP is at skb_over_panic+0x1f/0x2d
+<5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44
+<5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40
+<5> ds: 007b es: 007b ss: 0068
+<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0)
+<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180
+e0c2947d
+<5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004
+df653490
+<5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e
+00000004
+<5> Call Trace:
+<5> [<e0c29478>] sctp_addto_chunk+0xb0/0x128 [sctp]
+<5> [<e0c2947d>] sctp_addto_chunk+0xb5/0x128 [sctp]
+<5> [<e0c2877a>] sctp_init_cause+0x3f/0x47 [sctp]
+<5> [<e0c29d2e>] sctp_process_unk_param+0xac/0xb8 [sctp]
+<5> [<e0c29e90>] sctp_verify_init+0xcc/0x134 [sctp]
+<5> [<e0c20322>] sctp_sf_do_5_1B_init+0x83/0x28e [sctp]
+<5> [<e0c25333>] sctp_do_sm+0x41/0x77 [sctp]
+<5> [<c01555a4>] cache_grow+0x140/0x233
+<5> [<e0c26ba1>] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp]
+<5> [<e0c2b863>] sctp_inq_push+0xe/0x10 [sctp]
+<5> [<e0c34600>] sctp_rcv+0x454/0x509 [sctp]
+<5> [<e084e017>] ipt_hook+0x17/0x1c [iptable_filter]
+<5> [<c02d005e>] nf_iterate+0x40/0x81
+<5> [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5> [<c02e0c7f>] ip_local_deliver_finish+0xc6/0x151
+<5> [<c02d0362>] nf_hook_slow+0x83/0xb5
+<5> [<c02e0bb2>] ip_local_deliver+0x1a2/0x1a9
+<5> [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5> [<c02e103e>] ip_rcv+0x334/0x3b4
+<5> [<c02c66fd>] netif_receive_skb+0x320/0x35b
+<5> [<e0a0928b>] init_stall_timer+0x67/0x6a [uhci_hcd]
+<5> [<c02c67a4>] process_backlog+0x6c/0xd9
+<5> [<c02c690f>] net_rx_action+0xfe/0x1f8
+<5> [<c012a7b1>] __do_softirq+0x35/0x79
+<5> [<c0107efb>] handle_IRQ_event+0x0/0x4f
+<5> [<c01094de>] do_softirq+0x46/0x4d
+
+It's an skb_over_panic BUG halt that results from processing an init chunk in
+which too many of its variable length parameters are in some way malformed.
+
+The problem is in sctp_process_unk_param:
+if (NULL == *errp)
+ *errp = sctp_make_op_error_space(asoc, chunk,
+ ntohs(chunk->chunk_hdr->length));
+
+ if (*errp) {
+ sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ WORD_ROUND(ntohs(param.p->length)));
+ sctp_addto_chunk(*errp,
+ WORD_ROUND(ntohs(param.p->length)),
+ param.v);
+
+When we allocate an error chunk, we assume that the worst case scenario requires
+that we have chunk_hdr->length data allocated, which would be correct nominally,
+given that we call sctp_addto_chunk for the violating parameter. Unfortunately,
+we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error
+chunk, so the worst case situation in which all parameters are in violation
+requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data.
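+
+As a worked example (sizes chosen purely for illustration): an INIT chunk
+with chunk_hdr->length = 100 bytes whose parameter list decodes into 20
+unknown parameters would need about
+
+	100 + 20 * sizeof(sctp_errhdr_t) = 100 + 20 * 4 = 180 bytes
+
+of space in the error chunk, while only 100 bytes were reserved, so the
+later sctp_addto_chunk() calls run past the end of the skb.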
+
+The result of this error is that a deliberately malformed packet sent to a
+listening host can cause a remote DOS, described in CVE-2010-1173:
+http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173
+
+I've tested the fix below and confirmed that it fixes the issue. We move to a
+strategy whereby we allocate a fixed-size error chunk and ignore errors we don't
+have space to report. Tested by me successfully.
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/net/sctp/structs.h | 1
+ net/sctp/sm_make_chunk.c | 62 +++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 58 insertions(+), 5 deletions(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -778,6 +778,7 @@ int sctp_user_addto_chunk(struct sctp_ch
+ struct iovec *data);
+ void sctp_chunk_free(struct sctp_chunk *);
+ void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
++void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data);
+ struct sctp_chunk *sctp_chunkify(struct sk_buff *,
+ const struct sctp_association *,
+ struct sock *);
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -107,7 +107,7 @@ static const struct sctp_paramhdr prsctp
+ cpu_to_be16(sizeof(struct sctp_paramhdr)),
+ };
+
+-/* A helper to initialize to initialize an op error inside a
++/* A helper to initialize an op error inside a
+ * provided chunk, as most cause codes will be embedded inside an
+ * abort chunk.
+ */
+@@ -124,6 +124,29 @@ void sctp_init_cause(struct sctp_chunk
+ chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+ }
+
++/* A helper to initialize an op error inside a
++ * provided chunk, as most cause codes will be embedded inside an
++ * abort chunk. Differs from sctp_init_cause in that it won't oops
++ * if there isn't enough space in the op error chunk
++ */
++int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
++ size_t paylen)
++{
++ sctp_errhdr_t err;
++ __u16 len;
++
++ /* Cause code constants are now defined in network order. */
++ err.cause = cause_code;
++ len = sizeof(sctp_errhdr_t) + paylen;
++ err.length = htons(len);
++
++ if (skb_tailroom(chunk->skb) > len)
++ return -ENOSPC;
++ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
++ sizeof(sctp_errhdr_t),
++ &err);
++ return 0;
++}
+ /* 3.3.2 Initiation (INIT) (1)
+ *
+ * This chunk is used to initiate a SCTP association between two
+@@ -1131,6 +1154,24 @@ nodata:
+ return retval;
+ }
+
++/* Create an Operation Error chunk of a fixed size,
++ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
++ * This is a helper function to allocate an error chunk for
++ * for those invalid parameter codes in which we may not want
++ * to report all the errors, if the incomming chunk is large
++ */
++static inline struct sctp_chunk *sctp_make_op_error_fixed(
++ const struct sctp_association *asoc,
++ const struct sctp_chunk *chunk)
++{
++ size_t size = asoc ? asoc->pathmtu : 0;
++
++ if (!size)
++ size = SCTP_DEFAULT_MAXSEGMENT;
++
++ return sctp_make_op_error_space(asoc, chunk, size);
++}
++
+ /* Create an Operation Error chunk. */
+ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+@@ -1373,6 +1414,18 @@ void *sctp_addto_chunk(struct sctp_chunk
+ return target;
+ }
+
++/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
++ * space in the chunk
++ */
++void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
++ int len, const void *data)
++{
++ if (skb_tailroom(chunk->skb) > len)
++ return sctp_addto_chunk(chunk, len, data);
++ else
++ return NULL;
++}
++
+ /* Append bytes from user space to the end of a chunk. Will panic if
+ * chunk is not big enough.
+ * Returns a kernel err value.
+@@ -1976,13 +2029,12 @@ static sctp_ierror_t sctp_process_unk_pa
+ * returning multiple unknown parameters.
+ */
+ if (NULL == *errp)
+- *errp = sctp_make_op_error_space(asoc, chunk,
+- ntohs(chunk->chunk_hdr->length));
++ *errp = sctp_make_op_error_fixed(asoc, chunk);
+
+ if (*errp) {
+- sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
++ sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ WORD_ROUND(ntohs(param.p->length)));
+- sctp_addto_chunk(*errp,
++ sctp_addto_chunk_fixed(*errp,
+ WORD_ROUND(ntohs(param.p->length)),
+ param.v);
+ } else {
perf-fix-signed-comparison-in-perf_adjust_period.patch
tracing-fix-null-pointer-deref-with-send_sig_forced.patch
wl1251-fix-a-memory-leak-in-probe.patch
+ext4-check-s_log_groups_per_flex-in-online-resize-code.patch
+ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch
+gfs2-fix-permissions-checking-for-setflags-ioctl.patch
+sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch
+cifs-allow-null-nd-as-nfs-server-uses-on-create.patch
+vfs-add-nofollow-flag-to-umount-2.patch
+l2tp-fix-oops-in-pppol2tp_xmit.patch
+btrfs-should-add-a-permission-check-for-setfacl.patch
+eeepc-laptop-check-wireless-hotplug-events.patch
+tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch
+input-psmouse-reset-all-types-of-mice-before-reconnecting.patch
+0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch
+0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch
+0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch
+0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch
+0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch
+0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch
+0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch
+0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch
+0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch
+0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch
+0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch
+0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch
+0013-KVM-x86-Check-LMA-bit-before-set_efer.patch
+0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch
+0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch
+0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch
+0017-KVM-Fix-wallclock-version-writing-race.patch
+0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch
+0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch
+0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch
+0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch
+parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch
+keys-return-more-accurate-error-codes.patch
+keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch
+qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch
--- /dev/null
+From 7e53bd42d14c75192b99674c40fcc359392da59d Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+Date: Wed, 6 Jan 2010 20:08:50 +0800
+Subject: tracing: Consolidate protection of reader access to the ring buffer
+
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+
+commit 7e53bd42d14c75192b99674c40fcc359392da59d upstream.
+
+At the beginning, access to the ring buffer was fully serialized
+by trace_types_lock. Patch d7350c3f4569 gives more freedom to readers,
+and patch b04cc6b1f6 adds code to protect trace_pipe and cpu#/trace_pipe.
+
+But actually it is not enough: ring buffer readers are not always
+read-only; they may consume data.
+
+This patch makes accesses to trace, trace_pipe, trace_pipe_raw,
+cpu#/trace, cpu#/trace_pipe and cpu#/trace_pipe_raw serialized,
+and removes tracing_reader_cpumask, which was used to protect trace_pipe.
+
+Details:
+
+Ring buffer serializes readers, but it is low level protection.
+The validity of the events (which returns by ring_buffer_peek() ..etc)
+are not protected by ring buffer.
+
+The content of events may become garbage if we allow another process to consume
+these events concurrently:
+ A) the page of the consumed events may become a normal page
+ (not reader page) in ring buffer, and this page will be rewritten
+ by the events producer.
+ B) The page of the consumed events may become a page for splice_read,
+ and this page will be returned to system.
+
+This patch adds trace_access_lock() and trace_access_unlock() primitives.
+
+These primitives allow multi process access to different cpu ring buffers
+concurrently.
+
+These primitives don't distinguish read-only and read-consume access.
+Multi read-only access is also serialized.
+
+And we don't use these primitives when we open files,
+we only use them when we read files.
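+
+Every read path then brackets its ring buffer consumption with the new
+primitives, roughly (a sketch of the pattern applied below):
+
+	trace_access_lock(iter->cpu_file);
+	/* peek at / consume events for this cpu (or for all cpus) */
+	...
+	trace_access_unlock(iter->cpu_file);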
+
+Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+LKML-Reference: <4B447D52.1050602@cn.fujitsu.com>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/trace/trace.c | 136 ++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 97 insertions(+), 39 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -32,6 +32,7 @@
+ #include <linux/splice.h>
+ #include <linux/kdebug.h>
+ #include <linux/string.h>
++#include <linux/rwsem.h>
+ #include <linux/ctype.h>
+ #include <linux/init.h>
+ #include <linux/poll.h>
+@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(voi
+
+ static cpumask_var_t __read_mostly tracing_buffer_mask;
+
+-/* Define which cpu buffers are currently read in trace_pipe */
+-static cpumask_var_t tracing_reader_cpumask;
+-
+ #define for_each_tracing_cpu(cpu) \
+ for_each_cpu(cpu, tracing_buffer_mask)
+
+@@ -243,12 +241,91 @@ static struct tracer *current_trace __r
+
+ /*
+ * trace_types_lock is used to protect the trace_types list.
+- * This lock is also used to keep user access serialized.
+- * Accesses from userspace will grab this lock while userspace
+- * activities happen inside the kernel.
+ */
+ static DEFINE_MUTEX(trace_types_lock);
+
++/*
++ * serialize the access of the ring buffer
++ *
++ * ring buffer serializes readers, but it is low level protection.
++ * The validity of the events (which returns by ring_buffer_peek() ..etc)
++ * are not protected by ring buffer.
++ *
++ * The content of events may become garbage if we allow other process consumes
++ * these events concurrently:
++ * A) the page of the consumed events may become a normal page
++ * (not reader page) in ring buffer, and this page will be rewrited
++ * by events producer.
++ * B) The page of the consumed events may become a page for splice_read,
++ * and this page will be returned to system.
++ *
++ * These primitives allow multi process access to different cpu ring buffer
++ * concurrently.
++ *
++ * These primitives don't distinguish read-only and read-consume access.
++ * Multi read-only access are also serialized.
++ */
++
++#ifdef CONFIG_SMP
++static DECLARE_RWSEM(all_cpu_access_lock);
++static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
++
++static inline void trace_access_lock(int cpu)
++{
++ if (cpu == TRACE_PIPE_ALL_CPU) {
++ /* gain it for accessing the whole ring buffer. */
++ down_write(&all_cpu_access_lock);
++ } else {
++ /* gain it for accessing a cpu ring buffer. */
++
++ /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
++ down_read(&all_cpu_access_lock);
++
++ /* Secondly block other access to this @cpu ring buffer. */
++ mutex_lock(&per_cpu(cpu_access_lock, cpu));
++ }
++}
++
++static inline void trace_access_unlock(int cpu)
++{
++ if (cpu == TRACE_PIPE_ALL_CPU) {
++ up_write(&all_cpu_access_lock);
++ } else {
++ mutex_unlock(&per_cpu(cpu_access_lock, cpu));
++ up_read(&all_cpu_access_lock);
++ }
++}
++
++static inline void trace_access_lock_init(void)
++{
++ int cpu;
++
++ for_each_possible_cpu(cpu)
++ mutex_init(&per_cpu(cpu_access_lock, cpu));
++}
++
++#else
++
++static DEFINE_MUTEX(access_lock);
++
++static inline void trace_access_lock(int cpu)
++{
++ (void)cpu;
++ mutex_lock(&access_lock);
++}
++
++static inline void trace_access_unlock(int cpu)
++{
++ (void)cpu;
++ mutex_unlock(&access_lock);
++}
++
++static inline void trace_access_lock_init(void)
++{
++}
++
++#endif
++
+ /* trace_wait is a waitqueue for tasks blocked on trace_poll */
+ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
+
+@@ -1601,12 +1678,6 @@ static void tracing_iter_reset(struct tr
+ }
+
+ /*
+- * No necessary locking here. The worst thing which can
+- * happen is loosing events consumed at the same time
+- * by a trace_pipe reader.
+- * Other than that, we don't risk to crash the ring buffer
+- * because it serializes the readers.
+- *
+ * The current tracer is copied to avoid a global locking
+ * all around.
+ */
+@@ -1662,12 +1733,16 @@ static void *s_start(struct seq_file *m,
+ }
+
+ trace_event_read_lock();
++ trace_access_lock(cpu_file);
+ return p;
+ }
+
+ static void s_stop(struct seq_file *m, void *p)
+ {
++ struct trace_iterator *iter = m->private;
++
+ atomic_dec(&trace_record_cmdline_disabled);
++ trace_access_unlock(iter->cpu_file);
+ trace_event_read_unlock();
+ }
+
+@@ -2858,22 +2933,6 @@ static int tracing_open_pipe(struct inod
+
+ mutex_lock(&trace_types_lock);
+
+- /* We only allow one reader per cpu */
+- if (cpu_file == TRACE_PIPE_ALL_CPU) {
+- if (!cpumask_empty(tracing_reader_cpumask)) {
+- ret = -EBUSY;
+- goto out;
+- }
+- cpumask_setall(tracing_reader_cpumask);
+- } else {
+- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
+- cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
+- else {
+- ret = -EBUSY;
+- goto out;
+- }
+- }
+-
+ /* create a buffer to store the information to pass to userspace */
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter) {
+@@ -2929,12 +2988,6 @@ static int tracing_release_pipe(struct i
+
+ mutex_lock(&trace_types_lock);
+
+- if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
+- cpumask_clear(tracing_reader_cpumask);
+- else
+- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+-
+-
+ if (iter->trace->pipe_close)
+ iter->trace->pipe_close(iter);
+
+@@ -3096,6 +3149,7 @@ waitagain:
+ iter->pos = -1;
+
+ trace_event_read_lock();
++ trace_access_lock(iter->cpu_file);
+ while (find_next_entry_inc(iter) != NULL) {
+ enum print_line_t ret;
+ int len = iter->seq.len;
+@@ -3112,6 +3166,7 @@ waitagain:
+ if (iter->seq.len >= cnt)
+ break;
+ }
++ trace_access_unlock(iter->cpu_file);
+ trace_event_read_unlock();
+
+ /* Now copy what we have to the user */
+@@ -3237,6 +3292,7 @@ static ssize_t tracing_splice_read_pipe(
+ }
+
+ trace_event_read_lock();
++ trace_access_lock(iter->cpu_file);
+
+ /* Fill as many pages as possible. */
+ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
+@@ -3260,6 +3316,7 @@ static ssize_t tracing_splice_read_pipe(
+ trace_seq_init(&iter->seq);
+ }
+
++ trace_access_unlock(iter->cpu_file);
+ trace_event_read_unlock();
+ mutex_unlock(&iter->mutex);
+
+@@ -3561,10 +3618,12 @@ tracing_buffers_read(struct file *filp,
+
+ info->read = 0;
+
++ trace_access_lock(info->cpu);
+ ret = ring_buffer_read_page(info->tr->buffer,
+ &info->spare,
+ count,
+ info->cpu, 0);
++ trace_access_unlock(info->cpu);
+ if (ret < 0)
+ return 0;
+
+@@ -3692,6 +3751,7 @@ tracing_buffers_splice_read(struct file
+ len &= PAGE_MASK;
+ }
+
++ trace_access_lock(info->cpu);
+ entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+
+ for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+@@ -3739,6 +3799,7 @@ tracing_buffers_splice_read(struct file
+ entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+ }
+
++ trace_access_unlock(info->cpu);
+ spd.nr_pages = i;
+
+ /* did we read anything? */
+@@ -4175,6 +4236,8 @@ static __init int tracer_init_debugfs(vo
+ struct dentry *d_tracer;
+ int cpu;
+
++ trace_access_lock_init();
++
+ d_tracer = tracing_init_dentry();
+
+ trace_create_file("tracing_enabled", 0644, d_tracer,
+@@ -4409,9 +4472,6 @@ __init static int tracer_alloc_buffers(v
+ if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+ goto out_free_buffer_mask;
+
+- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+- goto out_free_tracing_cpumask;
+-
+ /* To save memory, keep the ring buffer size to its minimum */
+ if (ring_buffer_expanded)
+ ring_buf_size = trace_buf_size;
+@@ -4469,8 +4529,6 @@ __init static int tracer_alloc_buffers(v
+ return 0;
+
+ out_free_cpumask:
+- free_cpumask_var(tracing_reader_cpumask);
+-out_free_tracing_cpumask:
+ free_cpumask_var(tracing_cpumask);
+ out_free_buffer_mask:
+ free_cpumask_var(tracing_buffer_mask);
--- /dev/null
+From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@suse.cz>
+Date: Wed, 10 Feb 2010 12:15:53 +0100
+Subject: vfs: add NOFOLLOW flag to umount(2)
+
+From: Miklos Szeredi <mszeredi@suse.cz>
+
+commit db1f05bb85d7966b9176e293f3ceead1cb8b5d79 upstream.
+
+Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent
+symlink attacks in unprivileged unmounts (fuse, samba, ncpfs).
+
+Additionally, return -EINVAL if an unknown flag is used (and specify
+an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for
+the caller to determine if a flag is supported or not.
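+
+From userspace the flag would be used roughly like this (an illustrative
+sketch; constants are taken from the hunks below, and "mountpoint" is a
+placeholder):
+
+	#include <sys/mount.h>
+	#include <errno.h>
+
+	#ifndef UMOUNT_NOFOLLOW
+	#define UMOUNT_NOFOLLOW	0x00000008
+	#endif
+	#ifndef UMOUNT_UNUSED
+	#define UMOUNT_UNUSED	0x80000000
+	#endif
+
+	/* A patched kernel rejects the unused bit with EINVAL before it
+	 * even looks at the path; older kernels ignore unknown flags and
+	 * fail the empty-path lookup with a different error instead. */
+	int have_nofollow = (umount2("", UMOUNT_UNUSED) == -1 &&
+			     errno == EINVAL);
+
+	if (have_nofollow)
+		umount2(mountpoint, UMOUNT_NOFOLLOW);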
+
+CC: Eugene Teo <eugene@redhat.com>
+CC: Michael Kerrisk <mtk.manpages@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/namespace.c | 9 ++++++++-
+ include/linux/fs.h | 2 ++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1121,8 +1121,15 @@ SYSCALL_DEFINE2(umount, char __user *, n
+ {
+ struct path path;
+ int retval;
++ int lookup_flags = 0;
+
+- retval = user_path(name, &path);
++ if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
++ return -EINVAL;
++
++ if (!(flags & UMOUNT_NOFOLLOW))
++ lookup_flags |= LOOKUP_FOLLOW;
++
++ retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
+ if (retval)
+ goto out;
+ retval = -EINVAL;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1308,6 +1308,8 @@ extern int send_sigurg(struct fown_struc
+ #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */
+ #define MNT_DETACH 0x00000002 /* Just detach from the tree */
+ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */
++#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */
++#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */
+
+ extern struct list_head super_blocks;
+ extern spinlock_t sb_lock;