2.6.33 patches
author Greg Kroah-Hartman <gregkh@suse.de>
Fri, 25 Jun 2010 23:30:20 +0000 (16:30 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Fri, 25 Jun 2010 23:30:20 +0000 (16:30 -0700)
37 files changed:
queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch [new file with mode: 0644]
queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch [new file with mode: 0644]
queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch [new file with mode: 0644]
queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch [new file with mode: 0644]
queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch [new file with mode: 0644]
queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch [new file with mode: 0644]
queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch [new file with mode: 0644]
queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch [new file with mode: 0644]
queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch [new file with mode: 0644]
queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch [new file with mode: 0644]
queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch [new file with mode: 0644]
queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch [new file with mode: 0644]
queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch [new file with mode: 0644]
queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch [new file with mode: 0644]
queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch [new file with mode: 0644]
queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch [new file with mode: 0644]
queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch [new file with mode: 0644]
queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch [new file with mode: 0644]
queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch [new file with mode: 0644]
queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch [new file with mode: 0644]
queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch [new file with mode: 0644]
queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch [new file with mode: 0644]
queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch [new file with mode: 0644]
queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch [new file with mode: 0644]
queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch [new file with mode: 0644]
queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch [new file with mode: 0644]
queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch [new file with mode: 0644]
queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch [new file with mode: 0644]
queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch [new file with mode: 0644]
queue-2.6.33/keys-return-more-accurate-error-codes.patch [new file with mode: 0644]
queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch [new file with mode: 0644]
queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch [new file with mode: 0644]
queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch [new file with mode: 0644]
queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch [new file with mode: 0644]
queue-2.6.33/series
queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch [new file with mode: 0644]
queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch [new file with mode: 0644]

diff --git a/queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch b/queue-2.6.33/0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch
new file mode 100644 (file)
index 0000000..1eed513
--- /dev/null
@@ -0,0 +1,202 @@
+From 60cddf3700fe0760425aebe1d0d0850a15faf50e Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:00 +0100
+Subject: KVM: SVM: Don't use kmap_atomic in nested_svm_map
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Use of kmap_atomic disables preemption, but if we run in
+shadow-on-shadow paging mode the vmrun emulation executes
+kvm_set_cr3, which might sleep or fault. So use kmap instead
+for nested_svm_map.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit 7597f129d8b6799da7a264e6d6f7401668d3a36d)
+---
+ arch/x86/kvm/svm.c |   47 ++++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1398,7 +1398,7 @@ static inline int nested_svm_intr(struct
+       return 0;
+ }
+-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
++static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+ {
+       struct page *page;
+@@ -1406,7 +1406,9 @@ static void *nested_svm_map(struct vcpu_
+       if (is_error_page(page))
+               goto error;
+-      return kmap_atomic(page, idx);
++      *_page = page;
++
++      return kmap(page);
+ error:
+       kvm_release_page_clean(page);
+@@ -1415,16 +1417,9 @@ error:
+       return NULL;
+ }
+-static void nested_svm_unmap(void *addr, enum km_type idx)
++static void nested_svm_unmap(struct page *page)
+ {
+-      struct page *page;
+-
+-      if (!addr)
+-              return;
+-
+-      page = kmap_atomic_to_page(addr);
+-
+-      kunmap_atomic(addr, idx);
++      kunmap(page);
+       kvm_release_page_dirty(page);
+ }
+@@ -1432,6 +1427,7 @@ static bool nested_svm_exit_handled_msr(
+ {
+       u32 param = svm->vmcb->control.exit_info_1 & 1;
+       u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
++      struct page *page;
+       bool ret = false;
+       u32 t0, t1;
+       u8 *msrpm;
+@@ -1439,7 +1435,7 @@ static bool nested_svm_exit_handled_msr(
+       if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+               return false;
+-      msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
++      msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+       if (!msrpm)
+               goto out;
+@@ -1467,7 +1463,7 @@ static bool nested_svm_exit_handled_msr(
+       ret = msrpm[t1] & ((1 << param) << t0);
+ out:
+-      nested_svm_unmap(msrpm, KM_USER0);
++      nested_svm_unmap(page);
+       return ret;
+ }
+@@ -1590,6 +1586,7 @@ static int nested_svm_vmexit(struct vcpu
+       struct vmcb *nested_vmcb;
+       struct vmcb *hsave = svm->nested.hsave;
+       struct vmcb *vmcb = svm->vmcb;
++      struct page *page;
+       trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+                                      vmcb->control.exit_info_1,
+@@ -1597,7 +1594,7 @@ static int nested_svm_vmexit(struct vcpu
+                                      vmcb->control.exit_int_info,
+                                      vmcb->control.exit_int_info_err);
+-      nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
++      nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
+       if (!nested_vmcb)
+               return 1;
+@@ -1687,7 +1684,7 @@ static int nested_svm_vmexit(struct vcpu
+       /* Exit nested SVM mode */
+       svm->nested.vmcb = 0;
+-      nested_svm_unmap(nested_vmcb, KM_USER0);
++      nested_svm_unmap(page);
+       kvm_mmu_reset_context(&svm->vcpu);
+       kvm_mmu_load(&svm->vcpu);
+@@ -1698,9 +1695,10 @@ static int nested_svm_vmexit(struct vcpu
+ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
+ {
+       u32 *nested_msrpm;
++      struct page *page;
+       int i;
+-      nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0);
++      nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+       if (!nested_msrpm)
+               return false;
+@@ -1709,7 +1707,7 @@ static bool nested_svm_vmrun_msrpm(struc
+       svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+-      nested_svm_unmap(nested_msrpm, KM_USER0);
++      nested_svm_unmap(page);
+       return true;
+ }
+@@ -1719,8 +1717,9 @@ static bool nested_svm_vmrun(struct vcpu
+       struct vmcb *nested_vmcb;
+       struct vmcb *hsave = svm->nested.hsave;
+       struct vmcb *vmcb = svm->vmcb;
++      struct page *page;
+-      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+       if (!nested_vmcb)
+               return false;
+@@ -1832,7 +1831,7 @@ static bool nested_svm_vmrun(struct vcpu
+       svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+       svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+-      nested_svm_unmap(nested_vmcb, KM_USER0);
++      nested_svm_unmap(page);
+       enable_gif(svm);
+@@ -1858,6 +1857,7 @@ static void nested_svm_vmloadsave(struct
+ static int vmload_interception(struct vcpu_svm *svm)
+ {
+       struct vmcb *nested_vmcb;
++      struct page *page;
+       if (nested_svm_check_permissions(svm))
+               return 1;
+@@ -1865,12 +1865,12 @@ static int vmload_interception(struct vc
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+-      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+       if (!nested_vmcb)
+               return 1;
+       nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
+-      nested_svm_unmap(nested_vmcb, KM_USER0);
++      nested_svm_unmap(page);
+       return 1;
+ }
+@@ -1878,6 +1878,7 @@ static int vmload_interception(struct vc
+ static int vmsave_interception(struct vcpu_svm *svm)
+ {
+       struct vmcb *nested_vmcb;
++      struct page *page;
+       if (nested_svm_check_permissions(svm))
+               return 1;
+@@ -1885,12 +1886,12 @@ static int vmsave_interception(struct vc
+       svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+       skip_emulated_instruction(&svm->vcpu);
+-      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0);
++      nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
+       if (!nested_vmcb)
+               return 1;
+       nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
+-      nested_svm_unmap(nested_vmcb, KM_USER0);
++      nested_svm_unmap(page);
+       return 1;
+ }
diff --git a/queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch b/queue-2.6.33/0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch
new file mode 100644 (file)
index 0000000..b0c5c98
--- /dev/null
@@ -0,0 +1,81 @@
+From 397cb347161b605d7bdff4240d0d267bf48f4ae2 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:02 +0100
+Subject: KVM: SVM: Fix schedule-while-atomic on nested exception handling
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Move the actual vmexit routine out of code that runs with
+irqs and preemption disabled.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit b8e88bc8ffba5fe53fb8d8a0a4be3bbcffeebe56)
+---
+ arch/x86/kvm/svm.c |   23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -128,6 +128,7 @@ static void svm_flush_tlb(struct kvm_vcp
+ static void svm_complete_interrupts(struct vcpu_svm *svm);
+ static int nested_svm_exit_handled(struct vcpu_svm *svm);
++static int nested_svm_intercept(struct vcpu_svm *svm);
+ static int nested_svm_vmexit(struct vcpu_svm *svm);
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+                                     bool has_error_code, u32 error_code);
+@@ -1359,6 +1360,8 @@ static int nested_svm_check_permissions(
+ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+                                     bool has_error_code, u32 error_code)
+ {
++      int vmexit;
++
+       if (!is_nested(svm))
+               return 0;
+@@ -1367,7 +1370,11 @@ static int nested_svm_check_exception(st
+       svm->vmcb->control.exit_info_1 = error_code;
+       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+-      return nested_svm_exit_handled(svm);
++      vmexit = nested_svm_intercept(svm);
++      if (vmexit == NESTED_EXIT_DONE)
++              svm->nested.exit_required = true;
++
++      return vmexit;
+ }
+ static inline int nested_svm_intr(struct vcpu_svm *svm)
+@@ -1496,7 +1503,7 @@ static int nested_svm_exit_special(struc
+ /*
+  * If this function returns true, this #vmexit was already handled
+  */
+-static int nested_svm_exit_handled(struct vcpu_svm *svm)
++static int nested_svm_intercept(struct vcpu_svm *svm)
+ {
+       u32 exit_code = svm->vmcb->control.exit_code;
+       int vmexit = NESTED_EXIT_HOST;
+@@ -1542,9 +1549,17 @@ static int nested_svm_exit_handled(struc
+       }
+       }
+-      if (vmexit == NESTED_EXIT_DONE) {
++      return vmexit;
++}
++
++static int nested_svm_exit_handled(struct vcpu_svm *svm)
++{
++      int vmexit;
++
++      vmexit = nested_svm_intercept(svm);
++
++      if (vmexit == NESTED_EXIT_DONE)
+               nested_svm_vmexit(svm);
+-      }
+       return vmexit;
+ }
diff --git a/queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch b/queue-2.6.33/0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch
new file mode 100644 (file)
index 0000000..3028ac7
--- /dev/null
@@ -0,0 +1,39 @@
+From d137ecd6791eb7c4553b3bd06a5e58309639e9d9 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:03 +0100
+Subject: KVM: SVM: Sync all control registers on nested vmexit
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+Currently the vmexit emulation does not sync control
+registers where the access is typically intercepted by the
+nested hypervisor. But we cannot count on those intercepts
+to sync these registers too, so sync them explicitly to make
+the code architecturally more correct.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit cdbbdc1210223879450555fee04c29ebf116576b)
+---
+ arch/x86/kvm/svm.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1622,9 +1622,13 @@ static int nested_svm_vmexit(struct vcpu
+       nested_vmcb->save.ds     = vmcb->save.ds;
+       nested_vmcb->save.gdtr   = vmcb->save.gdtr;
+       nested_vmcb->save.idtr   = vmcb->save.idtr;
++      nested_vmcb->save.cr0    = svm->vcpu.arch.cr0;
+       if (npt_enabled)
+               nested_vmcb->save.cr3    = vmcb->save.cr3;
++      else
++              nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
+       nested_vmcb->save.cr2    = vmcb->save.cr2;
++      nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
+       nested_vmcb->save.rflags = vmcb->save.rflags;
+       nested_vmcb->save.rip    = vmcb->save.rip;
+       nested_vmcb->save.rsp    = vmcb->save.rsp;
diff --git a/queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch b/queue-2.6.33/0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch
new file mode 100644 (file)
index 0000000..3538fdc
--- /dev/null
@@ -0,0 +1,57 @@
+From 536abd6ee4d98ef086686406124c83281d462c11 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:05 +0100
+Subject: KVM: SVM: Fix nested msr intercept handling
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+The nested_svm_exit_handled_msr() function maps only one
+page of the guest's MSR permission bitmap. This patch changes
+the code to use kvm_read_guest to fix the bug.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 4c7da8cb43c09e71a405b5aeaa58a1dbac3c39e9)
+---
+ arch/x86/kvm/svm.c |   13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1434,19 +1434,13 @@ static bool nested_svm_exit_handled_msr(
+ {
+       u32 param = svm->vmcb->control.exit_info_1 & 1;
+       u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+-      struct page *page;
+       bool ret = false;
+       u32 t0, t1;
+-      u8 *msrpm;
++      u8 val;
+       if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+               return false;
+-      msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
+-
+-      if (!msrpm)
+-              goto out;
+-
+       switch (msr) {
+       case 0 ... 0x1fff:
+               t0 = (msr * 2) % 8;
+@@ -1467,11 +1461,10 @@ static bool nested_svm_exit_handled_msr(
+               goto out;
+       }
+-      ret = msrpm[t1] & ((1 << param) << t0);
++      if (!kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + t1, &val, 1))
++              ret = val & ((1 << param) << t0);
+ out:
+-      nested_svm_unmap(page);
+-
+       return ret;
+ }
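
To sanity-check the bitmap arithmetic above, here is a minimal userspace
sketch (illustrative demo code, not part of the patch) of the lookup that
nested_svm_exit_handled_msr() now performs via kvm_read_guest(): for MSRs
0x0-0x1fff the MSRPM stores two intercept bits per MSR, the read bit first,
then the write bit.

    #include <stdint.h>
    #include <stdio.h>

    /* Return nonzero if the MSR permission bitmap intercepts the access.
     * 'write' is 0 to test the read-intercept bit, 1 for the write bit,
     * mirroring the 'param' bit taken from exit_info_1 in the patch. */
    static int msr_intercepted(const uint8_t *msrpm, uint32_t msr, int write)
    {
        uint32_t t0 = (msr * 2) % 8;  /* bit offset inside the byte  */
        uint32_t t1 = msr * 2 / 8;    /* byte offset into the bitmap */

        return msrpm[t1] & ((1 << write) << t0);
    }

    int main(void)
    {
        uint8_t msrpm[0x800] = { 0 };

        /* Intercept writes (but not reads) of MSR 0x176. */
        msrpm[(0x176 * 2) / 8] |= 2 << ((0x176 * 2) % 8);

        printf("read:  %d\n", !!msr_intercepted(msrpm, 0x176, 0));
        printf("write: %d\n", !!msr_intercepted(msrpm, 0x176, 1));
        return 0;
    }
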
diff --git a/queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch b/queue-2.6.33/0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch
new file mode 100644 (file)
index 0000000..74ec234
--- /dev/null
@@ -0,0 +1,109 @@
+From 53ea99c19f1754706cca7265172ed6fb091a8e03 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:06 +0100
+Subject: KVM: SVM: Don't sync nested cr8 to lapic and back
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch makes syncing of the guest tpr to the lapic
+conditional on !nested. Otherwise a nested guest using the
+TPR could freeze the guest.
+Another important change this patch introduces is that the
+cr8 intercept bits are no longer ORed at vmrun emulation if
+the guest sets VINTR_MASKING in its VMCB. The reason is that
+nested cr8 accesses always need to be handled by the nested
+hypervisor because they change the shadow version of the
+tpr.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 88ab24adc7142506c8583ac36a34fa388300b750)
+---
+ arch/x86/kvm/svm.c |   46 +++++++++++++++++++++++++++++++---------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1805,21 +1805,6 @@ static bool nested_svm_vmrun(struct vcpu
+       svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+       svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+-      /* We don't want a nested guest to be more powerful than the guest,
+-         so all intercepts are ORed */
+-      svm->vmcb->control.intercept_cr_read |=
+-              nested_vmcb->control.intercept_cr_read;
+-      svm->vmcb->control.intercept_cr_write |=
+-              nested_vmcb->control.intercept_cr_write;
+-      svm->vmcb->control.intercept_dr_read |=
+-              nested_vmcb->control.intercept_dr_read;
+-      svm->vmcb->control.intercept_dr_write |=
+-              nested_vmcb->control.intercept_dr_write;
+-      svm->vmcb->control.intercept_exceptions |=
+-              nested_vmcb->control.intercept_exceptions;
+-
+-      svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+-
+       svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+       /* cache intercepts */
+@@ -1837,6 +1822,28 @@ static bool nested_svm_vmrun(struct vcpu
+       else
+               svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
++      if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
++              /* We only want the cr8 intercept bits of the guest */
++              svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
++              svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
++      }
++
++      /* We don't want a nested guest to be more powerful than the guest,
++         so all intercepts are ORed */
++      svm->vmcb->control.intercept_cr_read |=
++              nested_vmcb->control.intercept_cr_read;
++      svm->vmcb->control.intercept_cr_write |=
++              nested_vmcb->control.intercept_cr_write;
++      svm->vmcb->control.intercept_dr_read |=
++              nested_vmcb->control.intercept_dr_read;
++      svm->vmcb->control.intercept_dr_write |=
++              nested_vmcb->control.intercept_dr_write;
++      svm->vmcb->control.intercept_exceptions |=
++              nested_vmcb->control.intercept_exceptions;
++
++      svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
++
++      svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+       svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+       svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+       svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+@@ -2500,6 +2507,9 @@ static void update_cr8_intercept(struct
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
++      if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++              return;
++
+       if (irr == -1)
+               return;
+@@ -2603,6 +2613,9 @@ static inline void sync_cr8_to_lapic(str
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
++      if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++              return;
++
+       if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+               int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+               kvm_set_cr8(vcpu, cr8);
+@@ -2614,6 +2627,9 @@ static inline void sync_lapic_to_cr8(str
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u64 cr8;
++      if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
++              return;
++
+       cr8 = kvm_get_cr8(vcpu);
+       svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+       svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
diff --git a/queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch b/queue-2.6.33/0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch
new file mode 100644 (file)
index 0000000..c30c63c
--- /dev/null
@@ -0,0 +1,73 @@
+From 5721224671983e9fb964e668712da5ee2f508fda Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Fri, 19 Feb 2010 16:23:01 +0100
+Subject: KVM: SVM: Fix wrong interrupt injection in enable_irq_windows
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+The nested_svm_intr() function does not execute the vmexit
+anymore. Therefore we may still be in the nested state after
+that function ran. This patch changes the nested_svm_intr()
+function to return whether the irq window could be enabled.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8fe546547cf6857a9d984bfe2f2194910f3fc5d0)
+---
+ arch/x86/kvm/svm.c |   17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1377,16 +1377,17 @@ static int nested_svm_check_exception(st
+       return vmexit;
+ }
+-static inline int nested_svm_intr(struct vcpu_svm *svm)
++/* This function returns true if it is safe to enable the irq window */
++static inline bool nested_svm_intr(struct vcpu_svm *svm)
+ {
+       if (!is_nested(svm))
+-              return 0;
++              return true;
+       if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+-              return 0;
++              return true;
+       if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+-              return 0;
++              return false;
+       svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+@@ -1399,10 +1400,10 @@ static inline int nested_svm_intr(struct
+                */
+               svm->nested.exit_required = true;
+               trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+-              return 1;
++              return false;
+       }
+-      return 0;
++      return true;
+ }
+ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
+@@ -2567,13 +2568,11 @@ static void enable_irq_window(struct kvm
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+-      nested_svm_intr(svm);
+-
+       /* In case GIF=0 we can't rely on the CPU to tell us when
+        * GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+        * The next time we get that intercept, this function will be
+        * called again though and we'll get the vintr intercept. */
+-      if (gif_set(svm)) {
++      if (gif_set(svm) && nested_svm_intr(svm)) {
+               svm_set_vintr(svm);
+               svm_inject_irq(svm, 0x0);
+       }
diff --git a/queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch b/queue-2.6.33/0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch
new file mode 100644 (file)
index 0000000..35ff5e8
--- /dev/null
@@ -0,0 +1,37 @@
+From cd87b5b7c290bea9e5d5473abe05fe7b145d0e33 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+Date: Tue, 9 Mar 2010 14:37:53 +0800
+Subject: KVM: s390: Fix possible memory leak in kvm_arch_vcpu_create()
+
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+
+This patch fixes a possible memory leak in kvm_arch_vcpu_create()
+under s390, which would happen when kvm_arch_vcpu_create() fails.
+
+Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
+Acked-by: Carsten Otte <cotte@de.ibm.com>
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 7b06bf2ffa15e119c7439ed0b024d44f66d7b605)
+---
+ arch/s390/kvm/kvm-s390.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -339,11 +339,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+       rc = kvm_vcpu_init(vcpu, kvm, id);
+       if (rc)
+-              goto out_free_cpu;
++              goto out_free_sie_block;
+       VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+                vcpu->arch.sie_block);
+       return vcpu;
++out_free_sie_block:
++      free_page((unsigned long)(vcpu->arch.sie_block));
+ out_free_cpu:
+       kfree(vcpu);
+ out_nomem:
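
The fix follows the usual kernel error-unwinding idiom: each allocation gets
its own label, and a failure jumps to the label that frees everything
allocated so far, in reverse order. A minimal userspace sketch of the same
pattern (hypothetical demo, with plain malloc/free standing in for the
kernel allocators):

    #include <stdio.h>
    #include <stdlib.h>

    struct vcpu { void *sie_block; };

    static struct vcpu *vcpu_create(int init_fails)
    {
        struct vcpu *vcpu = malloc(sizeof(*vcpu));
        if (!vcpu)
            goto out_nomem;

        vcpu->sie_block = malloc(4096);
        if (!vcpu->sie_block)
            goto out_free_cpu;

        if (init_fails)              /* models kvm_vcpu_init() failing */
            goto out_free_sie_block;

        return vcpu;

    out_free_sie_block:              /* the label this patch adds */
        free(vcpu->sie_block);
    out_free_cpu:
        free(vcpu);
    out_nomem:
        return NULL;
    }

    int main(void)
    {
        struct vcpu *ok = vcpu_create(0);
        struct vcpu *bad = vcpu_create(1);  /* NULL, and nothing leaked */

        printf("ok=%p bad=%p\n", (void *)ok, (void *)bad);
        if (ok) {
            free(ok->sie_block);
            free(ok);
        }
        return 0;
    }
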
diff --git a/queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch b/queue-2.6.33/0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch
new file mode 100644 (file)
index 0000000..1114b1c
--- /dev/null
@@ -0,0 +1,32 @@
+From 07d4434372555d3ed2d333692b8919cc9cabf4d7 Mon Sep 17 00:00:00 2001
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+Date: Tue, 9 Mar 2010 14:13:43 +0800
+Subject: KVM: PPC: Do not create debugfs if fail to create vcpu
+
+From: Wei Yongjun <yjwei@cn.fujitsu.com>
+
+If we fail to create the vcpu, we should not create the debugfs
+entry for it.
+
+Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
+Acked-by: Alexander Graf <agraf@suse.de>
+Cc: stable@kernel.org
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 06056bfb944a0302a8f22eb45f09123de7fb417b)
+---
+ arch/powerpc/kvm/powerpc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -181,7 +181,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
+ {
+       struct kvm_vcpu *vcpu;
+       vcpu = kvmppc_core_vcpu_create(kvm, id);
+-      kvmppc_create_vcpu_debugfs(vcpu, id);
++      if (!IS_ERR(vcpu))
++              kvmppc_create_vcpu_debugfs(vcpu, id);
+       return vcpu;
+ }
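
The IS_ERR() check matters because kvmppc_core_vcpu_create() reports failure
through an error-encoded pointer rather than NULL, so the old code handed a
non-NULL error pointer straight to the debugfs helper. A rough userspace
sketch of the ERR_PTR/IS_ERR convention (simplified from the kernel's err.h;
demo code, not the kvm sources):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static void *ERR_PTR(long error)      /* encode -errno in a pointer  */
    {
        return (void *)error;
    }

    static int IS_ERR(const void *ptr)    /* top 4095 addresses = errors */
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
        void *vcpu = ERR_PTR(-EINVAL);

        /* A plain NULL test passes here, which is exactly how the buggy
         * code reached kvmppc_create_vcpu_debugfs() with a bad pointer. */
        printf("vcpu != NULL: %d  IS_ERR(vcpu): %d\n",
               vcpu != NULL, IS_ERR(vcpu));
        return 0;
    }
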
diff --git a/queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch b/queue-2.6.33/0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch
new file mode 100644 (file)
index 0000000..fa085c2
--- /dev/null
@@ -0,0 +1,90 @@
+From a2365272251916c9c2e646ee8f63f589981e7b42 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Thu, 22 Apr 2010 12:33:11 +0200
+Subject: KVM: x86: Add callback to let modules decide over some supported cpuid bits
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch adds the set_supported_cpuid callback to
+kvm_x86_ops. It will be used in do_cpuid_ent to delegate the
+decision about some supported cpuid bits to the
+architecture modules.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+(cherry picked from commit d4330ef2fb2236a1e3a176f0f68360f4c0a8661b)
+---
+ arch/x86/include/asm/kvm_host.h |    2 ++
+ arch/x86/kvm/svm.c              |    5 +++++
+ arch/x86/kvm/vmx.c              |    5 +++++
+ arch/x86/kvm/x86.c              |    3 +++
+ 4 files changed, 15 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -533,6 +533,8 @@ struct kvm_x86_ops {
+       u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+       bool (*gb_page_enable)(void);
++      void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
++
+       const struct trace_print_flags *exit_reasons_str;
+ };
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2885,6 +2885,10 @@ static u64 svm_get_mt_mask(struct kvm_vc
+       return 0;
+ }
++static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
++{
++}
++
+ static const struct trace_print_flags svm_exit_reasons_str[] = {
+       { SVM_EXIT_READ_CR0,                    "read_cr0" },
+       { SVM_EXIT_READ_CR3,                    "read_cr3" },
+@@ -3009,6 +3013,7 @@ static struct kvm_x86_ops svm_x86_ops =
+       .exit_reasons_str = svm_exit_reasons_str,
+       .gb_page_enable = svm_gb_page_enable,
++      .set_supported_cpuid = svm_set_supported_cpuid,
+ };
+ static int __init svm_init(void)
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3993,6 +3993,10 @@ static bool vmx_gb_page_enable(void)
+       return false;
+ }
++static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
++{
++}
++
+ static struct kvm_x86_ops vmx_x86_ops = {
+       .cpu_has_kvm_support = cpu_has_kvm_support,
+       .disabled_by_bios = vmx_disabled_by_bios,
+@@ -4057,6 +4061,7 @@ static struct kvm_x86_ops vmx_x86_ops =
+       .exit_reasons_str = vmx_exit_reasons_str,
+       .gb_page_enable = vmx_gb_page_enable,
++      .set_supported_cpuid = vmx_set_supported_cpuid,
+ };
+ static int __init vmx_init(void)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1688,6 +1688,9 @@ static void do_cpuid_ent(struct kvm_cpui
+               entry->ecx &= kvm_supported_word6_x86_features;
+               break;
+       }
++
++      kvm_x86_ops->set_supported_cpuid(function, entry);
++
+       put_cpu();
+ }
diff --git a/queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch b/queue-2.6.33/0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch
new file mode 100644 (file)
index 0000000..f6971bd
--- /dev/null
@@ -0,0 +1,40 @@
+From 09e6feff68dc71b22881d30e1ff44f04c474f399 Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Thu, 22 Apr 2010 12:33:12 +0200
+Subject: KVM: SVM: Report emulated SVM features to userspace
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch implements the reporting of the emulated SVM
+features to userspace instead of the real hardware
+capabilities. Every real hardware capability needs emulation
+in nested SVM, so the old behavior was broken.
+
+Cc: stable@kernel.org
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit c2c63a493924e09a1984d1374a0e60dfd54fc0b0)
+---
+ arch/x86/kvm/svm.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -2887,6 +2887,16 @@ static u64 svm_get_mt_mask(struct kvm_vc
+ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
+ {
++      switch (func) {
++      case 0x8000000A:
++              entry->eax = 1; /* SVM revision 1 */
++              entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
++                                 ASID emulation to nested SVM */
++              entry->ecx = 0; /* Reserved */
++              entry->edx = 0; /* Do not support any additional features */
++
++              break;
++      }
+ }
+ static const struct trace_print_flags svm_exit_reasons_str[] = {
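
Patches 9 and 10 work as a pair: the new kvm_x86_ops hook gives each backend
the final say over a CPUID leaf, and the SVM implementation overrides leaf
0x8000000A with the feature set nested SVM actually emulates. A small
userspace sketch of that dispatch shape (hypothetical types, not the kernel
structs):

    #include <stdint.h>
    #include <stdio.h>

    struct cpuid_entry { uint32_t eax, ebx, ecx, edx; };

    struct x86_ops {
        void (*set_supported_cpuid)(uint32_t func, struct cpuid_entry *e);
    };

    /* Backend override: advertise only what nested SVM can emulate. */
    static void svm_set_supported_cpuid(uint32_t func, struct cpuid_entry *e)
    {
        if (func == 0x8000000A) {
            e->eax = 1;   /* SVM revision 1                         */
            e->ebx = 8;   /* 8 ASIDs, pending proper ASID emulation */
            e->ecx = 0;   /* reserved                               */
            e->edx = 0;   /* no additional features                 */
        }
    }

    int main(void)
    {
        struct x86_ops ops = { .set_supported_cpuid = svm_set_supported_cpuid };
        struct cpuid_entry e = { .eax = 1, .ebx = 64, .edx = 0xff }; /* raw hw */

        ops.set_supported_cpuid(0x8000000A, &e);  /* as do_cpuid_ent now does */
        printf("eax=%u ebx=%u ecx=%u edx=%u\n", e.eax, e.ebx, e.ecx, e.edx);
        return 0;
    }
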
diff --git a/queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch b/queue-2.6.33/0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch
new file mode 100644 (file)
index 0000000..02b91e2
--- /dev/null
@@ -0,0 +1,102 @@
+From 6b0692eb1808e5adffe1e7478475554fad1ea38a Mon Sep 17 00:00:00 2001
+From: Glauber Costa <glommer@redhat.com>
+Date: Tue, 11 May 2010 12:17:40 -0400
+Subject: x86, paravirt: Add a global synchronization point for pvclock
+
+From: Glauber Costa <glommer@redhat.com>
+
+In recent stress tests, it was found that pvclock-based systems
+could seriously warp in smp systems. Using Ingo's time-warp-test.c,
+I could trigger a scenario as bad as 1.5 million warps a minute in some systems.
+(to be fair, it wasn't that bad in most of them). Investigating further, I
+found out that such warps were caused by the very offset-based calculation
+pvclock is based on.
+
+This happens even on some machines that report constant_tsc in their tsc flags,
+especially on multi-socket ones.
+
+Two reads of the same kernel timestamp at approximately the same time will
+likely have the tsc sampled on different occasions too. This means the delta
+we calculate is unpredictable at best, and can probably be smaller on a cpu
+that is legitimately reading the clock at a later occasion.
+
+Some adjustments on the host could make this window less likely to happen,
+but still, it pretty much poses as an intrinsic problem of the mechanism.
+
+A while ago, I thought about using a shared variable anyway, to hold clock
+last state, but gave up due to the high contention locking was likely
+to introduce, possibly rendering the thing useless on big machines. I argue,
+however, that locking is not necessary.
+
+We do a read-and-return sequence in pvclock, and between read and return,
+the global value can have changed. However, it can only have changed
+by means of an addition of a positive value. So if we detected that our
+clock timestamp is less than the current global, we know that we need to
+return a higher one, even though it is not exactly the one we compared to.
+
+OTOH, if we detect we're greater than the current time source, we atomically
+replace the value with our new readings. This does cause contention on big
+boxes (but big here means *BIG*), but it seems like a good trade-off, since
+it provides us with a time source guaranteed to be stable wrt time warps.
+
+After this patch is applied, I don't see a single warp in time during 5 days
+of execution, in any of the machines I saw them before.
+
+Signed-off-by: Glauber Costa <glommer@redhat.com>
+Acked-by: Zachary Amsden <zamsden@redhat.com>
+CC: Jeremy Fitzhardinge <jeremy@goop.org>
+CC: Avi Kivity <avi@redhat.com>
+CC: Marcelo Tosatti <mtosatti@redhat.com>
+CC: Zachary Amsden <zamsden@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 489fb490dbf8dab0249ad82b56688ae3842a79e8)
+---
+ arch/x86/kernel/pvclock.c |   24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/arch/x86/kernel/pvclock.c
++++ b/arch/x86/kernel/pvclock.c
+@@ -109,11 +109,14 @@ unsigned long pvclock_tsc_khz(struct pvc
+       return pv_tsc_khz;
+ }
++static atomic64_t last_value = ATOMIC64_INIT(0);
++
+ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+ {
+       struct pvclock_shadow_time shadow;
+       unsigned version;
+       cycle_t ret, offset;
++      u64 last;
+       do {
+               version = pvclock_get_time_values(&shadow, src);
+@@ -123,6 +126,27 @@ cycle_t pvclock_clocksource_read(struct
+               barrier();
+       } while (version != src->version);
++      /*
++       * Assumption here is that last_value, a global accumulator, always goes
++       * forward. If we are less than that, we should not be much smaller.
++       * We assume there is an error margin we're inside, and then the correction
++       * does not sacrifice accuracy.
++       *
++       * For reads: global may have changed between test and return,
++       * but this means someone else poked the clock at a later time.
++       * We just need to make sure we are not seeing a backwards event.
++       *
++       * For updates: last_value = ret is not enough, since two vcpus could be
++       * updating at the same time, and one of them could be slightly behind,
++       * making the assumption that last_value always goes forward fail to hold.
++       */
++      last = atomic64_read(&last_value);
++      do {
++              if (ret < last)
++                      return last;
++              last = atomic64_cmpxchg(&last_value, last, ret);
++      } while (unlikely(last != ret));
++
+       return ret;
+ }
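
The loop added above is a lock-free monotonic clamp: readers race to publish
their timestamp into one global, and whoever computed a value behind the
global returns the global instead. A compilable userspace sketch of the same
algorithm using C11 atomics (illustrative only; the kernel code uses
atomic64_cmpxchg):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint64_t last_value;

    static uint64_t monotonic_read(uint64_t ret)
    {
        uint64_t last = atomic_load(&last_value);

        for (;;) {
            if (ret < last)
                return last;  /* someone already published a later time */
            /* Try to publish our reading; on failure 'last' is
             * refreshed with the current global and we recheck. */
            if (atomic_compare_exchange_weak(&last_value, &last, ret))
                return ret;
        }
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)monotonic_read(100)); /* 100 */
        printf("%llu\n", (unsigned long long)monotonic_read(90));  /* clamped: 100 */
        printf("%llu\n", (unsigned long long)monotonic_read(150)); /* 150 */
        return 0;
    }
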
diff --git a/queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch b/queue-2.6.33/0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch
new file mode 100644 (file)
index 0000000..5bb85fb
--- /dev/null
@@ -0,0 +1,31 @@
+From d00d043d62d0201da2935e542ae7fe41d245be3b Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Wed, 12 May 2010 00:28:44 +0300
+Subject: KVM: Don't allow lmsw to clear cr0.pe
+
+From: Avi Kivity <avi@redhat.com>
+
+The current lmsw implementation allows the guest to clear cr0.pe, contrary
+to the manual, which breaks EMM386.EXE.
+
+Fix by ORing the old cr0.pe with lmsw's operand.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit f78e917688edbf1f14c318d2e50dc8e7dad20445)
+---
+ arch/x86/kvm/x86.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -432,7 +432,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
+ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+ {
+-      kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
++      kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0eul) | (msw & 0x0f));
+ }
+ EXPORT_SYMBOL_GPL(kvm_lmsw);
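
The one-character mask change is easiest to see with concrete values: lmsw
may only load the low four bits of cr0, and the architecture forbids it from
clearing PE (bit 0). Leaving bit 0 out of the cleared mask means the old
cr0.pe is effectively ORed back in. A tiny sketch (demo code with an assumed
cr0 value):

    #include <stdio.h>

    #define X86_CR0_PE 0x1ul

    static unsigned long lmsw_old(unsigned long cr0, unsigned long msw)
    {
        return (cr0 & ~0x0ful) | (msw & 0x0f);  /* guest can clear PE */
    }

    static unsigned long lmsw_new(unsigned long cr0, unsigned long msw)
    {
        return (cr0 & ~0x0eul) | (msw & 0x0f);  /* PE is preserved */
    }

    int main(void)
    {
        unsigned long cr0 = 0x80050033ul;  /* protected-mode cr0, PE set */

        printf("old: pe=%lu\n", lmsw_old(cr0, 0) & X86_CR0_PE); /* 0: broken */
        printf("new: pe=%lu\n", lmsw_new(cr0, 0) & X86_CR0_PE); /* 1: fixed  */
        return 0;
    }
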
diff --git a/queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch b/queue-2.6.33/0013-KVM-x86-Check-LMA-bit-before-set_efer.patch
new file mode 100644 (file)
index 0000000..5fd5900
--- /dev/null
@@ -0,0 +1,34 @@
+From 2effde8fa003ee7b472505bddfc24c8d62344ace Mon Sep 17 00:00:00 2001
+From: Sheng Yang <sheng@linux.intel.com>
+Date: Wed, 12 May 2010 16:40:40 +0800
+Subject: KVM: x86: Check LMA bit before set_efer
+
+From: Sheng Yang <sheng@linux.intel.com>
+
+kvm_x86_ops->set_efer() would execute vcpu->arch.efer = efer, so the
+checking of LMA bit didn't work.
+
+Signed-off-by: Sheng Yang <sheng@linux.intel.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit a3d204e28579427609c3d15d2310127ebaa47d94)
+---
+ arch/x86/kvm/x86.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -603,11 +603,11 @@ static void set_efer(struct kvm_vcpu *vc
+               }
+       }
+-      kvm_x86_ops->set_efer(vcpu, efer);
+-
+       efer &= ~EFER_LMA;
+       efer |= vcpu->arch.shadow_efer & EFER_LMA;
++      kvm_x86_ops->set_efer(vcpu, efer);
++
+       vcpu->arch.shadow_efer = efer;
+       vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
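
The ordering matters because the LMA merge reads the *current* shadow_efer;
once ->set_efer() has stored the new value, the merge compares the new value
with itself and becomes a no-op. A sketch of the corrected order with
made-up values (demo code, not the kvm sources):

    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LMA (1ull << 10)

    static uint64_t shadow_efer = EFER_LMA;  /* vcpu already in long mode */

    static void backend_set_efer(uint64_t *hw_efer, uint64_t efer)
    {
        *hw_efer = efer;  /* stands in for kvm_x86_ops->set_efer() */
    }

    int main(void)
    {
        uint64_t hw_efer = 0;
        uint64_t efer = 0;  /* guest-written EFER, LMA not set */

        /* Fixed order: merge the live LMA bit first, then call the backend. */
        efer &= ~EFER_LMA;
        efer |= shadow_efer & EFER_LMA;
        backend_set_efer(&hw_efer, efer);

        printf("LMA preserved: %d\n", !!(hw_efer & EFER_LMA));
        return 0;
    }
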
diff --git a/queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch b/queue-2.6.33/0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch
new file mode 100644 (file)
index 0000000..f9093de
--- /dev/null
@@ -0,0 +1,52 @@
+From dbad9722e24d96866696ca728032b3a09b8eb78e Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Wed, 12 May 2010 11:48:18 +0300
+Subject: KVM: MMU: Segregate shadow pages with different cr0.wp
+
+From: Avi Kivity <avi@redhat.com>
+
+When cr0.wp=0, we may shadow a gpte having u/s=1 and r/w=0 with an spte
+having u/s=0 and r/w=1.  This allows excessive access if the guest sets
+cr0.wp=1 and accesses through this spte.
+
+Fix by making cr0.wp part of the base role; we'll have different sptes for
+the two cases and the problem disappears.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 3dbe141595faa48a067add3e47bba3205b79d33c)
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/mmu.c              |    3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -193,6 +193,7 @@ union kvm_mmu_page_role {
+               unsigned invalid:1;
+               unsigned cr4_pge:1;
+               unsigned nxe:1;
++              unsigned cr0_wp:1;
+       };
+ };
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -227,7 +227,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+-static int is_write_protection(struct kvm_vcpu *vcpu)
++static bool is_write_protection(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.cr0 & X86_CR0_WP;
+ }
+@@ -2448,6 +2448,7 @@ static int init_kvm_softmmu(struct kvm_v
+               r = paging32_init_context(vcpu);
+       vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
++      vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
+       return r;
+ }
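
The base role is the key under which shadow pages are cached, so adding a
bit to the union automatically keeps cr0.wp=0 and cr0.wp=1 shadows apart:
the role words compare unequal and the cache lookup misses. A compact sketch
of that mechanism (hypothetical field layout, not the full
kvm_mmu_page_role):

    #include <stdio.h>

    union page_role {
        unsigned word;            /* compared and hashed as one integer */
        struct {
            unsigned glevels : 4;
            unsigned level   : 4;
            unsigned nxe     : 1;
            unsigned cr0_wp  : 1; /* the bit this patch adds */
        };
    };

    int main(void)
    {
        union page_role a = { .word = 0 }, b = { .word = 0 };

        a.glevels = b.glevels = 4;
        a.cr0_wp = 0;
        b.cr0_wp = 1;

        /* Same guest page, different wp setting: distinct cache keys. */
        printf("same shadow page? %s\n", a.word == b.word ? "yes" : "no");
        return 0;
    }
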
diff --git a/queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch b/queue-2.6.33/0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch
new file mode 100644 (file)
index 0000000..95fd280
--- /dev/null
@@ -0,0 +1,123 @@
+From fd52c9ad75e418e2a38aa0e662e88cd8b95b74be Mon Sep 17 00:00:00 2001
+From: Shane Wang <shane.wang@intel.com>
+Date: Thu, 29 Apr 2010 12:09:01 -0400
+Subject: KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)
+
+From: Shane Wang <shane.wang@intel.com>
+
+Per the documentation, for the feature control MSR:
+
+  Bit 1 enables VMXON in SMX operation. If the bit is clear, execution
+        of VMXON in SMX operation causes a general-protection exception.
+  Bit 2 enables VMXON outside SMX operation. If the bit is clear, execution
+        of VMXON outside SMX operation causes a general-protection exception.
+
+This patch enables this kind of check with SMX for VMXON in KVM.
+
+Signed-off-by: Shane Wang <shane.wang@intel.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit cafd66595d92591e4bd25c3904e004fc6f897e2d)
+---
+ arch/x86/include/asm/msr-index.h |    5 +++--
+ arch/x86/kernel/tboot.c          |    1 +
+ arch/x86/kvm/vmx.c               |   32 +++++++++++++++++++++-----------
+ include/linux/tboot.h            |    1 +
+ 4 files changed, 26 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -199,8 +199,9 @@
+ #define MSR_IA32_EBL_CR_POWERON               0x0000002a
+ #define MSR_IA32_FEATURE_CONTROL        0x0000003a
+-#define FEATURE_CONTROL_LOCKED                (1<<0)
+-#define FEATURE_CONTROL_VMXON_ENABLED (1<<2)
++#define FEATURE_CONTROL_LOCKED                                (1<<0)
++#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX      (1<<1)
++#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX     (1<<2)
+ #define MSR_IA32_APICBASE             0x0000001b
+ #define MSR_IA32_APICBASE_BSP         (1<<8)
+--- a/arch/x86/kernel/tboot.c
++++ b/arch/x86/kernel/tboot.c
+@@ -46,6 +46,7 @@
+ /* Global pointer to shared data; NULL means no measured launch. */
+ struct tboot *tboot __read_mostly;
++EXPORT_SYMBOL(tboot);
+ /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
+ #define AP_WAIT_TIMEOUT               1
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -26,6 +26,7 @@
+ #include <linux/sched.h>
+ #include <linux/moduleparam.h>
+ #include <linux/ftrace_event.h>
++#include <linux/tboot.h>
+ #include "kvm_cache_regs.h"
+ #include "x86.h"
+@@ -1125,9 +1126,16 @@ static __init int vmx_disabled_by_bios(v
+       u64 msr;
+       rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+-      return (msr & (FEATURE_CONTROL_LOCKED |
+-                     FEATURE_CONTROL_VMXON_ENABLED))
+-          == FEATURE_CONTROL_LOCKED;
++      if (msr & FEATURE_CONTROL_LOCKED) {
++              if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
++                      && tboot_enabled())
++                      return 1;
++              if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
++                      && !tboot_enabled())
++                      return 1;
++      }
++
++      return 0;
+       /* locked but not enabled */
+ }
+@@ -1135,21 +1143,23 @@ static int hardware_enable(void *garbage
+ {
+       int cpu = raw_smp_processor_id();
+       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+-      u64 old;
++      u64 old, test_bits;
+       if (read_cr4() & X86_CR4_VMXE)
+               return -EBUSY;
+       INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
+       rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
+-      if ((old & (FEATURE_CONTROL_LOCKED |
+-                  FEATURE_CONTROL_VMXON_ENABLED))
+-          != (FEATURE_CONTROL_LOCKED |
+-              FEATURE_CONTROL_VMXON_ENABLED))
++
++      test_bits = FEATURE_CONTROL_LOCKED;
++      test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
++      if (tboot_enabled())
++              test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
++
++      if ((old & test_bits) != test_bits) {
+               /* enable and lock */
+-              wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
+-                     FEATURE_CONTROL_LOCKED |
+-                     FEATURE_CONTROL_VMXON_ENABLED);
++              wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
++      }
+       write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
+       asm volatile (ASM_VMX_VMXON_RAX
+                     : : "a"(&phys_addr), "m"(phys_addr)
+--- a/include/linux/tboot.h
++++ b/include/linux/tboot.h
+@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void);
+ #else
++#define tboot_enabled()                       0
+ #define tboot_probe()                 do { } while (0)
+ #define tboot_shutdown(shutdown_type) do { } while (0)
+ #define tboot_sleep(sleep_state, pm1a_control, pm1b_control)  \
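
The hardware_enable() rework reduces to one mask comparison: build the set
of bits that must be on for the current environment (always LOCKED and the
outside-SMX enable, plus the inside-SMX enable when booted through tboot),
then require all of them at once. A quick userspace sketch of that predicate
(demo constants mirroring the MSR bit names):

    #include <stdint.h>
    #include <stdio.h>

    #define FC_LOCKED        (1 << 0)
    #define FC_VMXON_IN_SMX  (1 << 1)
    #define FC_VMXON_OUT_SMX (1 << 2)

    static int vmxon_allowed(uint64_t msr, int tboot_enabled)
    {
        uint64_t test_bits = FC_LOCKED | FC_VMXON_OUT_SMX;

        if (tboot_enabled)
            test_bits |= FC_VMXON_IN_SMX;

        return (msr & test_bits) == test_bits;
    }

    int main(void)
    {
        /* BIOS locked the MSR with only non-SMX VMXON enabled. */
        uint64_t msr = FC_LOCKED | FC_VMXON_OUT_SMX;

        printf("normal boot: %d\n", vmxon_allowed(msr, 0)); /* 1 */
        printf("tboot boot:  %d\n", vmxon_allowed(msr, 1)); /* 0: SMX bit off */
        return 0;
    }
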
diff --git a/queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch b/queue-2.6.33/0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch
new file mode 100644 (file)
index 0000000..c9f26f0
--- /dev/null
@@ -0,0 +1,63 @@
+From b7e2778692b503bc7a8fc362b1147d8df542f9c6 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Tue, 4 May 2010 12:58:32 +0300
+Subject: KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots
+
+From: Avi Kivity <avi@redhat.com>
+
+On svm, kvm_read_pdptr() may require reading guest memory, which can sleep.
+
+Push the spinlock into mmu_alloc_roots(), and only take it after we've read
+the pdptr.
+
+Tested-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8facbbff071ff2b19268d3732e31badc60471e21)
+---
+ arch/x86/kvm/mmu.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2097,11 +2097,14 @@ static int mmu_alloc_roots(struct kvm_vc
+                       direct = 1;
+               if (mmu_check_root(vcpu, root_gfn))
+                       return 1;
++
++              spin_lock(&vcpu->kvm->mmu_lock);
+               sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+                                     PT64_ROOT_LEVEL, direct,
+                                     ACC_ALL, NULL);
+               root = __pa(sp->spt);
+               ++sp->root_count;
++              spin_unlock(&vcpu->kvm->mmu_lock);
+               vcpu->arch.mmu.root_hpa = root;
+               return 0;
+       }
+@@ -2123,11 +2126,15 @@ static int mmu_alloc_roots(struct kvm_vc
+                       root_gfn = 0;
+               if (mmu_check_root(vcpu, root_gfn))
+                       return 1;
++
++              spin_lock(&vcpu->kvm->mmu_lock);
+               sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+                                     PT32_ROOT_LEVEL, direct,
+                                     ACC_ALL, NULL);
+               root = __pa(sp->spt);
+               ++sp->root_count;
++              spin_unlock(&vcpu->kvm->mmu_lock);
++
+               vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+       }
+       vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+@@ -2488,7 +2495,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
+               goto out;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       kvm_mmu_free_some_pages(vcpu);
++      spin_unlock(&vcpu->kvm->mmu_lock);
+       r = mmu_alloc_roots(vcpu);
++      spin_lock(&vcpu->kvm->mmu_lock);
+       mmu_sync_roots(vcpu);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       if (r)
diff --git a/queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch b/queue-2.6.33/0017-KVM-Fix-wallclock-version-writing-race.patch
new file mode 100644 (file)
index 0000000..f14fbcc
--- /dev/null
@@ -0,0 +1,46 @@
+From a2cfe4423331f25aaf816cce92ce6e2544f74966 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Tue, 4 May 2010 15:00:37 +0300
+Subject: KVM: Fix wallclock version writing race
+
+From: Avi Kivity <avi@redhat.com>
+
+Wallclock writing uses an unprotected global variable to hold the version;
+this can cause one guest to interfere with another if both write their
+wallclock at the same time.
+
+Acked-by: Glauber Costa <glommer@redhat.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 9ed3c444ab8987c7b219173a2f7807e3f71e234e)
+---
+ arch/x86/kvm/x86.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -641,14 +641,22 @@ static int do_set_msr(struct kvm_vcpu *v
+ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+ {
+-      static int version;
++      int version;
++      int r;
+       struct pvclock_wall_clock wc;
+       struct timespec boot;
+       if (!wall_clock)
+               return;
+-      version++;
++      r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
++      if (r)
++              return;
++
++      if (version & 1)
++              ++version;  /* first time write, random junk */
++
++      ++version;
+       kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
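
The version field follows the usual seqcount convention: a writer makes it
odd before touching the payload and even again afterwards, and a reader
retries if it sees an odd value or a value that changed between its reads.
Keeping the counter in guest memory, as this patch does, removes the global
shared between guests; the odd-value check also realigns junk left by an
interrupted writer. A single-threaded userspace sketch of the protocol
(illustrative, not the kvm code; real concurrent users also need memory
barriers):

    #include <stdint.h>
    #include <stdio.h>

    struct wall_clock {
        uint32_t version;  /* odd while an update is in flight */
        uint32_t sec;
    };

    static void write_wallclock(struct wall_clock *wc, uint32_t sec)
    {
        if (wc->version & 1)  /* junk from an interrupted writer: realign */
            ++wc->version;

        ++wc->version;        /* now odd: update in progress */
        wc->sec = sec;
        ++wc->version;        /* even again: payload is consistent */
    }

    static uint32_t read_wallclock(const struct wall_clock *wc)
    {
        uint32_t v, sec;

        do {
            v = wc->version;
            sec = wc->sec;
        } while ((v & 1) || v != wc->version);  /* retry on torn reads */

        return sec;
    }

    int main(void)
    {
        struct wall_clock wc = { .version = 0 };

        write_wallclock(&wc, 1277508620);
        printf("sec=%u version=%u\n", read_wallclock(&wc), wc.version);
        return 0;
    }
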
diff --git a/queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch b/queue-2.6.33/0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch
new file mode 100644 (file)
index 0000000..1116efe
--- /dev/null
@@ -0,0 +1,107 @@
+From c0fc62a92b17c61146f20141a2ddc6c0bcdc548b Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 13 May 2010 11:05:49 +0300
+Subject: KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls
+
+From: Avi Kivity <avi@redhat.com>
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 98001d8d017cea1ee0f9f35c6227bbd63ef5005b)
+---
+ arch/powerpc/kvm/book3s.c |   10 ++++++++++
+ arch/powerpc/kvm/booke.c  |   15 ++++++++++++++-
+ 2 files changed, 24 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/kvm/book3s.c
++++ b/arch/powerpc/kvm/book3s.c
+@@ -766,6 +766,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct
+       struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+       int i;
++      vcpu_load(vcpu);
++
+       sregs->pvr = vcpu->arch.pvr;
+       sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+@@ -784,6 +786,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct
+                       sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+               }
+       }
++
++      vcpu_put(vcpu);
++
+       return 0;
+ }
+@@ -793,6 +798,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
+       struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+       int i;
++      vcpu_load(vcpu);
++
+       kvmppc_set_pvr(vcpu, sregs->pvr);
+       vcpu3s->sdr1 = sregs->u.s.sdr1;
+@@ -819,6 +826,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct
+       /* Flush the MMU after messing with the segments */
+       kvmppc_mmu_pte_flush(vcpu, 0, 0);
++
++      vcpu_put(vcpu);
++
+       return 0;
+ }
+--- a/arch/powerpc/kvm/booke.c
++++ b/arch/powerpc/kvm/booke.c
+@@ -443,6 +443,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct
+ {
+       int i;
++      vcpu_load(vcpu);
++
+       regs->pc = vcpu->arch.pc;
+       regs->cr = vcpu->arch.cr;
+       regs->ctr = vcpu->arch.ctr;
+@@ -463,6 +465,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct
+       for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+               regs->gpr[i] = vcpu->arch.gpr[i];
++      vcpu_put(vcpu);
++
+       return 0;
+ }
+@@ -470,6 +474,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+ {
+       int i;
++      vcpu_load(vcpu);
++
+       vcpu->arch.pc = regs->pc;
+       vcpu->arch.cr = regs->cr;
+       vcpu->arch.ctr = regs->ctr;
+@@ -489,6 +495,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+               vcpu->arch.gpr[i] = regs->gpr[i];
++      vcpu_put(vcpu);
++
+       return 0;
+ }
+@@ -517,7 +525,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct k
+ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                   struct kvm_translation *tr)
+ {
+-      return kvmppc_core_vcpu_translate(vcpu, tr);
++      int r;
++
++      vcpu_load(vcpu);
++      r = kvmppc_core_vcpu_translate(vcpu, tr);
++      vcpu_put(vcpu);
++      return r;
+ }
+ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
diff --git a/queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch b/queue-2.6.33/0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch
new file mode 100644 (file)
index 0000000..2c44f63
--- /dev/null
@@ -0,0 +1,58 @@
+From 769481950f87db77b640daec6241727570c63622 Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Thu, 13 May 2010 11:50:19 +0300
+Subject: KVM: x86: Add missing locking to arch specific vcpu ioctls
+
+From: Avi Kivity <avi@redhat.com>
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 8fbf065d625617bbbf6b72d5f78f84ad13c8b547)
+---
+ arch/x86/kvm/x86.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1550,6 +1550,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str
+ {
+       int r;
++      vcpu_load(vcpu);
+       r = -E2BIG;
+       if (cpuid->nent < vcpu->arch.cpuid_nent)
+               goto out;
+@@ -1561,6 +1562,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(str
+ out:
+       cpuid->nent = vcpu->arch.cpuid_nent;
++      vcpu_put(vcpu);
+       return r;
+ }
+@@ -1813,6 +1815,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(
+       int r;
+       unsigned bank_num = mcg_cap & 0xff, bank;
++      vcpu_load(vcpu);
+       r = -EINVAL;
+       if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+               goto out;
+@@ -1827,6 +1830,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(
+       for (bank = 0; bank < bank_num; bank++)
+               vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+ out:
++      vcpu_put(vcpu);
+       return r;
+ }
+@@ -2094,7 +2098,9 @@ long kvm_arch_vcpu_ioctl(struct file *fi
+               r = -EFAULT;
+               if (copy_from_user(&mce, argp, sizeof mce))
+                       goto out;
++              vcpu_load(vcpu);
+               r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
++              vcpu_put(vcpu);
+               break;
+       }
+       case KVM_GET_VCPU_EVENTS: {
diff --git a/queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch b/queue-2.6.33/0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch
new file mode 100644 (file)
index 0000000..aba508a
--- /dev/null
@@ -0,0 +1,91 @@
+From 5acedf13f81c250bf394ce7561bf404792b44558 Mon Sep 17 00:00:00 2001
+From: Roedel, Joerg <Joerg.Roedel@amd.com>
+Date: Thu, 6 May 2010 11:38:43 +0200
+Subject: KVM: x86: Inject #GP with the right rip on efer writes
+
+From: Roedel, Joerg <Joerg.Roedel@amd.com>
+
+This patch fixes a bug in the KVM efer-msr write path. If a
+guest writes to a reserved efer bit, the set_efer function
+injects the #GP directly. The architecture-dependent wrmsr
+function does not see this, assumes success, and advances the
+rip. This results in a #GP in the guest with the wrong rip.
+This patch fixes this by reporting efer write errors back to
+the architectural wrmsr function.
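+
+A rough userspace model of the intended control flow (illustrative
+names and constants, not the kernel code): the efer helper reports
+failure to its caller, and only the common wrmsr path decides whether
+to inject #GP or to advance the rip.
+
+	#include <stdio.h>
+
+	/* Validation reports failure instead of injecting the fault. */
+	static int set_efer(unsigned long long efer,
+			    unsigned long long reserved)
+	{
+		if (efer & reserved)
+			return 1;	/* invalid: caller raises #GP */
+		return 0;		/* accepted */
+	}
+
+	/* The "architectural" wrmsr path owns the rip update. */
+	static void emulate_wrmsr(unsigned long long *rip,
+				  unsigned long long efer)
+	{
+		if (set_efer(efer, 0xf8ULL)) {	/* mask is made up */
+			printf("inject #GP at rip=%llx (not advanced)\n", *rip);
+			return;
+		}
+		*rip += 2;	/* length of WRMSR */
+		printf("wrmsr ok, rip=%llx\n", *rip);
+	}
+
+	int main(void)
+	{
+		unsigned long long rip = 0x1000;
+		emulate_wrmsr(&rip, 0x500);	/* clean write */
+		emulate_wrmsr(&rip, 0x8);	/* hits a reserved bit */
+		return 0;
+	}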
+
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit b69e8caef5b190af48c525f6d715e7b7728a77f6)
+---
+ arch/x86/kvm/x86.c |   31 ++++++++++++-------------------
+ 1 file changed, 12 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -570,37 +570,29 @@ static u32 emulated_msrs[] = {
+       MSR_IA32_MISC_ENABLE,
+ };
+-static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
++static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+-      if (efer & efer_reserved_bits) {
+-              kvm_inject_gp(vcpu, 0);
+-              return;
+-      }
++      if (efer & efer_reserved_bits)
++              return 1;
+       if (is_paging(vcpu)
+-          && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+-              kvm_inject_gp(vcpu, 0);
+-              return;
+-      }
++          && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME))
++              return 1;
+       if (efer & EFER_FFXSR) {
+               struct kvm_cpuid_entry2 *feat;
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+-              if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+-                      kvm_inject_gp(vcpu, 0);
+-                      return;
+-              }
++              if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
++                      return 1;
+       }
+       if (efer & EFER_SVME) {
+               struct kvm_cpuid_entry2 *feat;
+               feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+-              if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+-                      kvm_inject_gp(vcpu, 0);
+-                      return;
+-              }
++              if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
++                      return 1;
+       }
+       efer &= ~EFER_LMA;
+@@ -612,6 +604,8 @@ static void set_efer(struct kvm_vcpu *vc
+       vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
+       kvm_mmu_reset_context(vcpu);
++
++      return 0;
+ }
+ void kvm_enable_efer_bits(u64 mask)
+@@ -946,8 +940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
+ {
+       switch (msr) {
+       case MSR_EFER:
+-              set_efer(vcpu, data);
+-              break;
++              return set_efer(vcpu, data);
+       case MSR_K7_HWCR:
+               data &= ~(u64)0x40;     /* ignore flush filter disable */
+               if (data != 0) {
diff --git a/queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch b/queue-2.6.33/0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch
new file mode 100644 (file)
index 0000000..ffc7836
--- /dev/null
@@ -0,0 +1,38 @@
+From 2d7753bec146c9e0030c6b52520ad052a9c2a45e Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <joerg.roedel@amd.com>
+Date: Wed, 5 May 2010 16:04:45 +0200
+Subject: KVM: SVM: Don't allow nested guest to VMMCALL into host
+
+From: Joerg Roedel <joerg.roedel@amd.com>
+
+This patch disables the possibility for an l2-guest to do a
+VMMCALL directly into the host. This would happen if the
+l1-hypervisor doesn't intercept VMMCALL and the l2-guest
+executes this instruction.
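+
+A minimal sketch of the mask logic (the bit position and types are
+illustrative, not taken from the SVM headers): clear the host's own
+VMMCALL intercept first, then OR in the nested intercepts, so that the
+l1-hypervisor's choice is what decides whether VMMCALL exits.
+
+	#include <stdio.h>
+
+	#define INTERCEPT_VMMCALL 18	/* made-up bit position */
+
+	int main(void)
+	{
+		unsigned long long host = 1ULL << INTERCEPT_VMMCALL;
+		unsigned long long nested = 0;	/* l1 doesn't intercept it */
+
+		/* Drop the host intercept, then merge l1's choices. */
+		host &= ~(1ULL << INTERCEPT_VMMCALL);
+		unsigned long long merged = host | nested;
+
+		printf("VMMCALL intercepted: %s\n",
+		       (merged >> INTERCEPT_VMMCALL) & 1 ? "yes" : "no");
+		return 0;
+	}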
+
+Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
+Signed-off-by: Avi Kivity <avi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+(cherry picked from commit 0d945bd9351199744c1e89d57a70615b6ee9f394)
+---
+ arch/x86/kvm/svm.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1829,8 +1829,13 @@ static bool nested_svm_vmrun(struct vcpu
+               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+       }
+-      /* We don't want a nested guest to be more powerful than the guest,
+-         so all intercepts are ORed */
++      /* We don't want to see VMMCALLs from a nested guest */
++      svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
++
++      /*
++       * We don't want a nested guest to be more powerful than the guest, so
++       * all intercepts are ORed
++       */
+       svm->vmcb->control.intercept_cr_read |=
+               nested_vmcb->control.intercept_cr_read;
+       svm->vmcb->control.intercept_cr_write |=
diff --git a/queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch b/queue-2.6.33/btrfs-should-add-a-permission-check-for-setfacl.patch
new file mode 100644 (file)
index 0000000..e7e0af4
--- /dev/null
@@ -0,0 +1,58 @@
+From 2f26afba46f0ebf155cf9be746496a0304a5b7cf Mon Sep 17 00:00:00 2001
+From: Shi Weihua <shiwh@cn.fujitsu.com>
+Date: Tue, 18 May 2010 00:50:32 +0000
+Subject: Btrfs: should add a permission check for setfacl
+
+From: Shi Weihua <shiwh@cn.fujitsu.com>
+
+commit 2f26afba46f0ebf155cf9be746496a0304a5b7cf upstream.
+
+On btrfs, do the following
+------------------
+# su user1
+# cd btrfs-part/
+# touch aaa
+# getfacl aaa
+  # file: aaa
+  # owner: user1
+  # group: user1
+  user::rw-
+  group::rw-
+  other::r--
+# su user2
+# cd btrfs-part/
+# setfacl -m u::rwx aaa
+# getfacl aaa
+  # file: aaa
+  # owner: user1
+  # group: user1
+  user::rwx           <- setfacl succeeded
+  group::rw-
+  other::r--
+------------------
+but we should prohibit user2 from changing user1's acl.
+In fact, on ext3 and other filesystems, the operation fails with:
+  setfacl: aaa: Operation not permitted
+
+This patch fixes it.
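+
+A minimal userspace model of the added check (hypothetical types; in
+the kernel this is an is_owner_or_cap() test that returns -EPERM):
+
+	#include <stdio.h>
+
+	struct inode { unsigned uid; };
+
+	/* Owner of the inode, or a privileged caller, may proceed. */
+	static int is_owner_or_cap(const struct inode *inode,
+				   unsigned fsuid, int capable)
+	{
+		return fsuid == inode->uid || capable;
+	}
+
+	static int xattr_acl_set(const struct inode *inode,
+				 unsigned fsuid, int capable)
+	{
+		if (!is_owner_or_cap(inode, fsuid, capable))
+			return -1;	/* -EPERM in the kernel */
+		return 0;		/* go on to parse and set the ACL */
+	}
+
+	int main(void)
+	{
+		struct inode file = { .uid = 1000 };	/* user1's file */
+		printf("user1: %d\n", xattr_acl_set(&file, 1000, 0));
+		printf("user2: %d\n", xattr_acl_set(&file, 1001, 0));
+		return 0;
+	}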
+
+Signed-off-by: Shi Weihua <shiwh@cn.fujitsu.com>
+Signed-off-by: Chris Mason <chris.mason@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/btrfs/acl.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/acl.c
++++ b/fs/btrfs/acl.c
+@@ -159,6 +159,9 @@ static int btrfs_xattr_acl_set(struct de
+       int ret;
+       struct posix_acl *acl = NULL;
++      if (!is_owner_or_cap(dentry->d_inode))
++              return -EPERM;
++
+       if (value) {
+               acl = posix_acl_from_xattr(value, size);
+               if (acl == NULL) {
diff --git a/queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch b/queue-2.6.33/cifs-allow-null-nd-as-nfs-server-uses-on-create.patch
new file mode 100644 (file)
index 0000000..68496fc
--- /dev/null
@@ -0,0 +1,133 @@
+From fa588e0c57048b3d4bfcd772d80dc0615f83fd35 Mon Sep 17 00:00:00 2001
+From: Steve French <sfrench@us.ibm.com>
+Date: Thu, 22 Apr 2010 19:21:55 +0000
+Subject: CIFS: Allow null nd (as nfs server uses) on create
+
+From: Steve French <sfrench@us.ibm.com>
+
+commit fa588e0c57048b3d4bfcd772d80dc0615f83fd35 upstream.
+
+While creating a file on a server which supports unix extensions
+such as Samba, if a file is being created which does not supply
+nameidata (i.e. nd is null), the cifs client can oops when calling
+cifs_posix_open.
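+
+A standalone sketch of the guard pattern the fix uses (the structs are
+stand-ins, not the CIFS types): take the superblock as an explicit
+argument and only dereference the mount when the caller had one.
+
+	#include <stdio.h>
+	#include <stddef.h>
+
+	struct super_block { int id; };
+	struct vfsmount { struct super_block *mnt_sb; };
+
+	static int posix_open(struct vfsmount *mnt, struct super_block *sb)
+	{
+		if (sb == NULL)
+			return -1;
+		/* ... open via sb; file info needs a mount, so skip it
+		 * for mount-less (nfsd-style) creates ... */
+		if (mnt)
+			printf("open on sb %d, with mount state\n", sb->id);
+		else
+			printf("open on sb %d, no mount\n", sb->id);
+		return 0;
+	}
+
+	int main(void)
+	{
+		struct super_block sb = { .id = 1 };
+		struct vfsmount mnt = { .mnt_sb = &sb };
+
+		posix_open(&mnt, &sb);	/* normal lookup with nd */
+		posix_open(NULL, &sb);	/* nd == NULL, as nfsd passes */
+		return 0;
+	}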
+
+Signed-off-by: Shirish Pargaonkar <shirishp@us.ibm.com>
+Signed-off-by: Steve French <sfrench@us.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/cifs/cifsproto.h |    6 ++++--
+ fs/cifs/dir.c       |   20 ++++++++++++--------
+ fs/cifs/file.c      |   11 +++++++----
+ 3 files changed, 23 insertions(+), 14 deletions(-)
+
+--- a/fs/cifs/cifsproto.h
++++ b/fs/cifs/cifsproto.h
+@@ -95,8 +95,10 @@ extern struct cifsFileInfo *cifs_new_fil
+                               __u16 fileHandle, struct file *file,
+                               struct vfsmount *mnt, unsigned int oflags);
+ extern int cifs_posix_open(char *full_path, struct inode **pinode,
+-                         struct vfsmount *mnt, int mode, int oflags,
+-                         __u32 *poplock, __u16 *pnetfid, int xid);
++                              struct vfsmount *mnt,
++                              struct super_block *sb,
++                              int mode, int oflags,
++                              __u32 *poplock, __u16 *pnetfid, int xid);
+ extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
+                                    FILE_UNIX_BASIC_INFO *info,
+                                    struct cifs_sb_info *cifs_sb);
+--- a/fs/cifs/dir.c
++++ b/fs/cifs/dir.c
+@@ -183,13 +183,14 @@ cifs_new_fileinfo(struct inode *newinode
+ }
+ int cifs_posix_open(char *full_path, struct inode **pinode,
+-                  struct vfsmount *mnt, int mode, int oflags,
+-                  __u32 *poplock, __u16 *pnetfid, int xid)
++                      struct vfsmount *mnt, struct super_block *sb,
++                      int mode, int oflags,
++                      __u32 *poplock, __u16 *pnetfid, int xid)
+ {
+       int rc;
+       FILE_UNIX_BASIC_INFO *presp_data;
+       __u32 posix_flags = 0;
+-      struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
++      struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+       struct cifs_fattr fattr;
+       cFYI(1, ("posix open %s", full_path));
+@@ -242,7 +243,7 @@ int cifs_posix_open(char *full_path, str
+       /* get new inode and set it up */
+       if (*pinode == NULL) {
+-              *pinode = cifs_iget(mnt->mnt_sb, &fattr);
++              *pinode = cifs_iget(sb, &fattr);
+               if (!*pinode) {
+                       rc = -ENOMEM;
+                       goto posix_open_ret;
+@@ -251,7 +252,8 @@ int cifs_posix_open(char *full_path, str
+               cifs_fattr_to_inode(*pinode, &fattr);
+       }
+-      cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
++      if (mnt)
++              cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags);
+ posix_open_ret:
+       kfree(presp_data);
+@@ -315,13 +317,14 @@ cifs_create(struct inode *inode, struct
+       if (nd && (nd->flags & LOOKUP_OPEN))
+               oflags = nd->intent.open.flags;
+       else
+-              oflags = FMODE_READ;
++              oflags = FMODE_READ | SMB_O_CREAT;
+       if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+           (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+                       le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+-              rc = cifs_posix_open(full_path, &newinode, nd->path.mnt,
+-                                   mode, oflags, &oplock, &fileHandle, xid);
++              rc = cifs_posix_open(full_path, &newinode,
++                      nd ? nd->path.mnt : NULL,
++                      inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
+               /* EIO could indicate that (posix open) operation is not
+                  supported, despite what server claimed in capability
+                  negotation.  EREMOTE indicates DFS junction, which is not
+@@ -678,6 +681,7 @@ cifs_lookup(struct inode *parent_dir_ino
+                    (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
+                    (nd->intent.open.flags & O_CREAT)) {
+                       rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
++                                      parent_dir_inode->i_sb,
+                                       nd->intent.open.create_mode,
+                                       nd->intent.open.flags, &oplock,
+                                       &fileHandle, xid);
+--- a/fs/cifs/file.c
++++ b/fs/cifs/file.c
+@@ -297,10 +297,12 @@ int cifs_open(struct inode *inode, struc
+           (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+                       le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+               int oflags = (int) cifs_posix_convert_flags(file->f_flags);
++              oflags |= SMB_O_CREAT;
+               /* can not refresh inode info since size could be stale */
+               rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
+-                                   cifs_sb->mnt_file_mode /* ignored */,
+-                                   oflags, &oplock, &netfid, xid);
++                              inode->i_sb,
++                              cifs_sb->mnt_file_mode /* ignored */,
++                              oflags, &oplock, &netfid, xid);
+               if (rc == 0) {
+                       cFYI(1, ("posix open succeeded"));
+                       /* no need for special case handling of setting mode
+@@ -512,8 +514,9 @@ reopen_error_exit:
+               int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+               /* can not refresh inode info since size could be stale */
+               rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
+-                                   cifs_sb->mnt_file_mode /* ignored */,
+-                                   oflags, &oplock, &netfid, xid);
++                              inode->i_sb,
++                              cifs_sb->mnt_file_mode /* ignored */,
++                              oflags, &oplock, &netfid, xid);
+               if (rc == 0) {
+                       cFYI(1, ("posix reopen succeeded"));
+                       goto reopen_success;
diff --git a/queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch b/queue-2.6.33/eeepc-laptop-check-wireless-hotplug-events.patch
new file mode 100644 (file)
index 0000000..ef38cd3
--- /dev/null
@@ -0,0 +1,64 @@
+From bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f Mon Sep 17 00:00:00 2001
+From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+Date: Mon, 22 Feb 2010 16:03:58 +0000
+Subject: eeepc-laptop: check wireless hotplug events
+
+From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+
+commit bc9d24a3aeb1532fc3e234907a8b6d671f7ed68f upstream.
+
+Before we mark the wireless device as unplugged, check PCI config space
+to see whether the wireless device is really disabled (and vice versa).
+This works around newer models which don't want the hotplug code, where
+we end up disabling the wired network device.
+
+My old 701 still works correctly with this.  I can also simulate an
+afflicted model by changing the hardcoded PCI bus/slot number in the
+driver, and it seems to work nicely (although it is a bit noisy).
+
+In future this type of hotplug support will be implemented by the PCI
+core.  The existing blacklist and the new warning message will be
+removed at that point.
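+
+A userspace model of the presence check (the config-space read is
+stubbed and the IDs are made up): an empty PCI slot reads back as
+all-ones, so BIOS state and actual presence can be compared before
+acting.
+
+	#include <stdio.h>
+	#include <stdint.h>
+	#include <stdbool.h>
+
+	/* Stand-in for pci_bus_read_config_dword() on the slot. */
+	static uint32_t read_vendor_dword(bool present)
+	{
+		return present ? 0x168c002a : 0xffffffff;
+	}
+
+	static void rfkill_hotplug(bool bios_blocked, bool slot_present)
+	{
+		bool absent = (read_vendor_dword(slot_present) == 0xffffffff);
+
+		if (bios_blocked != absent) {
+			printf("BIOS says %s but device is %s: skip hotplug\n",
+			       bios_blocked ? "blocked" : "unblocked",
+			       absent ? "absent" : "present");
+			return;
+		}
+		printf("states agree; safe to %s the slot\n",
+		       bios_blocked ? "remove" : "rescan");
+	}
+
+	int main(void)
+	{
+		rfkill_hotplug(true, true);	/* afflicted model: skips */
+		rfkill_hotplug(true, false);	/* consistent: proceeds */
+		return 0;
+	}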
+
+Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
+Signed-off-by: Corentin Chary <corentincj@iksaif.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/platform/x86/eeepc-laptop.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/platform/x86/eeepc-laptop.c
++++ b/drivers/platform/x86/eeepc-laptop.c
+@@ -578,6 +578,8 @@ static void eeepc_rfkill_hotplug(struct
+       struct pci_dev *dev;
+       struct pci_bus *bus;
+       bool blocked = eeepc_wlan_rfkill_blocked(eeepc);
++      bool absent;
++      u32 l;
+       if (eeepc->wlan_rfkill)
+               rfkill_set_sw_state(eeepc->wlan_rfkill, blocked);
+@@ -591,6 +593,22 @@ static void eeepc_rfkill_hotplug(struct
+                       goto out_unlock;
+               }
++              if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
++                      pr_err("Unable to read PCI config space?\n");
++                      goto out_unlock;
++              }
++              absent = (l == 0xffffffff);
++
++              if (blocked != absent) {
++                      pr_warning("BIOS says wireless lan is %s, "
++                                      "but the pci device is %s\n",
++                              blocked ? "blocked" : "unblocked",
++                              absent ? "absent" : "present");
++                      pr_warning("skipped wireless hotplug as probably "
++                                      "inappropriate for this model\n");
++                      goto out_unlock;
++              }
++
+               if (!blocked) {
+                       dev = pci_get_slot(bus, 0);
+                       if (dev) {
diff --git a/queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch b/queue-2.6.33/ext4-check-s_log_groups_per_flex-in-online-resize-code.patch
new file mode 100644 (file)
index 0000000..d8da321
--- /dev/null
@@ -0,0 +1,49 @@
+From 42007efd569f1cf3bfb9a61da60ef6c2179508ca Mon Sep 17 00:00:00 2001
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Sun, 16 May 2010 01:00:00 -0400
+Subject: ext4: check s_log_groups_per_flex in online resize code
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit 42007efd569f1cf3bfb9a61da60ef6c2179508ca upstream.
+
+If groups_per_flex < 2, sbi->s_flex_groups[] doesn't get filled out,
+and every other access to this first tests s_log_groups_per_flex;
+the same thing needs to happen in resize, or we'll wander off into
+a null pointer when doing an online resize of the file system.
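+
+A compiled-down model of the added guard (structure and names are
+illustrative): the flex-group accounting is only touched when the
+feature is enabled and s_log_groups_per_flex is nonzero.
+
+	#include <stdio.h>
+
+	#define FEATURE_FLEX_BG 0x1	/* made-up feature bit */
+
+	struct sb_info { int log_groups_per_flex; long flex_free[8]; };
+
+	static void group_add(struct sb_info *sbi, int features,
+			      int group, long freed)
+	{
+		if ((features & FEATURE_FLEX_BG) && sbi->log_groups_per_flex) {
+			int flex = group >> sbi->log_groups_per_flex;
+			sbi->flex_free[flex] += freed;
+		}
+		/* ... rest of the online-resize accounting ... */
+	}
+
+	int main(void)
+	{
+		struct sb_info sbi = { .log_groups_per_flex = 0 };
+
+		group_add(&sbi, FEATURE_FLEX_BG, 3, 100);	/* skipped */
+		sbi.log_groups_per_flex = 2;
+		group_add(&sbi, FEATURE_FLEX_BG, 3, 100);
+		printf("flex_free[0] = %ld\n", sbi.flex_free[0]);
+		return 0;
+	}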
+
+Thanks to Christoph Biedl, who came up with the trivial testcase:
+
+# truncate --size 128M fsfile
+# mkfs.ext3 -F fsfile
+# tune2fs -O extents,uninit_bg,dir_index,flex_bg,huge_file,dir_nlink,extra_isize fsfile
+# e2fsck -yDf -C0 fsfile
+# truncate --size 132M fsfile
+# losetup /dev/loop0 fsfile
+# mount /dev/loop0 mnt
+# resize2fs -p /dev/loop0
+
+       https://bugzilla.kernel.org/show_bug.cgi?id=13549
+
+Reported-by: Alessandro Polverini <alex@nibbles.it>
+Test-case-by: Christoph Biedl  <bugzilla.kernel.bpeb@manchmal.in-ulm.de>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/resize.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -930,7 +930,8 @@ int ext4_group_add(struct super_block *s
+       percpu_counter_add(&sbi->s_freeinodes_counter,
+                          EXT4_INODES_PER_GROUP(sb));
+-      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
++          sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group;
+               flex_group = ext4_flex_group(sbi, input->group);
+               atomic_add(input->free_blocks_count,
diff --git a/queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch b/queue-2.6.33/ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch
new file mode 100644 (file)
index 0000000..cf9e355
--- /dev/null
@@ -0,0 +1,34 @@
+From 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 2 Jun 2010 22:04:39 -0400
+Subject: ext4: Make sure the MOVE_EXT ioctl can't overwrite append-only files
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 1f5a81e41f8b1a782c68d3843e9ec1bfaadf7d72 upstream.
+
+Dan Rosenberg has reported a problem with the MOVE_EXT ioctl.  If the
+donor file is an append-only file, we should not allow the operation
+to proceed, lest we end up overwriting the contents of an append-only
+file.
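+
+A tiny model of the donor check (the flag values are illustrative, not
+the kernel's):
+
+	#include <stdio.h>
+
+	#define MODEL_IMMUTABLE 0x1
+	#define MODEL_APPEND    0x2
+
+	struct inode { unsigned flags; };
+
+	static int check_donor(const struct inode *donor)
+	{
+		if (donor->flags & (MODEL_IMMUTABLE | MODEL_APPEND))
+			return -1;	/* -EPERM: never overwrite it */
+		return 0;
+	}
+
+	int main(void)
+	{
+		struct inode normal = { 0 };
+		struct inode append_only = { MODEL_APPEND };
+
+		printf("normal donor:      %d\n", check_donor(&normal));
+		printf("append-only donor: %d\n", check_donor(&append_only));
+		return 0;
+	}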
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/move_extent.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -964,6 +964,9 @@ mext_check_arguments(struct inode *orig_
+               return -EINVAL;
+       }
++      if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
++              return -EPERM;
++
+       /* Ext4 move extent does not support swapfile */
+       if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+               ext4_debug("ext4 move extent: The argument files should "
diff --git a/queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch b/queue-2.6.33/gfs2-fix-permissions-checking-for-setflags-ioctl.patch
new file mode 100644 (file)
index 0000000..60f9487
--- /dev/null
@@ -0,0 +1,45 @@
+From 7df0e0397b9a18358573274db9fdab991941062f Mon Sep 17 00:00:00 2001
+From: Steven Whitehouse <swhiteho@redhat.com>
+Date: Mon, 24 May 2010 14:36:48 +0100
+Subject: GFS2: Fix permissions checking for setflags ioctl()
+
+From: Steven Whitehouse <swhiteho@redhat.com>
+
+commit 7df0e0397b9a18358573274db9fdab991941062f upstream.
+
+We should be checking for the ownership of the file for which
+flags are being set, rather than just for write access.
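+
+A userspace sketch of the distinction (hypothetical types): write
+access may come from group or other permission bits, so setflags must
+separately require ownership or privilege.
+
+	#include <stdio.h>
+
+	struct inode { unsigned uid; unsigned mode; };
+
+	static int do_set_flags(const struct inode *inode,
+				unsigned fsuid, int capable)
+	{
+		if (fsuid != inode->uid && !capable)
+			return -1;	/* -EACCES */
+		return 0;		/* go on to update the flags */
+	}
+
+	int main(void)
+	{
+		/* group-writable file owned by uid 1000 */
+		struct inode shared = { .uid = 1000, .mode = 0664 };
+
+		printf("owner:        %d\n", do_set_flags(&shared, 1000, 0));
+		printf("group member: %d\n", do_set_flags(&shared, 1001, 0));
+		return 0;
+	}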
+
+Reported-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
+Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/gfs2/file.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/gfs2/file.c
++++ b/fs/gfs2/file.c
+@@ -218,6 +218,11 @@ static int do_gfs2_set_flags(struct file
+       if (error)
+               goto out_drop_write;
++      error = -EACCES;
++      if (!is_owner_or_cap(inode))
++              goto out;
++
++      error = 0;
+       flags = ip->i_diskflags;
+       new_flags = (flags & ~mask) | (reqflags & mask);
+       if ((new_flags ^ flags) == 0)
+@@ -275,8 +280,10 @@ static int gfs2_set_flags(struct file *f
+ {
+       struct inode *inode = filp->f_path.dentry->d_inode;
+       u32 fsflags, gfsflags;
++
+       if (get_user(fsflags, ptr))
+               return -EFAULT;
++
+       gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
+       if (!S_ISDIR(inode->i_mode)) {
+               if (gfsflags & GFS2_DIF_INHERIT_JDATA)
diff --git a/queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch b/queue-2.6.33/input-psmouse-reset-all-types-of-mice-before-reconnecting.patch
new file mode 100644 (file)
index 0000000..f423fa5
--- /dev/null
@@ -0,0 +1,55 @@
+From ef110b24e28f36620f63dab94708a17c7e267358 Mon Sep 17 00:00:00 2001
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Date: Thu, 13 May 2010 00:42:23 -0700
+Subject: Input: psmouse - reset all types of mice before reconnecting
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+commit ef110b24e28f36620f63dab94708a17c7e267358 upstream.
+
+Synaptics hardware requires resetting device after suspend to ram
+in order for the device to be operational. The reset lives in
+synaptics-specific reconnect handler, but it is not being invoked
+if synaptics support is disabled and the device is handled as a
+standard PS/2 device (bare or IntelliMouse protocol).
+
+Let's add a reset into the generic reconnect handler as well.
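+
+A sketch of the reconnect flow after the change (probe and detection
+are stubbed; this is not the psmouse code itself):
+
+	#include <stdio.h>
+
+	enum proto { PS2_BARE, PS2_IMPS, SYNAPTICS };
+
+	static void reset_mouse(void)  { printf("PS/2 reset\n"); }
+	static int probe_mouse(void)   { return 0; }	/* 0 = ok */
+	static enum proto detect(void) { return PS2_IMPS; }
+
+	/* Generic reconnect: reset unconditionally, just as the
+	 * protocol-specific handlers already do on their own. */
+	static int reconnect(enum proto old_type)
+	{
+		reset_mouse();
+		if (probe_mouse() < 0)
+			return -1;
+		if (detect() != old_type)
+			return -1;	/* device changed under us */
+		return 0;
+	}
+
+	int main(void)
+	{
+		printf("reconnect: %s\n",
+		       reconnect(PS2_IMPS) ? "failed" : "ok");
+		return 0;
+	}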
+
+Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
+Cc: Tim Gardner <tim.gardner@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/input/mouse/psmouse-base.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/input/mouse/psmouse-base.c
++++ b/drivers/input/mouse/psmouse-base.c
+@@ -1382,6 +1382,7 @@ static int psmouse_reconnect(struct seri
+       struct psmouse *psmouse = serio_get_drvdata(serio);
+       struct psmouse *parent = NULL;
+       struct serio_driver *drv = serio->drv;
++      unsigned char type;
+       int rc = -1;
+       if (!drv || !psmouse) {
+@@ -1401,10 +1402,15 @@ static int psmouse_reconnect(struct seri
+       if (psmouse->reconnect) {
+               if (psmouse->reconnect(psmouse))
+                       goto out;
+-      } else if (psmouse_probe(psmouse) < 0 ||
+-                 psmouse->type != psmouse_extensions(psmouse,
+-                                              psmouse_max_proto, false)) {
+-              goto out;
++      } else {
++              psmouse_reset(psmouse);
++
++              if (psmouse_probe(psmouse) < 0)
++                      goto out;
++
++              type = psmouse_extensions(psmouse, psmouse_max_proto, false);
++              if (psmouse->type != type)
++                      goto out;
+       }
+       /* ok, the device type (and capabilities) match the old one,
diff --git a/queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch b/queue-2.6.33/keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch
new file mode 100644 (file)
index 0000000..80a2f83
--- /dev/null
@@ -0,0 +1,191 @@
+From cea7daa3589d6b550546a8c8963599f7c1a3ae5c Mon Sep 17 00:00:00 2001
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Date: Fri, 30 Apr 2010 14:32:13 +0100
+Subject: KEYS: find_keyring_by_name() can gain access to a freed keyring
+
+From: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+
+commit cea7daa3589d6b550546a8c8963599f7c1a3ae5c upstream.
+
+find_keyring_by_name() can gain access to a keyring that has had its reference
+count reduced to zero, and is thus ready to be freed.  This then allows the
+dead keyring to be brought back into use whilst it is being destroyed.
+
+The following timeline illustrates the process:
+
+|(cleaner)                           (user)
+|
+| free_user(user)                    sys_keyctl()
+|  |                                  |
+|  key_put(user->session_keyring)     keyctl_get_keyring_ID()
+|  ||  //=> keyring->usage = 0        |
+|  |schedule_work(&key_cleanup_task)   lookup_user_key()
+|  ||                                   |
+|  kmem_cache_free(,user)               |
+|  .                                    |[KEY_SPEC_USER_KEYRING]
+|  .                                    install_user_keyrings()
+|  .                                    ||
+| key_cleanup() [<= worker_thread()]    ||
+|  |                                    ||
+|  [spin_lock(&key_serial_lock)]        |[mutex_lock(&key_user_keyr..mutex)]
+|  |                                    ||
+|  atomic_read() == 0                   ||
+|  |{ rb_erase(&key->serial_node,) }    ||
+|  |                                    ||
+|  [spin_unlock(&key_serial_lock)]      |find_keyring_by_name()
+|  |                                    |||
+|  keyring_destroy(keyring)             ||[read_lock(&keyring_name_lock)]
+|  ||                                   |||
+|  |[write_lock(&keyring_name_lock)]    ||atomic_inc(&keyring->usage)
+|  |.                                   ||| *** GET freeing keyring ***
+|  |.                                   ||[read_unlock(&keyring_name_lock)]
+|  ||                                   ||
+|  |list_del()                          |[mutex_unlock(&key_user_k..mutex)]
+|  ||                                   |
+|  |[write_unlock(&keyring_name_lock)]  ** INVALID keyring is returned **
+|  |                                    .
+|  kmem_cache_free(,keyring)            .
+|                                       .
+|                                       atomic_dec(&keyring->usage)
+v                                         *** DESTROYED ***
+TIME
+
+If CONFIG_SLUB_DEBUG=y then we may see the following message generated:
+
+       =============================================================================
+       BUG key_jar: Poison overwritten
+       -----------------------------------------------------------------------------
+
+       INFO: 0xffff880197a7e200-0xffff880197a7e200. First byte 0x6a instead of 0x6b
+       INFO: Allocated in key_alloc+0x10b/0x35f age=25 cpu=1 pid=5086
+       INFO: Freed in key_cleanup+0xd0/0xd5 age=12 cpu=1 pid=10
+       INFO: Slab 0xffffea000592cb90 objects=16 used=2 fp=0xffff880197a7e200 flags=0x200000000000c3
+       INFO: Object 0xffff880197a7e200 @offset=512 fp=0xffff880197a7e300
+
+       Bytes b4 0xffff880197a7e1f0:  5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
+         Object 0xffff880197a7e200:  6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b jkkkkkkkkkkkkkkk
+
+Alternatively, we may see a system panic happen, such as:
+
+       BUG: unable to handle kernel NULL pointer dereference at 0000000000000001
+       IP: [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+       PGD 6b2b4067 PUD 6a80d067 PMD 0
+       Oops: 0000 [#1] SMP
+       last sysfs file: /sys/kernel/kexec_crash_loaded
+       CPU 1
+       ...
+       Pid: 31245, comm: su Not tainted 2.6.34-rc5-nofixed-nodebug #2 D2089/PRIMERGY
+       RIP: 0010:[<ffffffff810e61a3>]  [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+       RSP: 0018:ffff88006af3bd98  EFLAGS: 00010002
+       RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88007d19900b
+       RDX: 0000000100000000 RSI: 00000000000080d0 RDI: ffffffff81828430
+       RBP: ffffffff81828430 R08: ffff88000a293750 R09: 0000000000000000
+       R10: 0000000000000001 R11: 0000000000100000 R12: 00000000000080d0
+       R13: 00000000000080d0 R14: 0000000000000296 R15: ffffffff810f20ce
+       FS:  00007f97116bc700(0000) GS:ffff88000a280000(0000) knlGS:0000000000000000
+       CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+       CR2: 0000000000000001 CR3: 000000006a91c000 CR4: 00000000000006e0
+       DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+       DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
+       Process su (pid: 31245, threadinfo ffff88006af3a000, task ffff8800374414c0)
+       Stack:
+        0000000512e0958e 0000000000008000 ffff880037f8d180 0000000000000001
+        0000000000000000 0000000000008001 ffff88007d199000 ffffffff810f20ce
+        0000000000008000 ffff88006af3be48 0000000000000024 ffffffff810face3
+       Call Trace:
+        [<ffffffff810f20ce>] ? get_empty_filp+0x70/0x12f
+        [<ffffffff810face3>] ? do_filp_open+0x145/0x590
+        [<ffffffff810ce208>] ? tlb_finish_mmu+0x2a/0x33
+        [<ffffffff810ce43c>] ? unmap_region+0xd3/0xe2
+        [<ffffffff810e4393>] ? virt_to_head_page+0x9/0x2d
+        [<ffffffff81103916>] ? alloc_fd+0x69/0x10e
+        [<ffffffff810ef4ed>] ? do_sys_open+0x56/0xfc
+        [<ffffffff81008a02>] ? system_call_fastpath+0x16/0x1b
+       Code: 0f 1f 44 00 00 49 89 c6 fa 66 0f 1f 44 00 00 65 4c 8b 04 25 60 e8 00 00 48 8b 45 00 49 01 c0 49 8b 18 48 85 db 74 0d 48 63 45 18 <48> 8b 04 03 49 89 00 eb 14 4c 89 f9 83 ca ff 44 89 e6 48 89 ef
+       RIP  [<ffffffff810e61a3>] kmem_cache_alloc+0x5b/0xe9
+
+The problem is that find_keyring_by_name does not confirm that the keyring is
+valid before accepting it.
+
+Skipping keyrings that have been reduced to a zero count seems the way to go.
+To this end, use atomic_inc_not_zero() to increment the usage count and skip
+the candidate keyring if that returns false.
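+
+A standalone C11 model of the pattern (stdatomic here stands in for
+the kernel's atomic ops): a zero count means the cleaner owns the
+object, so the lookup must refuse to resurrect it.
+
+	#include <stdatomic.h>
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	struct keyring { atomic_int usage; };
+
+	/* Equivalent of atomic_inc_not_zero(). */
+	static bool get_ref_not_zero(struct keyring *k)
+	{
+		int old = atomic_load(&k->usage);
+		do {
+			if (old == 0)
+				return false;	/* dying; skip it */
+		} while (!atomic_compare_exchange_weak(&k->usage,
+						       &old, old + 1));
+		return true;
+	}
+
+	int main(void)
+	{
+		struct keyring live = { 1 };
+		struct keyring dead = { 0 };	/* queued for freeing */
+
+		printf("live keyring grabbed: %d\n", get_ref_not_zero(&live));
+		printf("dead keyring grabbed: %d\n", get_ref_not_zero(&dead));
+		return 0;
+	}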
+
+The following script _may_ cause the bug to happen, but there's no guarantee
+as the window of opportunity is small:
+
+       #!/bin/sh
+       LOOP=100000
+       USER=dummy_user
+       /bin/su -c "exit;" $USER || { /usr/sbin/adduser -m $USER; add=1; }
+       for ((i=0; i<LOOP; i++))
+       do
+               /bin/su -c "echo '$i' > /dev/null" $USER
+       done
+       (( add == 1 )) && /usr/sbin/userdel -r $USER
+       exit
+
+Note that the nominated user must not be in use.
+
+An alternative way of testing this may be:
+
+       for ((i=0; i<100000; i++))
+       do
+               keyctl session foo /bin/true || break
+       done >&/dev/null
+
+as that uses a keyring named "foo" rather than relying on the user and
+user-session named keyrings.
+
+Reported-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Tested-by: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Acked-by: Serge Hallyn <serue@us.ibm.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Cc: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/keyring.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/security/keys/keyring.c
++++ b/security/keys/keyring.c
+@@ -524,9 +524,8 @@ struct key *find_keyring_by_name(const c
+       struct key *keyring;
+       int bucket;
+-      keyring = ERR_PTR(-EINVAL);
+       if (!name)
+-              goto error;
++              return ERR_PTR(-EINVAL);
+       bucket = keyring_hash(name);
+@@ -553,17 +552,18 @@ struct key *find_keyring_by_name(const c
+                                          KEY_SEARCH) < 0)
+                               continue;
+-                      /* we've got a match */
+-                      atomic_inc(&keyring->usage);
+-                      read_unlock(&keyring_name_lock);
+-                      goto error;
++                      /* we've got a match but we might end up racing with
++                       * key_cleanup() if the keyring is currently 'dead'
++                       * (ie. it has a zero usage count) */
++                      if (!atomic_inc_not_zero(&keyring->usage))
++                              continue;
++                      goto out;
+               }
+       }
+-      read_unlock(&keyring_name_lock);
+       keyring = ERR_PTR(-ENOKEY);
+-
+- error:
++out:
++      read_unlock(&keyring_name_lock);
+       return keyring;
+ } /* end find_keyring_by_name() */
diff --git a/queue-2.6.33/keys-return-more-accurate-error-codes.patch b/queue-2.6.33/keys-return-more-accurate-error-codes.patch
new file mode 100644 (file)
index 0000000..3bde51e
--- /dev/null
@@ -0,0 +1,50 @@
+From 4d09ec0f705cf88a12add029c058b53f288cfaa2 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <error27@gmail.com>
+Date: Mon, 17 May 2010 14:42:35 +0100
+Subject: KEYS: Return more accurate error codes
+
+From: Dan Carpenter <error27@gmail.com>
+
+commit 4d09ec0f705cf88a12add029c058b53f288cfaa2 upstream.
+
+We were using the wrong variable here so the error codes weren't being returned
+properly.  The original code returns -ENOKEY.
+
+Signed-off-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: James Morris <jmorris@namei.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ security/keys/process_keys.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/security/keys/process_keys.c
++++ b/security/keys/process_keys.c
+@@ -509,7 +509,7 @@ try_again:
+                       ret = install_thread_keyring();
+                       if (ret < 0) {
+-                              key = ERR_PTR(ret);
++                              key_ref = ERR_PTR(ret);
+                               goto error;
+                       }
+                       goto reget_creds;
+@@ -527,7 +527,7 @@ try_again:
+                       ret = install_process_keyring();
+                       if (ret < 0) {
+-                              key = ERR_PTR(ret);
++                              key_ref = ERR_PTR(ret);
+                               goto error;
+                       }
+                       goto reget_creds;
+@@ -586,7 +586,7 @@ try_again:
+       case KEY_SPEC_GROUP_KEYRING:
+               /* group keyrings are not yet supported */
+-              key = ERR_PTR(-EINVAL);
++              key_ref = ERR_PTR(-EINVAL);
+               goto error;
+       case KEY_SPEC_REQKEY_AUTH_KEY:
diff --git a/queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch b/queue-2.6.33/l2tp-fix-oops-in-pppol2tp_xmit.patch
new file mode 100644 (file)
index 0000000..1920ee7
--- /dev/null
@@ -0,0 +1,81 @@
+From 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 Mon Sep 17 00:00:00 2001
+From: James Chapman <jchapman@katalix.com>
+Date: Tue, 16 Mar 2010 06:46:31 +0000
+Subject: l2tp: Fix oops in pppol2tp_xmit
+
+From: James Chapman <jchapman@katalix.com>
+
+commit 3feec9095d12e311b7d4eb7fe7e5dfa75d4a72a5 upstream.
+
+When transmitting L2TP frames, we derive the outgoing interface's UDP
+checksum hardware assist capabilities from the tunnel dst dev. This
+can sometimes be NULL, especially when routing protocols are used and
+routing changes occur. This patch just checks for NULL dst or dev
+pointers when checking for netdev hardware assist features.
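+
+A minimal model of the defensive check (stand-in structs; the feature
+bit value is made up): consult dev->features only when both dst and
+dev exist, and fall back to a software checksum otherwise.
+
+	#include <stdio.h>
+	#include <stddef.h>
+
+	#define NETIF_F_V4_CSUM 0x4	/* illustrative bit */
+
+	struct net_device { unsigned features; };
+	struct dst_entry  { struct net_device *dev; };
+
+	static const char *pick_csum(struct dst_entry *dst)
+	{
+		if (dst && dst->dev &&
+		    (dst->dev->features & NETIF_F_V4_CSUM))
+			return "hardware checksum";
+		return "software checksum";
+	}
+
+	int main(void)
+	{
+		struct net_device dev = { .features = NETIF_F_V4_CSUM };
+		struct dst_entry good = { .dev = &dev };
+		struct dst_entry bad  = { .dev = NULL };
+
+		printf("with dev: %s\n", pick_csum(&good));
+		printf("no dev:   %s\n", pick_csum(&bad));
+		printf("no dst:   %s\n", pick_csum(NULL));
+		return 0;
+	}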
+
+BUG: unable to handle kernel NULL pointer dereference at 0000000c
+IP: [<f89d074c>] pppol2tp_xmit+0x341/0x4da [pppol2tp]
+*pde = 00000000
+Oops: 0000 [#1] SMP
+last sysfs file: /sys/class/net/lo/operstate
+Modules linked in: pppol2tp pppox ppp_generic slhc ipv6 dummy loop snd_hda_codec_atihdmi snd_hda_intel snd_hda_codec snd_pcm snd_timer snd soundcore snd_page_alloc evdev psmouse serio_raw processor button i2c_piix4 i2c_core ati_agp agpgart pcspkr ext3 jbd mbcache sd_mod ide_pci_generic atiixp ide_core ahci ata_generic floppy ehci_hcd ohci_hcd libata e1000e scsi_mod usbcore nls_base thermal fan thermal_sys [last unloaded: scsi_wait_scan]
+
+Pid: 0, comm: swapper Not tainted (2.6.32.8 #1)
+EIP: 0060:[<f89d074c>] EFLAGS: 00010297 CPU: 3
+EIP is at pppol2tp_xmit+0x341/0x4da [pppol2tp]
+EAX: 00000000 EBX: f64d1680 ECX: 000005b9 EDX: 00000000
+ESI: f6b91850 EDI: f64d16ac EBP: f6a0c4c0 ESP: f70a9cac
+ DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
+Process swapper (pid: 0, ti=f70a8000 task=f70a31c0 task.ti=f70a8000)
+Stack:
+ 000005a9 000005b9 f734c400 f66652c0 f7352e00 f67dc800 00000000 f6b91800
+<0> 000005a3 f70ef6c4 f67dcda9 000005a3 f89b192e 00000246 000005a3 f64d1680
+<0> f63633e0 f6363320 f64d1680 f65a7320 f65a7364 f65856c0 f64d1680 f679f02f
+Call Trace:
+ [<f89b192e>] ? ppp_push+0x459/0x50e [ppp_generic]
+ [<f89b217f>] ? ppp_xmit_process+0x3b6/0x430 [ppp_generic]
+ [<f89b2306>] ? ppp_start_xmit+0x10d/0x120 [ppp_generic]
+ [<c11c15cb>] ? dev_hard_start_xmit+0x21f/0x2b2
+ [<c11d0947>] ? sch_direct_xmit+0x48/0x10e
+ [<c11c19a0>] ? dev_queue_xmit+0x263/0x3a6
+ [<c11e2a9f>] ? ip_finish_output+0x1f7/0x221
+ [<c11df682>] ? ip_forward_finish+0x2e/0x30
+ [<c11de645>] ? ip_rcv_finish+0x295/0x2a9
+ [<c11c0b19>] ? netif_receive_skb+0x3e9/0x404
+ [<f814b791>] ? e1000_clean_rx_irq+0x253/0x2fc [e1000e]
+ [<f814cb7a>] ? e1000_clean+0x63/0x1fc [e1000e]
+ [<c1047eff>] ? sched_clock_local+0x15/0x11b
+ [<c11c1095>] ? net_rx_action+0x96/0x195
+ [<c1035750>] ? __do_softirq+0xaa/0x151
+ [<c1035828>] ? do_softirq+0x31/0x3c
+ [<c10358fe>] ? irq_exit+0x26/0x58
+ [<c1004b21>] ? do_IRQ+0x78/0x89
+ [<c1003729>] ? common_interrupt+0x29/0x30
+ [<c101ac28>] ? native_safe_halt+0x2/0x3
+ [<c1008c54>] ? default_idle+0x55/0x75
+ [<c1009045>] ? c1e_idle+0xd2/0xd5
+ [<c100233c>] ? cpu_idle+0x46/0x62
+Code: 8d 45 08 f0 ff 45 08 89 6b 08 c7 43 68 7e fb 9c f8 8a 45 24 83 e0 0c 3c 04 75 09 80 63 64 f3 e9 b4 00 00 00 8b 43 18 8b 4c 24 04 <8b> 40 0c 8d 79 11 f6 40 44 0e 8a 43 64 75 51 6a 00 8b 4c 24 08
+EIP: [<f89d074c>] pppol2tp_xmit+0x341/0x4da [pppol2tp] SS:ESP 0068:f70a9cac
+CR2: 000000000000000c
+
+Signed-off-by: James Chapman <jchapman@katalix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/net/pppol2tp.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/pppol2tp.c
++++ b/drivers/net/pppol2tp.c
+@@ -977,7 +977,8 @@ static int pppol2tp_sendmsg(struct kiocb
+       /* Calculate UDP checksum if configured to do so */
+       if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
+               skb->ip_summed = CHECKSUM_NONE;
+-      else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
++      else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
++               (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               csum = skb_checksum(skb, 0, udp_len, 0);
+               uh->check = csum_tcpudp_magic(inet->inet_saddr,
diff --git a/queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch b/queue-2.6.33/parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch
new file mode 100644 (file)
index 0000000..3080f86
--- /dev/null
@@ -0,0 +1,34 @@
+From 550f0d922286556c7ea43974bb7921effb5a5278 Mon Sep 17 00:00:00 2001
+From: Helge Deller <deller@gmx.de>
+Date: Mon, 3 May 2010 20:44:21 +0000
+Subject: parisc: clear floating point exception flag on SIGFPE signal
+
+From: Helge Deller <deller@gmx.de>
+
+commit 550f0d922286556c7ea43974bb7921effb5a5278 upstream.
+
+Clear the floating point exception flag before returning to
+user space. This is needed, else the libc trampoline handler
+may hit the same SIGFPE again while building up a trampoline
+to a signal handler.
+
+Fixes debian bug #559406.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/parisc/math-emu/decode_exc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/parisc/math-emu/decode_exc.c
++++ b/arch/parisc/math-emu/decode_exc.c
+@@ -342,6 +342,7 @@ decode_fpu(unsigned int Fpu_register[],
+               return SIGNALCODE(SIGFPE, FPE_FLTINV);
+         case DIVISIONBYZEROEXCEPTION:
+               update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
++              Clear_excp_register(exception_index);
+               return SIGNALCODE(SIGFPE, FPE_FLTDIV);
+         case INEXACTEXCEPTION:
+               update_trap_counts(Fpu_register, aflags, bflags, trap_counts);
diff --git a/queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch b/queue-2.6.33/qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch
new file mode 100644 (file)
index 0000000..fc0bca0
--- /dev/null
@@ -0,0 +1,68 @@
+From 6377a7ae1ab82859edccdbc8eaea63782efb134d Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Fri, 19 Mar 2010 16:59:19 -0700
+Subject: [SCSI] qla2xxx: Disable MSI on qla24xx chips other than QLA2432.
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit 6377a7ae1ab82859edccdbc8eaea63782efb134d upstream.
+
+On specific platforms, MSI is unreliable on some of the QLA24xx chips, resulting
+in fatal I/O errors under load, as reported in <http://bugs.debian.org/572322>
+and by some RHEL customers.
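+
+A sketch of the quirk check (the subsystem IDs come from the patch
+below; the table layout is illustrative): affected HP-branded boards
+are routed past both MSI-X and MSI setup.
+
+	#include <stdio.h>
+	#include <stddef.h>
+	#include <stdint.h>
+	#include <stdbool.h>
+
+	#define PCI_VENDOR_ID_HP 0x103c
+
+	static const uint16_t hp_bad_ssdid[] = { 0x7040, 0x7041, 0x1705 };
+
+	static bool msi_blacklisted(uint16_t ssvid, uint16_t ssdid)
+	{
+		if (ssvid != PCI_VENDOR_ID_HP)
+			return false;
+		for (size_t i = 0;
+		     i < sizeof(hp_bad_ssdid) / sizeof(hp_bad_ssdid[0]); i++)
+			if (hp_bad_ssdid[i] == ssdid)
+				return true;
+		return false;
+	}
+
+	int main(void)
+	{
+		printf("HP 0x7040: %s\n",
+		       msi_blacklisted(0x103c, 0x7040) ? "INTx only" : "try MSI");
+		printf("HP 0x1234: %s\n",
+		       msi_blacklisted(0x103c, 0x1234) ? "INTx only" : "try MSI");
+		return 0;
+	}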
+
+Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
+Signed-off-by: James Bottomley <James.Bottomley@suse.de>
+Cc: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/scsi/qla2xxx/qla_isr.c |   28 +++++++++++++---------------
+ 1 file changed, 13 insertions(+), 15 deletions(-)
+
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -2169,30 +2169,28 @@ qla2x00_request_irqs(struct qla_hw_data
+       /* If possible, enable MSI-X. */
+       if (!IS_QLA2432(ha) && !IS_QLA2532(ha) &&
+-          !IS_QLA8432(ha) && !IS_QLA8001(ha))
+-              goto skip_msix;
++              !IS_QLA8432(ha) && !IS_QLA8001(ha))
++              goto skip_msi;
++
++      if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP &&
++              (ha->pdev->subsystem_device == 0x7040 ||
++              ha->pdev->subsystem_device == 0x7041 ||
++              ha->pdev->subsystem_device == 0x1705)) {
++              DEBUG2(qla_printk(KERN_WARNING, ha,
++                      "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X,0x%X).\n",
++                      ha->pdev->subsystem_vendor,
++                      ha->pdev->subsystem_device));
++              goto skip_msi;
++      }
+       if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX ||
+               !QLA_MSIX_FW_MODE_1(ha->fw_attributes))) {
+               DEBUG2(qla_printk(KERN_WARNING, ha,
+               "MSI-X: Unsupported ISP2432 (0x%X, 0x%X).\n",
+                       ha->pdev->revision, ha->fw_attributes));
+-
+               goto skip_msix;
+       }
+-      if (ha->pdev->subsystem_vendor == PCI_VENDOR_ID_HP &&
+-          (ha->pdev->subsystem_device == 0x7040 ||
+-              ha->pdev->subsystem_device == 0x7041 ||
+-              ha->pdev->subsystem_device == 0x1705)) {
+-              DEBUG2(qla_printk(KERN_WARNING, ha,
+-                  "MSI-X: Unsupported ISP2432 SSVID/SSDID (0x%X, 0x%X).\n",
+-                  ha->pdev->subsystem_vendor,
+-                  ha->pdev->subsystem_device));
+-
+-              goto skip_msi;
+-      }
+-
+       ret = qla24xx_enable_msix(ha, rsp);
+       if (!ret) {
+               DEBUG2(qla_printk(KERN_INFO, ha,
diff --git a/queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch b/queue-2.6.33/sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch
new file mode 100644 (file)
index 0000000..30eb1cb
--- /dev/null
@@ -0,0 +1,223 @@
+From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Wed, 28 Apr 2010 10:30:59 +0000
+Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
+
+From: Neil Horman <nhorman@tuxdriver.com>
+
+commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 upstream.
+
+Ok, version 4
+
+Change Notes:
+1) Minor cleanups, from Vlad's notes
+
+Summary:
+
+Hey-
+       Recently, it was reported to me that the kernel could oops in the
+following way:
+
+<5> kernel BUG at net/core/skbuff.c:91!
+<5> invalid operand: 0000 [#1]
+<5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter
+ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U)
+vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5
+ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss
+snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore
+pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi
+mptbase sd_mod scsi_mod
+<5> CPU:    0
+<5> EIP:    0060:[<c02bff27>]    Not tainted VLI
+<5> EFLAGS: 00010216   (2.6.9-89.0.25.EL)
+<5> EIP is at skb_over_panic+0x1f/0x2d
+<5> eax: 0000002c   ebx: c033f461   ecx: c0357d96   edx: c040fd44
+<5> esi: c033f461   edi: df653280   ebp: 00000000   esp: c040fd40
+<5> ds: 007b   es: 007b   ss: 0068
+<5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0)
+<5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180
+e0c2947d
+<5>        00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004
+df653490
+<5>        00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e
+00000004
+<5> Call Trace:
+<5>  [<e0c29478>] sctp_addto_chunk+0xb0/0x128 [sctp]
+<5>  [<e0c2947d>] sctp_addto_chunk+0xb5/0x128 [sctp]
+<5>  [<e0c2877a>] sctp_init_cause+0x3f/0x47 [sctp]
+<5>  [<e0c29d2e>] sctp_process_unk_param+0xac/0xb8 [sctp]
+<5>  [<e0c29e90>] sctp_verify_init+0xcc/0x134 [sctp]
+<5>  [<e0c20322>] sctp_sf_do_5_1B_init+0x83/0x28e [sctp]
+<5>  [<e0c25333>] sctp_do_sm+0x41/0x77 [sctp]
+<5>  [<c01555a4>] cache_grow+0x140/0x233
+<5>  [<e0c26ba1>] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp]
+<5>  [<e0c2b863>] sctp_inq_push+0xe/0x10 [sctp]
+<5>  [<e0c34600>] sctp_rcv+0x454/0x509 [sctp]
+<5>  [<e084e017>] ipt_hook+0x17/0x1c [iptable_filter]
+<5>  [<c02d005e>] nf_iterate+0x40/0x81
+<5>  [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5>  [<c02e0c7f>] ip_local_deliver_finish+0xc6/0x151
+<5>  [<c02d0362>] nf_hook_slow+0x83/0xb5
+<5>  [<c02e0bb2>] ip_local_deliver+0x1a2/0x1a9
+<5>  [<c02e0bb9>] ip_local_deliver_finish+0x0/0x151
+<5>  [<c02e103e>] ip_rcv+0x334/0x3b4
+<5>  [<c02c66fd>] netif_receive_skb+0x320/0x35b
+<5>  [<e0a0928b>] init_stall_timer+0x67/0x6a [uhci_hcd]
+<5>  [<c02c67a4>] process_backlog+0x6c/0xd9
+<5>  [<c02c690f>] net_rx_action+0xfe/0x1f8
+<5>  [<c012a7b1>] __do_softirq+0x35/0x79
+<5>  [<c0107efb>] handle_IRQ_event+0x0/0x4f
+<5>  [<c01094de>] do_softirq+0x46/0x4d
+
+It's an skb_over_panic BUG halt that results from processing an init chunk in
+which too many of its variable length parameters are in some way malformed.
+
+The problem is in sctp_process_unk_param:
+if (NULL == *errp)
+       *errp = sctp_make_op_error_space(asoc, chunk,
+                                        ntohs(chunk->chunk_hdr->length));
+
+       if (*errp) {
+               sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+                                WORD_ROUND(ntohs(param.p->length)));
+               sctp_addto_chunk(*errp,
+                       WORD_ROUND(ntohs(param.p->length)),
+                                 param.v);
+
+When we allocate an error chunk, we assume that the worst case scenario requires
+that we have chunk_hdr->length data allocated, which would be correct nominally,
+given that we call sctp_addto_chunk for the violating parameter.  Unfortunately,
+we also, in sctp_init_cause, insert a sctp_errhdr_t structure into the error
+chunk, so the worst case situation in which all parameters are in violation
+requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data.
+
+The result of this error is that a deliberately malformed packet sent to a
+listening host can cause a remote DOS, described in CVE-2010-1173:
+http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173
+
+I've tested the below fix and confirmed that it fixes the issue.  We move to a
+strategy whereby we allocate a fixed-size error chunk and ignore errors we don't
+have space to report.
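+
+A userspace model of the bounded-append strategy (buffer sizes are
+arbitrary; this is not the sctp code): check tailroom before each
+copy, so an over-long parameter list truncates the error report
+instead of overrunning the chunk.
+
+	#include <stdio.h>
+	#include <string.h>
+
+	struct chunk { char buf[64]; size_t len; };
+
+	static size_t tailroom(const struct chunk *c)
+	{
+		return sizeof(c->buf) - c->len;
+	}
+
+	/* Bounded variant of "addto_chunk": refuse, rather than
+	 * panic, when the fixed-size chunk has no room left. */
+	static int addto_chunk_fixed(struct chunk *c,
+				     const void *data, size_t len)
+	{
+		if (tailroom(c) < len)
+			return -1;	/* drop this error report */
+		memcpy(c->buf + c->len, data, len);
+		c->len += len;
+		return 0;
+	}
+
+	int main(void)
+	{
+		struct chunk err = { .len = 0 };
+		char param[24] = "bogus-parameter";
+		int dropped = 0;
+
+		/* A hostile INIT can carry more bad params than fit. */
+		for (int i = 0; i < 10; i++)
+			if (addto_chunk_fixed(&err, param, sizeof(param)) < 0)
+				dropped++;
+		printf("stored %zu bytes, dropped %d reports\n",
+		       err.len, dropped);
+		return 0;
+	}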
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/net/sctp/structs.h |    1 
+ net/sctp/sm_make_chunk.c   |   62 +++++++++++++++++++++++++++++++++++++++++----
+ 2 files changed, 58 insertions(+), 5 deletions(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -778,6 +778,7 @@ int sctp_user_addto_chunk(struct sctp_ch
+                         struct iovec *data);
+ void sctp_chunk_free(struct sctp_chunk *);
+ void  *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
++void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data);
+ struct sctp_chunk *sctp_chunkify(struct sk_buff *,
+                                const struct sctp_association *,
+                                struct sock *);
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -107,7 +107,7 @@ static const struct sctp_paramhdr prsctp
+       cpu_to_be16(sizeof(struct sctp_paramhdr)),
+ };
+-/* A helper to initialize to initialize an op error inside a
++/* A helper to initialize an op error inside a
+  * provided chunk, as most cause codes will be embedded inside an
+  * abort chunk.
+  */
+@@ -124,6 +124,29 @@ void  sctp_init_cause(struct sctp_chunk
+       chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+ }
++/* A helper to initialize an op error inside a
++ * provided chunk, as most cause codes will be embedded inside an
++ * abort chunk.  Differs from sctp_init_cause in that it won't oops
++ * if there isn't enough space in the op error chunk
++ */
++int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
++                    size_t paylen)
++{
++      sctp_errhdr_t err;
++      __u16 len;
++
++      /* Cause code constants are now defined in network order.  */
++      err.cause = cause_code;
++      len = sizeof(sctp_errhdr_t) + paylen;
++      err.length  = htons(len);
++
++      if (skb_tailroom(chunk->skb) >  len)
++              return -ENOSPC;
++      chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
++                                                   sizeof(sctp_errhdr_t),
++                                                   &err);
++      return 0;
++}
+ /* 3.3.2 Initiation (INIT) (1)
+  *
+  * This chunk is used to initiate a SCTP association between two
+@@ -1131,6 +1154,24 @@ nodata:
+       return retval;
+ }
++/* Create an Operation Error chunk of a fixed size,
++ * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT)
++ * This is a helper function to allocate an error chunk for
++ * for those invalid parameter codes in which we may not want
++ * to report all the errors, if the incomming chunk is large
++ */
++static inline struct sctp_chunk *sctp_make_op_error_fixed(
++      const struct sctp_association *asoc,
++      const struct sctp_chunk *chunk)
++{
++      size_t size = asoc ? asoc->pathmtu : 0;
++
++      if (!size)
++              size = SCTP_DEFAULT_MAXSEGMENT;
++
++      return sctp_make_op_error_space(asoc, chunk, size);
++}
++
+ /* Create an Operation Error chunk.  */
+ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+                                const struct sctp_chunk *chunk,
+@@ -1373,6 +1414,18 @@ void *sctp_addto_chunk(struct sctp_chunk
+       return target;
+ }
++/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
++ * space in the chunk
++ */
++void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
++                           int len, const void *data)
++{
++      if (skb_tailroom(chunk->skb) > len)
++              return sctp_addto_chunk(chunk, len, data);
++      else
++              return NULL;
++}
++
+ /* Append bytes from user space to the end of a chunk.  Will panic if
+  * chunk is not big enough.
+  * Returns a kernel err value.
+@@ -1976,13 +2029,12 @@ static sctp_ierror_t sctp_process_unk_pa
+                * returning multiple unknown parameters.
+                */
+               if (NULL == *errp)
+-                      *errp = sctp_make_op_error_space(asoc, chunk,
+-                                      ntohs(chunk->chunk_hdr->length));
++                      *errp = sctp_make_op_error_fixed(asoc, chunk);
+               if (*errp) {
+-                      sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM,
++                      sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+                                       WORD_ROUND(ntohs(param.p->length)));
+-                      sctp_addto_chunk(*errp,
++                      sctp_addto_chunk_fixed(*errp,
+                                       WORD_ROUND(ntohs(param.p->length)),
+                                       param.v);
+               } else {
diff --git a/queue-2.6.33/series b/queue-2.6.33/series
index 6a5dd3e030221153a63615a24a147d863474a319..3529c5b1ef59081e1e5f18af1dc6b65f68842a3b 100644 (file)
@@ -125,3 +125,39 @@ iwlwifi-recalculate-average-tpt-if-not-current.patch
 perf-fix-signed-comparison-in-perf_adjust_period.patch
 tracing-fix-null-pointer-deref-with-send_sig_forced.patch
 wl1251-fix-a-memory-leak-in-probe.patch
+ext4-check-s_log_groups_per_flex-in-online-resize-code.patch
+ext4-make-sure-the-move_ext-ioctl-can-t-overwrite-append-only-files.patch
+gfs2-fix-permissions-checking-for-setflags-ioctl.patch
+sctp-fix-skb_over_panic-resulting-from-multiple-invalid-parameter-errors-cve-2010-1173-v4.patch
+cifs-allow-null-nd-as-nfs-server-uses-on-create.patch
+vfs-add-nofollow-flag-to-umount-2.patch
+l2tp-fix-oops-in-pppol2tp_xmit.patch
+btrfs-should-add-a-permission-check-for-setfacl.patch
+eeepc-laptop-check-wireless-hotplug-events.patch
+tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch
+input-psmouse-reset-all-types-of-mice-before-reconnecting.patch
+0001-KVM-SVM-Don-t-use-kmap_atomic-in-nested_svm_map.patch
+0002-KVM-SVM-Fix-schedule-while-atomic-on-nested-exceptio.patch
+0003-KVM-SVM-Sync-all-control-registers-on-nested-vmexit.patch
+0004-KVM-SVM-Fix-nested-msr-intercept-handling.patch
+0005-KVM-SVM-Don-t-sync-nested-cr8-to-lapic-and-back.patch
+0006-KVM-SVM-Fix-wrong-interrupt-injection-in-enable_irq_.patch
+0007-KVM-s390-Fix-possible-memory-leak-of-in-kvm_arch_vcp.patch
+0008-KVM-PPC-Do-not-create-debugfs-if-fail-to-create-vcpu.patch
+0009-KVM-x86-Add-callback-to-let-modules-decide-over-some.patch
+0010-KVM-SVM-Report-emulated-SVM-features-to-userspace.patch
+0011-x86-paravirt-Add-a-global-synchronization-point-for-.patch
+0012-KVM-Don-t-allow-lmsw-to-clear-cr0.pe.patch
+0013-KVM-x86-Check-LMA-bit-before-set_efer.patch
+0014-KVM-MMU-Segregate-shadow-pages-with-different-cr0.wp.patch
+0015-KVM-VMX-enable-VMXON-check-with-SMX-enabled-Intel-TX.patch
+0016-KVM-MMU-Don-t-read-pdptrs-with-mmu-spinlock-held-in-.patch
+0017-KVM-Fix-wallclock-version-writing-race.patch
+0018-KVM-PPC-Add-missing-vcpu_load-vcpu_put-in-vcpu-ioctl.patch
+0019-KVM-x86-Add-missing-locking-to-arch-specific-vcpu-io.patch
+0020-KVM-x86-Inject-GP-with-the-right-rip-on-efer-writes.patch
+0021-KVM-SVM-Don-t-allow-nested-guest-to-VMMCALL-into-hos.patch
+parisc-clear-floating-point-exception-flag-on-sigfpe-signal.patch
+keys-return-more-accurate-error-codes.patch
+keys-find_keyring_by_name-can-gain-access-to-a-freed-keyring.patch
+qla2xxx-disable-msi-on-qla24xx-chips-other-than-qla2432.patch
diff --git a/queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch b/queue-2.6.33/tracing-consolidate-protection-of-reader-access-to-the-ring-buffer.patch
new file mode 100644 (file)
index 0000000..bc6e367
--- /dev/null
@@ -0,0 +1,324 @@
+From 7e53bd42d14c75192b99674c40fcc359392da59d Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+Date: Wed, 6 Jan 2010 20:08:50 +0800
+Subject: tracing: Consolidate protection of reader access to the ring buffer
+
+From: Lai Jiangshan <laijs@cn.fujitsu.com>
+
+commit 7e53bd42d14c75192b99674c40fcc359392da59d upstream.
+
+At the beginning, access to the ring buffer was fully serialized by
+trace_types_lock. Patch d7350c3f4569 gave readers more freedom, and
+patch b04cc6b1f6 added code to protect trace_pipe and cpu#/trace_pipe.
+
+But this is not enough: ring buffer readers are not always read-only;
+they may also consume data.
+
+This patch serializes accesses to trace, trace_pipe, trace_pipe_raw,
+cpu#/trace, cpu#/trace_pipe and cpu#/trace_pipe_raw, and removes
+tracing_reader_cpumask, which was used to protect trace_pipe.
+
+Details:
+
+The ring buffer serializes readers, but that is only low-level
+protection. The validity of the events (returned by ring_buffer_peek()
+etc.) is not protected by the ring buffer.
+
+The content of events may become garbage if we allow another process to
+consume these events concurrently:
+  A) the page holding the consumed events may become a normal page
+     (not a reader page) in the ring buffer, and be rewritten by the
+     events producer.
+  B) the page holding the consumed events may become a splice_read
+     page, and be returned to the system.
+
+This patch adds the trace_access_lock() and trace_access_unlock()
+primitives, which allow multiple processes to access different cpu
+ring buffers concurrently.
+
+The primitives don't distinguish between read-only and read-consume
+access; multiple read-only accesses are also serialized.
+
+They are not taken when files are opened, only when files are read.
+
+Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
+LKML-Reference: <4B447D52.1050602@cn.fujitsu.com>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/trace/trace.c |  136 ++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 97 insertions(+), 39 deletions(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -32,6 +32,7 @@
+ #include <linux/splice.h>
+ #include <linux/kdebug.h>
+ #include <linux/string.h>
++#include <linux/rwsem.h>
+ #include <linux/ctype.h>
+ #include <linux/init.h>
+ #include <linux/poll.h>
+@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(voi
+ static cpumask_var_t __read_mostly    tracing_buffer_mask;
+-/* Define which cpu buffers are currently read in trace_pipe */
+-static cpumask_var_t                  tracing_reader_cpumask;
+-
+ #define for_each_tracing_cpu(cpu)     \
+       for_each_cpu(cpu, tracing_buffer_mask)
+@@ -243,12 +241,91 @@ static struct tracer             *current_trace __r
+ /*
+  * trace_types_lock is used to protect the trace_types list.
+- * This lock is also used to keep user access serialized.
+- * Accesses from userspace will grab this lock while userspace
+- * activities happen inside the kernel.
+  */
+ static DEFINE_MUTEX(trace_types_lock);
++/*
++ * Serialize access to the ring buffer.
++ *
++ * The ring buffer serializes readers, but that is only low-level
++ * protection.  The validity of the events (returned by
++ * ring_buffer_peek() etc.) is not protected by the ring buffer.
++ *
++ * The content of events may become garbage if we allow another process
++ * to consume these events concurrently:
++ *   A) the page holding the consumed events may become a normal page
++ *      (not a reader page) in the ring buffer, and be rewritten by the
++ *      events producer.
++ *   B) the page holding the consumed events may become a splice_read
++ *      page, and be returned to the system.
++ *
++ * These primitives allow multiple processes to access different cpu
++ * ring buffers concurrently.
++ *
++ * They don't distinguish read-only from read-consume access; multiple
++ * read-only accesses are also serialized.
++ */
++
++#ifdef CONFIG_SMP
++static DECLARE_RWSEM(all_cpu_access_lock);
++static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
++
++static inline void trace_access_lock(int cpu)
++{
++      if (cpu == TRACE_PIPE_ALL_CPU) {
++              /* gain it for accessing the whole ring buffer. */
++              down_write(&all_cpu_access_lock);
++      } else {
++              /* gain it for accessing a cpu ring buffer. */
++
++              /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
++              down_read(&all_cpu_access_lock);
++
++              /* Secondly block other access to this @cpu ring buffer. */
++              mutex_lock(&per_cpu(cpu_access_lock, cpu));
++      }
++}
++
++static inline void trace_access_unlock(int cpu)
++{
++      if (cpu == TRACE_PIPE_ALL_CPU) {
++              up_write(&all_cpu_access_lock);
++      } else {
++              mutex_unlock(&per_cpu(cpu_access_lock, cpu));
++              up_read(&all_cpu_access_lock);
++      }
++}
++
++static inline void trace_access_lock_init(void)
++{
++      int cpu;
++
++      for_each_possible_cpu(cpu)
++              mutex_init(&per_cpu(cpu_access_lock, cpu));
++}
++
++#else
++
++static DEFINE_MUTEX(access_lock);
++
++static inline void trace_access_lock(int cpu)
++{
++      (void)cpu;
++      mutex_lock(&access_lock);
++}
++
++static inline void trace_access_unlock(int cpu)
++{
++      (void)cpu;
++      mutex_unlock(&access_lock);
++}
++
++static inline void trace_access_lock_init(void)
++{
++}
++
++#endif
++
+ /* trace_wait is a waitqueue for tasks blocked on trace_poll */
+ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
+@@ -1601,12 +1678,6 @@ static void tracing_iter_reset(struct tr
+ }
+ /*
+- * No necessary locking here. The worst thing which can
+- * happen is loosing events consumed at the same time
+- * by a trace_pipe reader.
+- * Other than that, we don't risk to crash the ring buffer
+- * because it serializes the readers.
+- *
+  * The current tracer is copied to avoid a global locking
+  * all around.
+  */
+@@ -1662,12 +1733,16 @@ static void *s_start(struct seq_file *m,
+       }
+       trace_event_read_lock();
++      trace_access_lock(cpu_file);
+       return p;
+ }
+ static void s_stop(struct seq_file *m, void *p)
+ {
++      struct trace_iterator *iter = m->private;
++
+       atomic_dec(&trace_record_cmdline_disabled);
++      trace_access_unlock(iter->cpu_file);
+       trace_event_read_unlock();
+ }
+@@ -2858,22 +2933,6 @@ static int tracing_open_pipe(struct inod
+       mutex_lock(&trace_types_lock);
+-      /* We only allow one reader per cpu */
+-      if (cpu_file == TRACE_PIPE_ALL_CPU) {
+-              if (!cpumask_empty(tracing_reader_cpumask)) {
+-                      ret = -EBUSY;
+-                      goto out;
+-              }
+-              cpumask_setall(tracing_reader_cpumask);
+-      } else {
+-              if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
+-                      cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
+-              else {
+-                      ret = -EBUSY;
+-                      goto out;
+-              }
+-      }
+-
+       /* create a buffer to store the information to pass to userspace */
+       iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+       if (!iter) {
+@@ -2929,12 +2988,6 @@ static int tracing_release_pipe(struct i
+       mutex_lock(&trace_types_lock);
+-      if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
+-              cpumask_clear(tracing_reader_cpumask);
+-      else
+-              cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+-
+-
+       if (iter->trace->pipe_close)
+               iter->trace->pipe_close(iter);
+@@ -3096,6 +3149,7 @@ waitagain:
+       iter->pos = -1;
+       trace_event_read_lock();
++      trace_access_lock(iter->cpu_file);
+       while (find_next_entry_inc(iter) != NULL) {
+               enum print_line_t ret;
+               int len = iter->seq.len;
+@@ -3112,6 +3166,7 @@ waitagain:
+               if (iter->seq.len >= cnt)
+                       break;
+       }
++      trace_access_unlock(iter->cpu_file);
+       trace_event_read_unlock();
+       /* Now copy what we have to the user */
+@@ -3237,6 +3292,7 @@ static ssize_t tracing_splice_read_pipe(
+       }
+       trace_event_read_lock();
++      trace_access_lock(iter->cpu_file);
+       /* Fill as many pages as possible. */
+       for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
+@@ -3260,6 +3316,7 @@ static ssize_t tracing_splice_read_pipe(
+               trace_seq_init(&iter->seq);
+       }
++      trace_access_unlock(iter->cpu_file);
+       trace_event_read_unlock();
+       mutex_unlock(&iter->mutex);
+@@ -3561,10 +3618,12 @@ tracing_buffers_read(struct file *filp,
+       info->read = 0;
++      trace_access_lock(info->cpu);
+       ret = ring_buffer_read_page(info->tr->buffer,
+                                   &info->spare,
+                                   count,
+                                   info->cpu, 0);
++      trace_access_unlock(info->cpu);
+       if (ret < 0)
+               return 0;
+@@ -3692,6 +3751,7 @@ tracing_buffers_splice_read(struct file
+               len &= PAGE_MASK;
+       }
++      trace_access_lock(info->cpu);
+       entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+       for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+@@ -3739,6 +3799,7 @@ tracing_buffers_splice_read(struct file
+               entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+       }
++      trace_access_unlock(info->cpu);
+       spd.nr_pages = i;
+       /* did we read anything? */
+@@ -4175,6 +4236,8 @@ static __init int tracer_init_debugfs(vo
+       struct dentry *d_tracer;
+       int cpu;
++      trace_access_lock_init();
++
+       d_tracer = tracing_init_dentry();
+       trace_create_file("tracing_enabled", 0644, d_tracer,
+@@ -4409,9 +4472,6 @@ __init static int tracer_alloc_buffers(v
+       if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+               goto out_free_buffer_mask;
+-      if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
+-              goto out_free_tracing_cpumask;
+-
+       /* To save memory, keep the ring buffer size to its minimum */
+       if (ring_buffer_expanded)
+               ring_buf_size = trace_buf_size;
+@@ -4469,8 +4529,6 @@ __init static int tracer_alloc_buffers(v
+       return 0;
+ out_free_cpumask:
+-      free_cpumask_var(tracing_reader_cpumask);
+-out_free_tracing_cpumask:
+       free_cpumask_var(tracing_cpumask);
+ out_free_buffer_mask:
+       free_cpumask_var(tracing_buffer_mask);
diff --git a/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch b/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch
new file mode 100644
index 0000000..64c5cd6
--- /dev/null
+++ b/queue-2.6.33/vfs-add-nofollow-flag-to-umount-2.patch
@@ -0,0 +1,57 @@
+From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@suse.cz>
+Date: Wed, 10 Feb 2010 12:15:53 +0100
+Subject: vfs: add NOFOLLOW flag to umount(2)
+
+From: Miklos Szeredi <mszeredi@suse.cz>
+
+commit db1f05bb85d7966b9176e293f3ceead1cb8b5d79 upstream.
+
+Add a new UMOUNT_NOFOLLOW flag to umount(2).  This is needed to prevent
+symlink attacks in unprivileged unmounts (fuse, samba, ncpfs).
+
+Additionally, return -EINVAL if an unknown flag is used (and specify
+an explicitly unused flag: UMOUNT_UNUSED).  This makes it possible for
+the caller to determine if a flag is supported or not.
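+
+For example (an illustrative userspace sketch, not part of this patch;
+umount_flags_checked() is a hypothetical helper), a caller can probe for
+flag support by passing UMOUNT_UNUSED with a path that cannot resolve: a
+patched kernel checks the flags before the path lookup and fails with
+EINVAL, while an older kernel ignores the flag and fails with ENOENT:
+
+	#include <errno.h>
+	#include <sys/mount.h>
+
+	#ifndef UMOUNT_UNUSED
+	#define UMOUNT_UNUSED 0x80000000	/* from this patch */
+	#endif
+
+	static int umount_flags_checked(void)
+	{
+		if (umount2("", UMOUNT_UNUSED) == 0)
+			return 0;		/* cannot happen */
+		return errno == EINVAL;		/* EINVAL: flags validated */
+	}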
+
+CC: Eugene Teo <eugene@redhat.com>
+CC: Michael Kerrisk <mtk.manpages@gmail.com>
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/namespace.c     |    9 ++++++++-
+ include/linux/fs.h |    2 ++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1121,8 +1121,15 @@ SYSCALL_DEFINE2(umount, char __user *, n
+ {
+       struct path path;
+       int retval;
++      int lookup_flags = 0;
+-      retval = user_path(name, &path);
++      if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
++              return -EINVAL;
++
++      if (!(flags & UMOUNT_NOFOLLOW))
++              lookup_flags |= LOOKUP_FOLLOW;
++
++      retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
+       if (retval)
+               goto out;
+       retval = -EINVAL;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1308,6 +1308,8 @@ extern int send_sigurg(struct fown_struc
+ #define MNT_FORCE     0x00000001      /* Attempt to forcibily umount */
+ #define MNT_DETACH    0x00000002      /* Just detach from the tree */
+ #define MNT_EXPIRE    0x00000004      /* Mark for expiry */
++#define UMOUNT_NOFOLLOW       0x00000008      /* Don't follow symlink on umount */
++#define UMOUNT_UNUSED 0x80000000      /* Flag guaranteed to be unused */
+ extern struct list_head super_blocks;
+ extern spinlock_t sb_lock;