]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 May 2013 17:42:24 +0000 (10:42 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 May 2013 17:42:24 +0000 (10:42 -0700)
added patches:
fix-initialization-of-cmci-cmcp-interrupts.patch
fs-fscache-stats.c-fix-memory-leak.patch
mm-allow-arch-code-to-control-the-user-page-table-ceiling.patch
pci-acpi-don-t-query-osc-support-with-all-possible-controls.patch
pci-pm-fix-fallback-to-pci_d0-in-pci_platform_power_transition.patch
sysfs-fix-use-after-free-in-case-of-concurrent-read-write-and-readdir.patch
wrong-asm-register-contraints-in-the-futex-implementation.patch
wrong-asm-register-contraints-in-the-kvm-implementation.patch

queue-3.4/fix-initialization-of-cmci-cmcp-interrupts.patch [new file with mode: 0644]
queue-3.4/fs-fscache-stats.c-fix-memory-leak.patch [new file with mode: 0644]
queue-3.4/mm-allow-arch-code-to-control-the-user-page-table-ceiling.patch [new file with mode: 0644]
queue-3.4/pci-acpi-don-t-query-osc-support-with-all-possible-controls.patch [new file with mode: 0644]
queue-3.4/pci-pm-fix-fallback-to-pci_d0-in-pci_platform_power_transition.patch [new file with mode: 0644]
queue-3.4/series
queue-3.4/sysfs-fix-use-after-free-in-case-of-concurrent-read-write-and-readdir.patch [new file with mode: 0644]
queue-3.4/wrong-asm-register-contraints-in-the-futex-implementation.patch [new file with mode: 0644]
queue-3.4/wrong-asm-register-contraints-in-the-kvm-implementation.patch [new file with mode: 0644]

diff --git a/queue-3.4/fix-initialization-of-cmci-cmcp-interrupts.patch b/queue-3.4/fix-initialization-of-cmci-cmcp-interrupts.patch
new file mode 100644 (file)
index 0000000..f583307
--- /dev/null
@@ -0,0 +1,128 @@
+From d303e9e98fce56cdb3c6f2ac92f626fc2bd51c77 Mon Sep 17 00:00:00 2001
+From: Tony Luck <tony.luck@intel.com>
+Date: Wed, 20 Mar 2013 10:30:15 -0700
+Subject: Fix initialization of CMCI/CMCP interrupts
+
+From: Tony Luck <tony.luck@intel.com>
+
+commit d303e9e98fce56cdb3c6f2ac92f626fc2bd51c77 upstream.
+
+Back in 2010 during a revamp of the irq code some initializations
+were moved from ia64_mca_init() to ia64_mca_late_init() in
+
+       commit c75f2aa13f5b268aba369b5dc566088b5194377c
+       Cannot use register_percpu_irq() from ia64_mca_init()
+
+But this was hideously wrong. First of all these initializations
+are now done far too late. Specifically after all the other cpus
+have been brought up and initialized their own CMC vectors from
+smp_callin(). Also ia64_mca_late_init() may be called from any cpu
+so the line:
+       ia64_mca_cmc_vector_setup();       /* Setup vector on BSP */
+is generally not executed on the BSP, and so the CMC vector isn't
+setup at all on that processor.
+
+Make use of the arch_early_irq_init() hook to get this code executed
+at just the right moment: not too early, not too late.
+
+Reported-by: Fred Hartnett <fred.hartnett@hp.com>
+Tested-by: Fred Hartnett <fred.hartnett@hp.com>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/ia64/include/asm/mca.h |    1 +
+ arch/ia64/kernel/irq.c      |    8 ++++++++
+ arch/ia64/kernel/mca.c      |   37 ++++++++++++++++++++++++-------------
+ 3 files changed, 33 insertions(+), 13 deletions(-)
+
+--- a/arch/ia64/include/asm/mca.h
++++ b/arch/ia64/include/asm/mca.h
+@@ -143,6 +143,7 @@ extern unsigned long __per_cpu_mca[NR_CP
+ extern int cpe_vector;
+ extern int ia64_cpe_irq;
+ extern void ia64_mca_init(void);
++extern void ia64_mca_irq_init(void);
+ extern void ia64_mca_cpu_init(void *);
+ extern void ia64_os_mca_dispatch(void);
+ extern void ia64_os_mca_dispatch_end(void);
+--- a/arch/ia64/kernel/irq.c
++++ b/arch/ia64/kernel/irq.c
+@@ -23,6 +23,8 @@
+ #include <linux/interrupt.h>
+ #include <linux/kernel_stat.h>
++#include <asm/mca.h>
++
+ /*
+  * 'what should we do if we get a hw irq event on an illegal vector'.
+  * each architecture has to answer this themselves.
+@@ -83,6 +85,12 @@ bool is_affinity_mask_valid(const struct
+ #endif /* CONFIG_SMP */
++int __init arch_early_irq_init(void)
++{
++      ia64_mca_irq_init();
++      return 0;
++}
++
+ #ifdef CONFIG_HOTPLUG_CPU
+ unsigned int vectors_in_migration[NR_IRQS];
+--- a/arch/ia64/kernel/mca.c
++++ b/arch/ia64/kernel/mca.c
+@@ -2074,22 +2074,16 @@ ia64_mca_init(void)
+       printk(KERN_INFO "MCA related initialization done\n");
+ }
++
+ /*
+- * ia64_mca_late_init
+- *
+- *    Opportunity to setup things that require initialization later
+- *    than ia64_mca_init.  Setup a timer to poll for CPEs if the
+- *    platform doesn't support an interrupt driven mechanism.
+- *
+- *  Inputs  :   None
+- *  Outputs :   Status
++ * These pieces cannot be done in ia64_mca_init() because it is called before
++ * early_irq_init() which would wipe out our percpu irq registrations. But we
++ * cannot leave them until ia64_mca_late_init() because by then all the other
++ * processors have been brought online and have set their own CMC vectors to
++ * point at a non-existant action. Called from arch_early_irq_init().
+  */
+-static int __init
+-ia64_mca_late_init(void)
++void __init ia64_mca_irq_init(void)
+ {
+-      if (!mca_init)
+-              return 0;
+-
+       /*
+        *  Configure the CMCI/P vector and handler. Interrupts for CMC are
+        *  per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
+@@ -2108,6 +2102,23 @@ ia64_mca_late_init(void)
+       /* Setup the CPEI/P handler */
+       register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
+ #endif
++}
++
++/*
++ * ia64_mca_late_init
++ *
++ *    Opportunity to setup things that require initialization later
++ *    than ia64_mca_init.  Setup a timer to poll for CPEs if the
++ *    platform doesn't support an interrupt driven mechanism.
++ *
++ *  Inputs  :   None
++ *  Outputs :   Status
++ */
++static int __init
++ia64_mca_late_init(void)
++{
++      if (!mca_init)
++              return 0;
+       register_hotcpu_notifier(&mca_cpu_notifier);
diff --git a/queue-3.4/fs-fscache-stats.c-fix-memory-leak.patch b/queue-3.4/fs-fscache-stats.c-fix-memory-leak.patch
new file mode 100644 (file)
index 0000000..fbc4bf2
--- /dev/null
@@ -0,0 +1,41 @@
+From ec686c9239b4d472052a271c505d04dae84214cc Mon Sep 17 00:00:00 2001
+From: Anurup m <anurup.m@huawei.com>
+Date: Mon, 29 Apr 2013 15:05:52 -0700
+Subject: fs/fscache/stats.c: fix memory leak
+
+From: Anurup m <anurup.m@huawei.com>
+
+commit ec686c9239b4d472052a271c505d04dae84214cc upstream.
+
+There is a kernel memory leak observed when the proc file
+/proc/fs/fscache/stats is read.
+
+The reason is that in fscache_stats_open, single_open is called and the
+respective release function is not called during release.  Hence fix
+with correct release function - single_release().
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=57101
+
+Signed-off-by: Anurup m <anurup.m@huawei.com>
+Cc: shyju pv <shyju.pv@huawei.com>
+Cc: Sanil kumar <sanil.kumar@huawei.com>
+Cc: Nataraj m <nataraj.m@huawei.com>
+Cc: Li Zefan <lizefan@huawei.com>
+Cc: David Howells <dhowells@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/fscache/stats.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fscache/stats.c
++++ b/fs/fscache/stats.c
+@@ -276,5 +276,5 @@ const struct file_operations fscache_sta
+       .open           = fscache_stats_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+-      .release        = seq_release,
++      .release        = single_release,
+ };
diff --git a/queue-3.4/mm-allow-arch-code-to-control-the-user-page-table-ceiling.patch b/queue-3.4/mm-allow-arch-code-to-control-the-user-page-table-ceiling.patch
new file mode 100644 (file)
index 0000000..a5397fb
--- /dev/null
@@ -0,0 +1,89 @@
+From 6ee8630e02be6dd89926ca0fbc21af68b23dc087 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 29 Apr 2013 15:07:44 -0700
+Subject: mm: allow arch code to control the user page table ceiling
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 6ee8630e02be6dd89926ca0fbc21af68b23dc087 upstream.
+
+On architectures where a pgd entry may be shared between user and kernel
+(e.g.  ARM+LPAE), freeing page tables needs a ceiling other than 0.
+This patch introduces a generic USER_PGTABLES_CEILING that arch code can
+override.  It is the responsibility of the arch code setting the ceiling
+to ensure the complete freeing of the page tables (usually in
+pgd_free()).
+
+[catalin.marinas@arm.com: commit log; shift_arg_pages(), asm-generic/pgtables.h changes]
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Russell King <linux@arm.linux.org.uk>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/exec.c                     |    4 ++--
+ include/asm-generic/pgtable.h |   10 ++++++++++
+ mm/mmap.c                     |    4 ++--
+ 3 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -627,7 +627,7 @@ static int shift_arg_pages(struct vm_are
+                * when the old and new regions overlap clear from new_end.
+                */
+               free_pgd_range(&tlb, new_end, old_end, new_end,
+-                      vma->vm_next ? vma->vm_next->vm_start : 0);
++                      vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+       } else {
+               /*
+                * otherwise, clean from old_start; this is done to not touch
+@@ -636,7 +636,7 @@ static int shift_arg_pages(struct vm_are
+                * for the others its just a little faster.
+                */
+               free_pgd_range(&tlb, old_start, old_end, new_end,
+-                      vma->vm_next ? vma->vm_next->vm_start : 0);
++                      vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
+       }
+       tlb_finish_mmu(&tlb, new_end, old_end);
+--- a/include/asm-generic/pgtable.h
++++ b/include/asm-generic/pgtable.h
+@@ -7,6 +7,16 @@
+ #include <linux/mm_types.h>
+ #include <linux/bug.h>
++/*
++ * On almost all architectures and configurations, 0 can be used as the
++ * upper ceiling to free_pgtables(): on many architectures it has the same
++ * effect as using TASK_SIZE.  However, there is one configuration which
++ * must impose a more careful limit, to avoid freeing kernel pgtables.
++ */
++#ifndef USER_PGTABLES_CEILING
++#define USER_PGTABLES_CEILING 0UL
++#endif
++
+ #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ extern int ptep_set_access_flags(struct vm_area_struct *vma,
+                                unsigned long address, pte_t *ptep,
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1920,7 +1920,7 @@ static void unmap_region(struct mm_struc
+       unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
+       vm_unacct_memory(nr_accounted);
+       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+-                               next ? next->vm_start : 0);
++                               next ? next->vm_start : USER_PGTABLES_CEILING);
+       tlb_finish_mmu(&tlb, start, end);
+ }
+@@ -2308,7 +2308,7 @@ void exit_mmap(struct mm_struct *mm)
+       unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
+       vm_unacct_memory(nr_accounted);
+-      free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
++      free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+       tlb_finish_mmu(&tlb, 0, -1);
+       /*
diff --git a/queue-3.4/pci-acpi-don-t-query-osc-support-with-all-possible-controls.patch b/queue-3.4/pci-acpi-don-t-query-osc-support-with-all-possible-controls.patch
new file mode 100644 (file)
index 0000000..d9938fd
--- /dev/null
@@ -0,0 +1,67 @@
+From 545d6e189a41c94c11f55045a771118eccc9d9eb Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Thu, 28 Mar 2013 04:28:58 +0000
+Subject: PCI / ACPI: Don't query OSC support with all possible controls
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit 545d6e189a41c94c11f55045a771118eccc9d9eb upstream.
+
+Found a problem on a system whose firmware could handle pci aer.
+Firmware get error reporting after pci injecting error, before os boots.
+But after os boots, firmware can not get report anymore, even pci=noaer
+is passed.
+
+Root cause: BIOS _OSC has problem with query bit checking.
+It turns out that BIOS vendor is copying example code from ACPI Spec.
+In ACPI Spec 5.0, page 290:
+
+       If (Not(And(CDW1,1))) // Query flag clear?
+       {       // Disable GPEs for features granted native control.
+               If (And(CTRL,0x01)) // Hot plug control granted?
+               {
+                       Store(0,HPCE) // clear the hot plug SCI enable bit
+                       Store(1,HPCS) // clear the hot plug SCI status bit
+               }
+       ...
+       }
+
+When Query flag is set, And(CDW1,1) will be 1, Not(1) will return 0xfffffffe.
+So it will get into code path that should be for control set only.
+BIOS acpi code should be changed to "If (LEqual(And(CDW1,1), 0)))"
+
+Current kernel code is using _OSC query to notify firmware about support
+from OS and then use _OSC to set control bits.
+During query support, current code is using all possible controls.
+So will execute code that should be only for control set stage.
+
+That will have problem when pci=noaer or aer firmware_first is used.
+As firmware have that control set for os aer already in query support stage,
+but later the os will not do aer handling.
+
+We should avoid passing all possible controls, just use osc_control_set
+instead.
+That should workaround BIOS bugs with affected systems on the field
+as more bios vendors are copying sample code from ACPI spec.
+
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/acpi/pci_root.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/acpi/pci_root.c
++++ b/drivers/acpi/pci_root.c
+@@ -247,8 +247,8 @@ static acpi_status acpi_pci_query_osc(st
+               *control &= OSC_PCI_CONTROL_MASKS;
+               capbuf[OSC_CONTROL_TYPE] = *control | root->osc_control_set;
+       } else {
+-              /* Run _OSC query for all possible controls. */
+-              capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS;
++              /* Run _OSC query only with existing controls. */
++              capbuf[OSC_CONTROL_TYPE] = root->osc_control_set;
+       }
+       status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
diff --git a/queue-3.4/pci-pm-fix-fallback-to-pci_d0-in-pci_platform_power_transition.patch b/queue-3.4/pci-pm-fix-fallback-to-pci_d0-in-pci_platform_power_transition.patch
new file mode 100644 (file)
index 0000000..d16931b
--- /dev/null
@@ -0,0 +1,56 @@
+From 769ba7212f2059ca9fe0c73371e3d415c8c1c529 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Fri, 12 Apr 2013 13:58:17 +0000
+Subject: PCI/PM: Fix fallback to PCI_D0 in pci_platform_power_transition()
+
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+
+commit 769ba7212f2059ca9fe0c73371e3d415c8c1c529 upstream.
+
+Commit b51306c (PCI: Set device power state to PCI_D0 for device
+without native PM support) modified pci_platform_power_transition()
+by adding code causing dev->current_state for devices that don't
+support native PCI PM but are power-manageable by the platform to be
+changed to PCI_D0 regardless of the value returned by the preceding
+platform_pci_set_power_state().  In particular, that also is done
+if the platform_pci_set_power_state() has been successful, which
+causes the correct power state of the device set by
+pci_update_current_state() in that case to be overwritten by PCI_D0.
+
+Fix that mistake by making the fallback to PCI_D0 only happen if
+the platform_pci_set_power_state() has returned an error.
+
+[bhelgaas: folded in Yinghai's simplification, added URL & stable info]
+Reference: http://lkml.kernel.org/r/27806FC4E5928A408B78E88BBC67A2306F466BBA@ORSMSX101.amr.corp.intel.com
+Reported-by: Chris J. Benenati <chris.j.benenati@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/pci.c |   12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -673,15 +673,11 @@ static int pci_platform_power_transition
+               error = platform_pci_set_power_state(dev, state);
+               if (!error)
+                       pci_update_current_state(dev, state);
+-              /* Fall back to PCI_D0 if native PM is not supported */
+-              if (!dev->pm_cap)
+-                      dev->current_state = PCI_D0;
+-      } else {
++      } else
+               error = -ENODEV;
+-              /* Fall back to PCI_D0 if native PM is not supported */
+-              if (!dev->pm_cap)
+-                      dev->current_state = PCI_D0;
+-      }
++
++      if (error && !dev->pm_cap) /* Fall back to PCI_D0 */
++              dev->current_state = PCI_D0;
+       return error;
+ }
index c606788e72bf5b8f156a8c239643188d5b7533a5..1c385ec82aafe77c07586e2446ec35e1421de495 100644 (file)
@@ -24,3 +24,11 @@ tracing-fix-off-by-one-on-allocating-stat-pages.patch
 tracing-check-return-value-of-tracing_init_dentry.patch
 tracing-reset-ftrace_graph_filter_enabled-if-count-is-zero.patch
 i2c-xiic-must-always-write-16-bit-words-to-tx_fifo.patch
+sysfs-fix-use-after-free-in-case-of-concurrent-read-write-and-readdir.patch
+fix-initialization-of-cmci-cmcp-interrupts.patch
+pci-acpi-don-t-query-osc-support-with-all-possible-controls.patch
+pci-pm-fix-fallback-to-pci_d0-in-pci_platform_power_transition.patch
+wrong-asm-register-contraints-in-the-futex-implementation.patch
+wrong-asm-register-contraints-in-the-kvm-implementation.patch
+fs-fscache-stats.c-fix-memory-leak.patch
+mm-allow-arch-code-to-control-the-user-page-table-ceiling.patch
diff --git a/queue-3.4/sysfs-fix-use-after-free-in-case-of-concurrent-read-write-and-readdir.patch b/queue-3.4/sysfs-fix-use-after-free-in-case-of-concurrent-read-write-and-readdir.patch
new file mode 100644 (file)
index 0000000..448d506
--- /dev/null
@@ -0,0 +1,76 @@
+From f7db5e7660b122142410dcf36ba903c73d473250 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@canonical.com>
+Date: Tue, 2 Apr 2013 10:12:26 +0800
+Subject: sysfs: fix use after free in case of concurrent read/write and readdir
+
+From: Ming Lei <ming.lei@canonical.com>
+
+commit f7db5e7660b122142410dcf36ba903c73d473250 upstream.
+
+The inode->i_mutex isn't held when updating filp->f_pos
+in read()/write(), so the filp->f_pos might be read as
+0 or 1 in readdir() when there is concurrent read()/write()
+on this same file, then may cause use after free in readdir().
+
+The bug can be reproduced with Li Zefan's test code on the
+link:
+
+       https://patchwork.kernel.org/patch/2160771/
+
+This patch fixes the use after free under this situation.
+
+Reported-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Ming Lei <ming.lei@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/sysfs/dir.c |   15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/fs/sysfs/dir.c
++++ b/fs/sysfs/dir.c
+@@ -994,6 +994,7 @@ static int sysfs_readdir(struct file * f
+       enum kobj_ns_type type;
+       const void *ns;
+       ino_t ino;
++      loff_t off;
+       type = sysfs_ns_type(parent_sd);
+       ns = sysfs_info(dentry->d_sb)->ns[type];
+@@ -1016,6 +1017,7 @@ static int sysfs_readdir(struct file * f
+                       return 0;
+       }
+       mutex_lock(&sysfs_mutex);
++      off = filp->f_pos;
+       for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
+            pos;
+            pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) {
+@@ -1027,19 +1029,24 @@ static int sysfs_readdir(struct file * f
+               len = strlen(name);
+               ino = pos->s_ino;
+               type = dt_type(pos);
+-              filp->f_pos = pos->s_hash;
++              off = filp->f_pos = pos->s_hash;
+               filp->private_data = sysfs_get(pos);
+               mutex_unlock(&sysfs_mutex);
+-              ret = filldir(dirent, name, len, filp->f_pos, ino, type);
++              ret = filldir(dirent, name, len, off, ino, type);
+               mutex_lock(&sysfs_mutex);
+               if (ret < 0)
+                       break;
+       }
+       mutex_unlock(&sysfs_mutex);
+-      if ((filp->f_pos > 1) && !pos) { /* EOF */
+-              filp->f_pos = INT_MAX;
++
++      /* don't reference last entry if its refcount is dropped */
++      if (!pos) {
+               filp->private_data = NULL;
++
++              /* EOF and not changed as 0 or 1 in read/write path */
++              if (off == filp->f_pos && off > 1)
++                      filp->f_pos = INT_MAX;
+       }
+       return 0;
+ }
diff --git a/queue-3.4/wrong-asm-register-contraints-in-the-futex-implementation.patch b/queue-3.4/wrong-asm-register-contraints-in-the-futex-implementation.patch
new file mode 100644 (file)
index 0000000..291e7a8
--- /dev/null
@@ -0,0 +1,292 @@
+From 136f39ddc53db3bcee2befbe323a56d4fbf06da8 Mon Sep 17 00:00:00 2001
+From: Stephan Schreiber <info@fs-driver.org>
+Date: Tue, 19 Mar 2013 15:22:27 -0700
+Subject: Wrong asm register contraints in the futex implementation
+
+From: Stephan Schreiber <info@fs-driver.org>
+
+commit 136f39ddc53db3bcee2befbe323a56d4fbf06da8 upstream.
+
+The Linux Kernel contains some inline assembly source code which has
+wrong asm register constraints in arch/ia64/include/asm/futex.h.
+
+I observed this on Kernel 3.2.23 but it is also true on the most
+recent Kernel 3.9-rc1.
+
+File arch/ia64/include/asm/futex.h:
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       {
+               register unsigned long r8 __asm ("r8");
+               unsigned long prev;
+               __asm__ __volatile__(
+                       "       mf;;                                    \n"
+                       "       mov %0=r0                               \n"
+                       "       mov ar.ccv=%4;;                         \n"
+                       "[1:]   cmpxchg4.acq %1=[%2],%3,ar.ccv          \n"
+                       "       .xdata4 \"__ex_table\", 1b-., 2f-.      \n"
+                       "[2:]"
+                       : "=r" (r8), "=r" (prev)
+                       : "r" (uaddr), "r" (newval),
+                         "rO" ((long) (unsigned) oldval)
+                       : "memory");
+               *uval = prev;
+               return r8;
+       }
+}
+
+The list of output registers is
+                       : "=r" (r8), "=r" (prev)
+The constraint "=r" means that the GCC has to maintain that these vars
+are in registers and contain valid info when the program flow leaves
+the assembly block (output registers).
+But "=r" also means that GCC can put them in registers that are used
+as input registers. Input registers are uaddr, newval, oldval on the
+example.
+The second assembly instruction
+                       "       mov %0=r0                               \n"
+is the first one which writes to a register; it sets %0 to 0. %0 means
+the first register operand; it is r8 here. (The r0 is read-only and
+always 0 on the Itanium; it can be used if an immediate zero value is
+needed.)
+This instruction might overwrite one of the other registers which are
+still needed.
+Whether it really happens depends on how GCC decides what registers it
+uses and how it optimizes the code.
+
+The objdump utility can give us disassembly.
+The futex_atomic_cmpxchg_inatomic() function is inline, so we have to
+look for a module that uses the function. This is the
+cmpxchg_futex_value_locked() function in
+kernel/futex.c:
+
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+                                     u32 uval, u32 newval)
+{
+       int ret;
+
+       pagefault_disable();
+       ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+       pagefault_enable();
+
+       return ret;
+}
+
+Now the disassembly. At first from the Kernel package 3.2.23 which has
+been compiled with GCC 4.4; remember this Kernel seemed to work:
+objdump -d linux-3.2.23/debian/build/build_ia64_none_mckinley/kernel/futex.o
+
+0000000000000230 <cmpxchg_futex_value_locked>:
+      230:     0b 18 80 1b 18 21       [MMI]       adds r3=3168,r13;;
+      236:     80 40 0d 00 42 00                   adds r8=40,r3
+      23c:     00 00 04 00                         nop.i 0x0;;
+      240:     0b 50 00 10 10 10       [MMI]       ld4 r10=[r8];;
+      246:     90 08 28 00 42 00                   adds r9=1,r10
+      24c:     00 00 04 00                         nop.i 0x0;;
+      250:     09 00 00 00 01 00       [MMI]       nop.m 0x0
+      256:     00 48 20 20 23 00                   st4 [r8]=r9
+      25c:     00 00 04 00                         nop.i 0x0;;
+      260:     08 10 80 06 00 21       [MMI]       adds r2=32,r3
+      266:     00 00 00 02 00 00                   nop.m 0x0
+      26c:     02 08 f1 52                         extr.u r16=r33,0,61
+      270:     05 40 88 00 08 e0       [MLX]       addp4 r8=r34,r0
+      276:     ff ff 0f 00 00 e0                   movl r15=0xfffffffbfff;;
+      27c:     f1 f7 ff 65
+      280:     09 70 00 04 18 10       [MMI]       ld8 r14=[r2]
+      286:     00 00 00 02 00 c0                   nop.m 0x0
+      28c:     f0 80 1c d0                         cmp.ltu p6,p7=r15,r16;;
+      290:     08 40 fc 1d 09 3b       [MMI]       cmp.eq p8,p9=-1,r14
+      296:     00 00 00 02 00 40                   nop.m 0x0
+      29c:     e1 08 2d d0                         cmp.ltu p10,p11=r14,r33
+      2a0:     56 01 10 00 40 10       [BBB] (p10) br.cond.spnt.few 2e0
+<cmpxchg_futex_value_locked+0xb0>
+      2a6:     02 08 00 80 21 03             (p08) br.cond.dpnt.few 2b0
+<cmpxchg_futex_value_locked+0x80>
+      2ac:     40 00 00 41                   (p06) br.cond.spnt.few 2e0
+<cmpxchg_futex_value_locked+0xb0>
+      2b0:     0a 00 00 00 22 00       [MMI]       mf;;
+      2b6:     80 00 00 00 42 00                   mov r8=r0
+      2bc:     00 00 04 00                         nop.i 0x0
+      2c0:     0b 00 20 40 2a 04       [MMI]       mov.m ar.ccv=r8;;
+      2c6:     10 1a 85 22 20 00                   cmpxchg4.acq r33=[r33],r35,ar.ccv
+      2cc:     00 00 04 00                         nop.i 0x0;;
+      2d0:     10 00 84 40 90 11       [MIB]       st4 [r32]=r33
+      2d6:     00 00 00 02 00 00                   nop.i 0x0
+      2dc:     20 00 00 40                         br.few 2f0
+<cmpxchg_futex_value_locked+0xc0>
+      2e0:     09 40 c8 f9 ff 27       [MMI]       mov r8=-14
+      2e6:     00 00 00 02 00 00                   nop.m 0x0
+      2ec:     00 00 04 00                         nop.i 0x0;;
+      2f0:     0b 58 20 1a 19 21       [MMI]       adds r11=3208,r13;;
+      2f6:     20 01 2c 20 20 00                   ld4 r18=[r11]
+      2fc:     00 00 04 00                         nop.i 0x0;;
+      300:     0b 88 fc 25 3f 23       [MMI]       adds r17=-1,r18;;
+      306:     00 88 2c 20 23 00                   st4 [r11]=r17
+      30c:     00 00 04 00                         nop.i 0x0;;
+      310:     11 00 00 00 01 00       [MIB]       nop.m 0x0
+      316:     00 00 00 02 00 80                   nop.i 0x0
+      31c:     08 00 84 00                         br.ret.sptk.many b0;;
+
+The lines
+      2b0:     0a 00 00 00 22 00       [MMI]       mf;;
+      2b6:     80 00 00 00 42 00                   mov r8=r0
+      2bc:     00 00 04 00                         nop.i 0x0
+      2c0:     0b 00 20 40 2a 04       [MMI]       mov.m ar.ccv=r8;;
+      2c6:     10 1a 85 22 20 00                   cmpxchg4.acq r33=[r33],r35,ar.ccv
+      2cc:     00 00 04 00                         nop.i 0x0;;
+are the instructions of the assembly block.
+The line
+      2b6:     80 00 00 00 42 00                   mov r8=r0
+sets the r8 register to 0 and after that
+      2c0:     0b 00 20 40 2a 04       [MMI]       mov.m ar.ccv=r8;;
+prepares the 'oldvalue' for the cmpxchg but it takes it from r8. This
+is wrong.
+What happened here is what I explained above: An input register is
+overwritten which is still needed.
+The register operand constraints in futex.h are wrong.
+
+(The problem doesn't occur when the Kernel is compiled with GCC 4.6.)
+
+The attached patch fixes the register operand constraints in futex.h.
+The code after patching of it:
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       {
+               register unsigned long r8 __asm ("r8") = 0;
+               unsigned long prev;
+               __asm__ __volatile__(
+                       "       mf;;                                    \n"
+                       "       mov ar.ccv=%4;;                         \n"
+                       "[1:]   cmpxchg4.acq %1=[%2],%3,ar.ccv          \n"
+                       "       .xdata4 \"__ex_table\", 1b-., 2f-.      \n"
+                       "[2:]"
+                       : "+r" (r8), "=&r" (prev)
+                       : "r" (uaddr), "r" (newval),
+                         "rO" ((long) (unsigned) oldval)
+                       : "memory");
+               *uval = prev;
+               return r8;
+       }
+}
+
+I also initialized the 'r8' var with the C programming language.
+The _asm qualifier on the definition of the 'r8' var forces GCC to use
+the r8 processor register for it.
+I don't believe that we should use inline assembly for zeroing out a
+local variable.
+The constraint is
+"+r" (r8)
+what means that it is both an input register and an output register.
+Note that the page fault handler will modify the r8 register which
+will be the return value of the function.
+The real fix is
+"=&r" (prev)
+The & means that GCC must not use any of the input registers to place
+this output register in.
+
+Patched the Kernel 3.2.23 and compiled it with GCC4.4:
+
+0000000000000230 <cmpxchg_futex_value_locked>:
+      230:     0b 18 80 1b 18 21       [MMI]       adds r3=3168,r13;;
+      236:     80 40 0d 00 42 00                   adds r8=40,r3
+      23c:     00 00 04 00                         nop.i 0x0;;
+      240:     0b 50 00 10 10 10       [MMI]       ld4 r10=[r8];;
+      246:     90 08 28 00 42 00                   adds r9=1,r10
+      24c:     00 00 04 00                         nop.i 0x0;;
+      250:     09 00 00 00 01 00       [MMI]       nop.m 0x0
+      256:     00 48 20 20 23 00                   st4 [r8]=r9
+      25c:     00 00 04 00                         nop.i 0x0;;
+      260:     08 10 80 06 00 21       [MMI]       adds r2=32,r3
+      266:     20 12 01 10 40 00                   addp4 r34=r34,r0
+      26c:     02 08 f1 52                         extr.u r16=r33,0,61
+      270:     05 40 00 00 00 e1       [MLX]       mov r8=r0
+      276:     ff ff 0f 00 00 e0                   movl r15=0xfffffffbfff;;
+      27c:     f1 f7 ff 65
+      280:     09 70 00 04 18 10       [MMI]       ld8 r14=[r2]
+      286:     00 00 00 02 00 c0                   nop.m 0x0
+      28c:     f0 80 1c d0                         cmp.ltu p6,p7=r15,r16;;
+      290:     08 40 fc 1d 09 3b       [MMI]       cmp.eq p8,p9=-1,r14
+      296:     00 00 00 02 00 40                   nop.m 0x0
+      29c:     e1 08 2d d0                         cmp.ltu p10,p11=r14,r33
+      2a0:     56 01 10 00 40 10       [BBB] (p10) br.cond.spnt.few 2e0
+<cmpxchg_futex_value_locked+0xb0>
+      2a6:     02 08 00 80 21 03             (p08) br.cond.dpnt.few 2b0
+<cmpxchg_futex_value_locked+0x80>
+      2ac:     40 00 00 41                   (p06) br.cond.spnt.few 2e0
+<cmpxchg_futex_value_locked+0xb0>
+      2b0:     0b 00 00 00 22 00       [MMI]       mf;;
+      2b6:     00 10 81 54 08 00                   mov.m ar.ccv=r34
+      2bc:     00 00 04 00                         nop.i 0x0;;
+      2c0:     09 58 8c 42 11 10       [MMI]       cmpxchg4.acq r11=[r33],r35,ar.ccv
+      2c6:     00 00 00 02 00 00                   nop.m 0x0
+      2cc:     00 00 04 00                         nop.i 0x0;;
+      2d0:     10 00 2c 40 90 11       [MIB]       st4 [r32]=r11
+      2d6:     00 00 00 02 00 00                   nop.i 0x0
+      2dc:     20 00 00 40                         br.few 2f0
+<cmpxchg_futex_value_locked+0xc0>
+      2e0:     09 40 c8 f9 ff 27       [MMI]       mov r8=-14
+      2e6:     00 00 00 02 00 00                   nop.m 0x0
+      2ec:     00 00 04 00                         nop.i 0x0;;
+      2f0:     0b 88 20 1a 19 21       [MMI]       adds r17=3208,r13;;
+      2f6:     30 01 44 20 20 00                   ld4 r19=[r17]
+      2fc:     00 00 04 00                         nop.i 0x0;;
+      300:     0b 90 fc 27 3f 23       [MMI]       adds r18=-1,r19;;
+      306:     00 90 44 20 23 00                   st4 [r17]=r18
+      30c:     00 00 04 00                         nop.i 0x0;;
+      310:     11 00 00 00 01 00       [MIB]       nop.m 0x0
+      316:     00 00 00 02 00 80                   nop.i 0x0
+      31c:     08 00 84 00                         br.ret.sptk.many b0;;
+
+Much better.
+There is a
+      270:     05 40 00 00 00 e1       [MLX]       mov r8=r0
+which was generated by C code r8 = 0. Below
+      2b6:     00 10 81 54 08 00                   mov.m ar.ccv=r34
+which means that oldval is no longer overwritten.
+
+This is Debian bug#702641
+(http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=702641).
+
+The patch is applicable on Kernel 3.9-rc1, 3.2.23 and many other versions.
+
+Signed-off-by: Stephan Schreiber <info@fs-driver.org>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/ia64/include/asm/futex.h |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/arch/ia64/include/asm/futex.h
++++ b/arch/ia64/include/asm/futex.h
+@@ -106,16 +106,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
+               return -EFAULT;
+       {
+-              register unsigned long r8 __asm ("r8");
++              register unsigned long r8 __asm ("r8") = 0;
+               unsigned long prev;
+               __asm__ __volatile__(
+                       "       mf;;                                    \n"
+-                      "       mov %0=r0                               \n"
+                       "       mov ar.ccv=%4;;                         \n"
+                       "[1:]   cmpxchg4.acq %1=[%2],%3,ar.ccv          \n"
+                       "       .xdata4 \"__ex_table\", 1b-., 2f-.      \n"
+                       "[2:]"
+-                      : "=r" (r8), "=r" (prev)
++                      : "+r" (r8), "=&r" (prev)
+                       : "r" (uaddr), "r" (newval),
+                         "rO" ((long) (unsigned) oldval)
+                       : "memory");
diff --git a/queue-3.4/wrong-asm-register-contraints-in-the-kvm-implementation.patch b/queue-3.4/wrong-asm-register-contraints-in-the-kvm-implementation.patch
new file mode 100644 (file)
index 0000000..99c754b
--- /dev/null
@@ -0,0 +1,95 @@
+From de53e9caa4c6149ef4a78c2f83d7f5b655848767 Mon Sep 17 00:00:00 2001
+From: Stephan Schreiber <info@fs-driver.org>
+Date: Tue, 19 Mar 2013 15:27:12 -0700
+Subject: Wrong asm register contraints in the kvm implementation
+
+From: Stephan Schreiber <info@fs-driver.org>
+
+commit de53e9caa4c6149ef4a78c2f83d7f5b655848767 upstream.
+
+The Linux Kernel contains some inline assembly source code which has
+wrong asm register constraints in arch/ia64/kvm/vtlb.c.
+
+I observed this on Kernel 3.2.35 but it is also true on the most
+recent Kernel 3.9-rc1.
+
+File arch/ia64/kvm/vtlb.c:
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+       u64 ret;
+       struct thash_data *data;
+
+       data = __vtr_lookup(current_vcpu, iha, D_TLB);
+       if (data != NULL)
+               thash_vhpt_insert(current_vcpu, data->page_flags,
+                       data->itir, iha, D_TLB);
+
+       asm volatile (
+                       "rsm psr.ic|psr.i;;"
+                       "srlz.d;;"
+                       "ld8.s r9=[%1];;"
+                       "tnat.nz p6,p7=r9;;"
+                       "(p6) mov %0=1;"
+                       "(p6) mov r9=r0;"
+                       "(p7) extr.u r9=r9,0,53;;"
+                       "(p7) mov %0=r0;"
+                       "(p7) st8 [%2]=r9;;"
+                       "ssm psr.ic;;"
+                       "srlz.d;;"
+                       "ssm psr.i;;"
+                       "srlz.d;;"
+                       : "=r"(ret) : "r"(iha), "r"(pte):"memory");
+
+       return ret;
+}
+
+The list of output registers is
+                       : "=r"(ret) : "r"(iha), "r"(pte):"memory");
+The constraint "=r" means that GCC has to ensure that these vars
+are in registers and contain valid info when the program flow leaves
+the assembly block (output registers).
+But "=r" also means that GCC can put them in registers that are used
+as input registers. Input registers are iha, pte in the example.
+If the predicate p7 is true, the 8th assembly instruction
+                       "(p7) mov %0=r0;"
+is the first one which writes to a register which is maintained by the
+register constraints; it sets %0. %0 means the first register operand;
+it is ret here.
+This instruction might overwrite the %2 register (pte) which is needed
+by the next instruction:
+                       "(p7) st8 [%2]=r9;;"
+Whether it really happens depends on how GCC decides what registers it
+uses and how it optimizes the code.
+
+The attached patch fixes the register operand constraints in
+arch/ia64/kvm/vtlb.c.
+The register constraints should be
+                       : "=&r"(ret) : "r"(iha), "r"(pte):"memory");
+The & means that GCC must not use any of the input registers to place
+this output register in.
+
+This is Debian bug#702639
+(http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=702639).
+
+The patch is applicable on Kernel 3.9-rc1, 3.2.35 and many other versions.
+
+Signed-off-by: Stephan Schreiber <info@fs-driver.org>
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/ia64/kvm/vtlb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/ia64/kvm/vtlb.c
++++ b/arch/ia64/kvm/vtlb.c
+@@ -256,7 +256,7 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+                       "srlz.d;;"
+                       "ssm psr.i;;"
+                       "srlz.d;;"
+-                      : "=r"(ret) : "r"(iha), "r"(pte):"memory");
++                      : "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
+       return ret;
+ }