From: Greg Kroah-Hartman
Date: Fri, 24 Sep 2010 16:03:01 +0000 (-0700)
Subject: .32 patches
X-Git-Tag: v2.6.35.6~5
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=adbb25dca172c3e91e062fc3565497afa0877d71;p=thirdparty%2Fkernel%2Fstable-queue.git

.32 patches
---

diff --git a/queue-2.6.32/series b/queue-2.6.32/series
index ef0f8274be2..9f4975f1a97 100644
--- a/queue-2.6.32/series
+++ b/queue-2.6.32/series
@@ -65,3 +65,4 @@ pata_pdc202xx_old-fix-UDMA-mode-for-PDC2026x-chipset.patch
 mips-sibyte-fix-m3-tlb-exception-handler-workaround.patch
 sis-agp-remove-sis-760-handled-by-amd64-agp.patch
 alpha-fix-printk-format-errors.patch
+x86-add-memory-modify-constraints-to-xchg-and-cmpxchg.patch
diff --git a/queue-2.6.32/x86-add-memory-modify-constraints-to-xchg-and-cmpxchg.patch b/queue-2.6.32/x86-add-memory-modify-constraints-to-xchg-and-cmpxchg.patch
new file mode 100644
index 00000000000..b8feab23fc2
--- /dev/null
+++ b/queue-2.6.32/x86-add-memory-modify-constraints-to-xchg-and-cmpxchg.patch
@@ -0,0 +1,131 @@
+From 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 Mon Sep 17 00:00:00 2001
+From: H. Peter Anvin
+Date: Tue, 27 Jul 2010 17:01:49 -0700
+Subject: x86: Add memory modify constraints to xchg() and cmpxchg()
+
+From: H. Peter Anvin
+
+commit 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 upstream.
+
+[ Backport to .32 by Tomáš Janoušek ]
+
+xchg() and cmpxchg() modify their memory operands, not merely read
+them. For some versions of gcc the "memory" clobber has apparently
+dealt with the situation, but not for all.
+
+Originally-by: Linus Torvalds
+Signed-off-by: H. Peter Anvin
+Cc: Glauber Costa
+Cc: Avi Kivity
+Cc: Peter Palfrader
+Cc: Greg KH
+Cc: Alan Cox
+Cc: Zachary Amsden
+Cc: Marcelo Tosatti
+LKML-Reference: <4C4F7277.8050306@zytor.com>
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/cmpxchg_32.h |   65 +++++++++++---------------------------
+ arch/x86/include/asm/cmpxchg_64.h |    4 --
+ 2 files changed, 20 insertions(+), 49 deletions(-)
+
+--- a/arch/x86/include/asm/cmpxchg_32.h
++++ b/arch/x86/include/asm/cmpxchg_32.h
+@@ -17,60 +17,33 @@ struct __xchg_dummy {
+ #define __xg(x) ((struct __xchg_dummy *)(x))
+ 
+ /*
+- * The semantics of XCHGCMP8B are a bit strange, this is why
+- * there is a loop and the loading of %%eax and %%edx has to
+- * be inside. This inlines well in most cases, the cached
+- * cost is around ~38 cycles. (in the future we might want
+- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+- * might have an implicit FPU-save as a cost, so it's not
+- * clear which path to go.)
++ * CMPXCHG8B only writes to the target if we had the previous
++ * value in registers, otherwise it acts as a read and gives us the
++ * "new previous" value. That is why there is a loop. Preloading
++ * EDX:EAX is a performance optimization: in the common case it means
++ * we need only one locked operation.
+  *
+- * cmpxchg8b must be used with the lock prefix here to allow
+- * the instruction to be executed atomically, see page 3-102
+- * of the instruction set reference 24319102.pdf. We need
+- * the reader side to see the coherent 64bit value.
++ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
++ * least an FPU save and/or %cr0.ts manipulation.
++ *
++ * cmpxchg8b must be used with the lock prefix here to allow the
++ * instruction to be executed atomically. We need to have the reader
++ * side to see the coherent 64bit value.
+  */
+-static inline void __set_64bit(unsigned long long *ptr,
+-			       unsigned int low, unsigned int high)
++static inline void set_64bit(volatile u64 *ptr, u64 value)
+ {
++	u32 low = value;
++	u32 high = value >> 32;
++	u64 prev = *ptr;
++
+ 	asm volatile("\n1:\t"
+-		     "movl (%1), %%eax\n\t"
+-		     "movl 4(%1), %%edx\n\t"
+ 		     LOCK_PREFIX "cmpxchg8b %0\n\t"
+ 		     "jnz 1b"
+-		     : "=m"(*ptr)
+-		     : "D" (ptr),
+-		       "b"(low),
+-		       "c"(high)
+-		     : "ax", "dx", "memory");
+-}
+-
+-static inline void __set_64bit_constant(unsigned long long *ptr,
+-					unsigned long long value)
+-{
+-	__set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
++		     : "=m" (*ptr), "+A" (prev)
++		     : "b" (low), "c" (high)
++		     : "memory");
+ }
+ 
+-#define ll_low(x)	*(((unsigned int *)&(x)) + 0)
+-#define ll_high(x)	*(((unsigned int *)&(x)) + 1)
+-
+-static inline void __set_64bit_var(unsigned long long *ptr,
+-				   unsigned long long value)
+-{
+-	__set_64bit(ptr, ll_low(value), ll_high(value));
+-}
+-
+-#define set_64bit(ptr, value)			\
+-	(__builtin_constant_p((value))		\
+-	 ? __set_64bit_constant((ptr), (value))	\
+-	 : __set_64bit_var((ptr), (value)))
+-
+-#define _set_64bit(ptr, value)					\
+-	(__builtin_constant_p(value)				\
+-	 ? __set_64bit(ptr, (unsigned int)(value),		\
+-		       (unsigned int)((value) >> 32))		\
+-	 : __set_64bit(ptr, ll_low((value)), ll_high((value))))
+-
+ /*
+  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+  * Note 2: xchg has side effect, so that attribute volatile is necessary,
+--- a/arch/x86/include/asm/cmpxchg_64.h
++++ b/arch/x86/include/asm/cmpxchg_64.h
+@@ -8,13 +8,11 @@
+ 
+ #define __xg(x) ((volatile long *)(x))
+ 
+-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
++static inline void set_64bit(volatile u64 *ptr, u64 val)
+ {
+ 	*ptr = val;
+ }
+ 
+-#define _set_64bit set_64bit
+-
+ /*
+  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+  * Note 2: xchg has side effect, so that attribute volatile is necessary,
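
For readers of the queue, a minimal user-space sketch of the constraint pattern
this patch moves to: xchg and cmpxchg both read and write their memory operand,
so the operand is declared "+m" (read/write) rather than relying on an "=m"
output plus a "memory" clobber alone, which not every gcc version treats as a
read-modify-write. The helpers below are made-up names (my_xchg_u32,
my_cmpxchg_u32), not kernel code; they build with gcc or clang on x86/x86-64:

#include <stdio.h>

/* Hypothetical stand-alone illustration; not the kernel's implementation.
 * The memory operand is "+m": both read and written by the instruction. */
static inline unsigned int my_xchg_u32(unsigned int *ptr, unsigned int val)
{
	/* xchg with a memory operand is implicitly locked; the "memory"
	 * clobber is kept as a compiler barrier, as the kernel does. */
	asm volatile("xchgl %0, %1"
		     : "+m" (*ptr), "+r" (val)
		     : : "memory");
	return val;	/* previous contents of *ptr */
}

static inline unsigned int my_cmpxchg_u32(unsigned int *ptr,
					  unsigned int old, unsigned int newval)
{
	unsigned int prev;

	/* If *ptr == old, store newval; either way return what was in *ptr.
	 * Again "+m", since cmpxchg may write the memory operand. */
	asm volatile("lock; cmpxchgl %2, %1"
		     : "=a" (prev), "+m" (*ptr)
		     : "r" (newval), "0" (old)
		     : "memory");
	return prev;
}

int main(void)
{
	unsigned int x = 1;
	unsigned int old = my_xchg_u32(&x, 5);

	printf("xchg: old=%u new=%u\n", old, x);	/* old=1 new=5 */

	unsigned int seen = my_cmpxchg_u32(&x, 5, 9);	/* succeeds */
	printf("cmpxchg: seen=%u now=%u\n", seen, x);	/* seen=5 now=9 */
	return 0;
}

A caller detects cmpxchg success by comparing the returned value with old. The
kernel's own xchg()/cmpxchg() additionally dispatch on 1-, 2-, 4- and (on
64-bit) 8-byte operand sizes and use LOCK_PREFIX, which can be patched out on
uniprocessor builds.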