From: Greg Kroah-Hartman Date: Mon, 19 Apr 2010 18:31:36 +0000 (-0700) Subject: .33 patches X-Git-Tag: v2.6.32.12~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2bd84889168e07723ef85e919910eaeb6363d1db;p=thirdparty%2Fkernel%2Fstable-queue.git .33 patches --- diff --git a/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch b/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch new file mode 100644 index 00000000000..067edc8510d --- /dev/null +++ b/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch @@ -0,0 +1,40 @@ +From 462d60577a997aa87c935ae4521bd303733a9f2b Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Sat, 30 Jan 2010 16:11:21 -0500 +Subject: fix NFS4 handling of mountpoint stat + +From: Al Viro + +commit 462d60577a997aa87c935ae4521bd303733a9f2b upstream. + +RFC says we need to follow the chain of mounts if there's more +than one stacked on that point. + +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4xdr.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -2121,9 +2121,15 @@ out_acl: + * and this is the root of a cross-mounted filesystem. + */ + if (ignore_crossmnt == 0 && +- exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) { +- err = vfs_getattr(exp->ex_path.mnt->mnt_parent, +- exp->ex_path.mnt->mnt_mountpoint, &stat); ++ dentry == exp->ex_path.mnt->mnt_root) { ++ struct path path = exp->ex_path; ++ path_get(&path); ++ while (follow_up(&path)) { ++ if (path.dentry != path.mnt->mnt_root) ++ break; ++ } ++ err = vfs_getattr(path.mnt, path.dentry, &stat); ++ path_put(&path); + if (err) + goto out_nfserr; + } diff --git a/queue-2.6.33/series b/queue-2.6.33/series index 04e7d5cba88..866fa40939c 100644 --- a/queue-2.6.33/series +++ b/queue-2.6.33/series @@ -82,3 +82,10 @@ x86-cacheinfo-calculate-l3-indices.patch x86-cacheinfo-remove-numa-dependency-fix-for-amd-fam10h-rev-d1.patch x86-cacheinfo-enable-l3-cid-only-on-amd.patch dm-ioctl-introduce-flag-indicating-uevent-was-generated.patch +x86-32-clean-up-rwsem-inline-asm-statements.patch +x86-clean-up-rwsem-type-system.patch +x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch +x86-64-support-native-xadd-rwsem-implementation.patch +x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch +x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch +fix-nfs4-handling-of-mountpoint-stat.patch diff --git a/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch b/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch new file mode 100644 index 00000000000..2a212a4232c --- /dev/null +++ b/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch @@ -0,0 +1,133 @@ +From 59c33fa7791e9948ba467c2b83e307a0d087ab49 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 16:21:09 -0800 +Subject: x86-32: clean up rwsem inline asm statements + +From: Linus Torvalds + +commit 59c33fa7791e9948ba467c2b83e307a0d087ab49 upstream. + +This makes gcc use the right register names and instruction operand sizes +automatically for the rwsem inline asm statements. + +So instead of using "(%%eax)" to specify the memory address that is the +semaphore, we use "(%1)" or similar. And instead of forcing the operation +to always be 32-bit, we use "%z0", taking the size from the actual +semaphore data structure itself. 
+ +This doesn't actually matter on x86-32, but if we want to use the same +inline asm for x86-64, we'll need to have the compiler generate the proper +64-bit names for the registers (%rax instead of %eax), and if we want to +use a 64-bit counter too (in order to avoid the 15-bit limit on the +write counter that limits concurrent users to 32767 threads), we'll need +to be able to generate instructions with "q" accesses rather than "l". + +Since this header currently isn't enabled on x86-64, none of that matters, +but we do want to use the xadd version of the semaphores rather than have +to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended +when you have lots of threads all taking page faults, and the fallback +rwsem code that uses a spinlock performs abysmally badly in that case. + +[ hpa: modified the patch to skip size suffixes entirely when they are + redundant due to register operands. ] + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -105,7 +105,7 @@ do { \ + static inline void __down_read(struct rw_semaphore *sem) + { + asm volatile("# beginning down_read\n\t" +- LOCK_PREFIX " incl (%%eax)\n\t" ++ LOCK_PREFIX " inc%z0 (%1)\n\t" + /* adds 0x00000001, returns the old value */ + " jns 1f\n" + " call call_rwsem_down_read_failed\n" +@@ -123,12 +123,12 @@ static inline int __down_read_trylock(st + { + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" +- " movl %0,%1\n\t" ++ " mov %0,%1\n\t" + "1:\n\t" +- " movl %1,%2\n\t" +- " addl %3,%2\n\t" ++ " mov %1,%2\n\t" ++ " add %3,%2\n\t" + " jle 2f\n\t" +- LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++ LOCK_PREFIX " cmpxchg %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -147,9 +147,9 @@ static inline void __down_write_nested(s + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" +- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtract 0x0000ffff, returns the old value */ +- " testl %%edx,%%edx\n\t" ++ " test %1,%1\n\t" + /* was the count 0 before? 
*/ + " jz 1f\n" + " call call_rwsem_down_write_failed\n" +@@ -185,7 +185,7 @@ static inline void __up_read(struct rw_s + { + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" +- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtracts 1, returns the old value */ + " jns 1f\n\t" + " call call_rwsem_wake\n" +@@ -201,18 +201,18 @@ static inline void __up_read(struct rw_s + */ + static inline void __up_write(struct rw_semaphore *sem) + { ++ unsigned long tmp; + asm volatile("# beginning __up_write\n\t" +- " movl %2,%%edx\n\t" +- LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* tries to transition + 0xffff0001 -> 0x00000000 */ + " jz 1f\n" + " call call_rwsem_wake\n" + "1:\n\t" + "# ending __up_write\n" +- : "+m" (sem->count) +- : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) +- : "memory", "cc", "edx"); ++ : "+m" (sem->count), "=d" (tmp) ++ : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS) ++ : "memory", "cc"); + } + + /* +@@ -221,7 +221,7 @@ static inline void __up_write(struct rw_ + static inline void __downgrade_write(struct rw_semaphore *sem) + { + asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX " addl %2,(%%eax)\n\t" ++ LOCK_PREFIX " add%z0 %2,(%1)\n\t" + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" +@@ -237,7 +237,7 @@ static inline void __downgrade_write(str + */ + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "add%z0 %1,%0" + : "+m" (sem->count) + : "ir" (delta)); + } diff --git a/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch b/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch new file mode 100644 index 00000000000..48e7b2ee4af --- /dev/null +++ b/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch @@ -0,0 +1,141 @@ +From 1838ef1d782f7527e6defe87e180598622d2d071 Mon Sep 17 00:00:00 2001 +From: H. Peter Anvin +Date: Mon, 18 Jan 2010 14:00:34 -0800 +Subject: x86-64, rwsem: 64-bit xadd rwsem implementation + +From: H. Peter Anvin + +commit 1838ef1d782f7527e6defe87e180598622d2d071 upstream. + +For x86-64, 32767 threads really is not enough. Change rwsem_count_t +to a signed long, so that it is 64 bits on x86-64. + +This required the following changes to the assembly code: + +a) %z0 doesn't work on all versions of gcc! At least gcc 4.4.2 as + shipped with Fedora 12 emits "ll" not "q" for 64 bits, even for + integer operands. Newer gccs apparently do this correctly, but + avoid this problem by using the _ASM_ macros instead of %z. +b) 64 bits immediates are only allowed in "movq $imm,%reg" + constructs... no others. Change some of the constraints to "e", + and fix the one case where we would have had to use an invalid + immediate -- in that case, we only care about the upper half + anyway, so just access the upper half. + +Signed-off-by: H. 
Peter Anvin +Cc: Linus Torvalds +LKML-Reference: +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 53 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 40 insertions(+), 13 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + struct rwsem_waiter; + +@@ -56,18 +57,24 @@ extern asmregparm struct rw_semaphore * + /* + * the semaphore definition + * +- * The bias values and the counter type needs to be extended to 64 bits +- * if we want to have more than 32767 potential readers/writers ++ * The bias values and the counter type limits the number of ++ * potential readers/writers to 32767 for 32 bits and 2147483647 ++ * for 64 bits. + */ + +-#define RWSEM_UNLOCKED_VALUE 0x00000000 +-#define RWSEM_ACTIVE_BIAS 0x00000001 +-#define RWSEM_ACTIVE_MASK 0x0000ffff +-#define RWSEM_WAITING_BIAS (-0x00010000) ++#ifdef CONFIG_X86_64 ++# define RWSEM_ACTIVE_MASK 0xffffffffL ++#else ++# define RWSEM_ACTIVE_MASK 0x0000ffffL ++#endif ++ ++#define RWSEM_UNLOCKED_VALUE 0x00000000L ++#define RWSEM_ACTIVE_BIAS 0x00000001L ++#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) + #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS + #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +-typedef signed int rwsem_count_t; ++typedef signed long rwsem_count_t; + + struct rw_semaphore { + rwsem_count_t count; +@@ -110,7 +117,7 @@ do { \ + static inline void __down_read(struct rw_semaphore *sem) + { + asm volatile("# beginning down_read\n\t" +- LOCK_PREFIX " inc%z0 (%1)\n\t" ++ LOCK_PREFIX _ASM_INC "(%1)\n\t" + /* adds 0x00000001, returns the old value */ + " jns 1f\n" + " call call_rwsem_down_read_failed\n" +@@ -225,8 +232,25 @@ static inline void __up_write(struct rw_ + */ + static inline void __downgrade_write(struct rw_semaphore *sem) + { ++#ifdef CONFIG_X86_64 ++# if RWSEM_WAITING_BIAS != -0x100000000 ++# error "This code assumes RWSEM_WAITING_BIAS == -2^32" ++# endif ++ ++ /* 64-bit immediates are special and expensive, and not needed here */ ++ asm volatile("# beginning __downgrade_write\n\t" ++ LOCK_PREFIX "incl 4(%1)\n\t" ++ /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */ ++ " jns 1f\n\t" ++ " call call_rwsem_downgrade_wake\n" ++ "1:\n\t" ++ "# ending __downgrade_write\n" ++ : "+m" (sem->count) ++ : "a" (sem) ++ : "memory", "cc"); ++#else + asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX " add%z0 %2,(%1)\n\t" ++ LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" +@@ -235,22 +259,25 @@ static inline void __downgrade_write(str + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_WAITING_BIAS) + : "memory", "cc"); ++#endif + } + + /* + * implement atomic add functionality + */ +-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) ++static inline void rwsem_atomic_add(rwsem_count_t delta, ++ struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "add%z0 %1,%0" ++ asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" + : "+m" (sem->count) +- : "ir" (delta)); ++ : "er" (delta)); + } + + /* + * implement exchange and add functionality + */ +-static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem) ++static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, ++ struct rw_semaphore *sem) + { + rwsem_count_t tmp = delta; + diff --git a/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch 
b/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch new file mode 100644 index 00000000000..8489e98d16d --- /dev/null +++ b/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch @@ -0,0 +1,79 @@ +From 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Sat, 13 Feb 2010 10:33:12 +0200 +Subject: x86-64, rwsem: Avoid store forwarding hazard in __downgrade_write + +From: Avi Kivity + +commit 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc upstream. + +The Intel Architecture Optimization Reference Manual states that a short +load that follows a long store to the same object will suffer a store +forwading penalty, particularly if the two accesses use different addresses. +Trivially, a long load that follows a short store will also suffer a penalty. + +__downgrade_write() in rwsem incurs both penalties: the increment operation +will not be able to reuse a recently-loaded rwsem value, and its result will +not be reused by any recently-following rwsem operation. + +A comment in the code states that this is because 64-bit immediates are +special and expensive; but while they are slightly special (only a single +instruction allows them), they aren't expensive: a test shows that two loops, +one loading a 32-bit immediate and one loading a 64-bit immediate, both take +1.5 cycles per iteration. + +Fix this by changing __downgrade_write to use the same add instruction on +i386 and on x86_64, so that it uses the same operand size as all the other +rwsem functions. + +Signed-off-by: Avi Kivity +LKML-Reference: <1266049992-17419-1-git-send-email-avi@redhat.com> +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 25 +++++-------------------- + 1 file changed, 5 insertions(+), 20 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -232,34 +232,19 @@ static inline void __up_write(struct rw_ + */ + static inline void __downgrade_write(struct rw_semaphore *sem) + { +-#ifdef CONFIG_X86_64 +-# if RWSEM_WAITING_BIAS != -0x100000000 +-# error "This code assumes RWSEM_WAITING_BIAS == -2^32" +-# endif +- +- /* 64-bit immediates are special and expensive, and not needed here */ +- asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX "incl 4(%1)\n\t" +- /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */ +- " jns 1f\n\t" +- " call call_rwsem_downgrade_wake\n" +- "1:\n\t" +- "# ending __downgrade_write\n" +- : "+m" (sem->count) +- : "a" (sem) +- : "memory", "cc"); +-#else + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" +- /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++ /* ++ * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) ++ * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) ++ */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" + "1:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) +- : "a" (sem), "i" (-RWSEM_WAITING_BIAS) ++ : "a" (sem), "er" (-RWSEM_WAITING_BIAS) + : "memory", "cc"); +-#endif + } + + /* diff --git a/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch b/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch new file mode 100644 index 00000000000..da419ac2a41 --- /dev/null +++ b/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch @@ -0,0 +1,148 @@ +From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 18:16:42 -0800 +Subject: x86-64: support 
native xadd rwsem implementation + +From: Linus Torvalds + +commit bafaecd11df15ad5b1e598adc7736afcd38ee13d upstream. + +This one is much faster than the spinlock based fallback rwsem code, +with certain artifical benchmarks having shown 300%+ improvement on +threaded page faults etc. + +Again, note the 32767-thread limit here. So this really does need that +whole "make rwsem_count_t be 64-bit and fix the BIAS values to match" +extension on top of it, but that is conceptually a totally independent +issue. + +NOT TESTED! The original patch that this all was based on were tested by +KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the +cleaned-up series, so caveat emptor.. + +Also note that it _may_ be a good idea to mark some more registers +clobbered on x86-64 in the inline asms instead of saving/restoring them. +They are inline functions, but they are only used in places where there +are not a lot of live registers _anyway_, so doing for example the +clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any +worse, and would make the slow-path code smaller. + +(Not that the slow-path really matters to that degree. Saving a few +unnecessary registers is the _least_ of our problems when we hit the slow +path. The instruction/cycle counting really only matters in the fast +path). + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig.cpu | 2 - + arch/x86/lib/Makefile | 1 + arch/x86/lib/rwsem_64.S | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 83 insertions(+), 1 deletion(-) + +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT + + config X86_XADD + def_bool y +- depends on X86_32 && !M386 ++ depends on X86_64 || !M386 + + config X86_PPRO_FENCE + bool "PentiumPro memory ordering errata workaround" +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -39,4 +39,5 @@ else + lib-y += thunk_64.o clear_page_64.o copy_page_64.o + lib-y += memmove_64.o memset_64.o + lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o ++ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o + endif +--- /dev/null ++++ b/arch/x86/lib/rwsem_64.S +@@ -0,0 +1,81 @@ ++/* ++ * x86-64 rwsem wrappers ++ * ++ * This interfaces the inline asm code to the slow-path ++ * C routines. We need to save the call-clobbered regs ++ * that the asm does not mark as clobbered, and move the ++ * argument from %rax to %rdi. ++ * ++ * NOTE! We don't need to save %rax, because the functions ++ * will always return the semaphore pointer in %rax (which ++ * is also the input argument to these helpers) ++ * ++ * The following can clobber %rdx because the asm clobbers it: ++ * call_rwsem_down_write_failed ++ * call_rwsem_wake ++ * but %rdi, %rsi, %rcx, %r8-r11 always need saving. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define save_common_regs \ ++ pushq %rdi; \ ++ pushq %rsi; \ ++ pushq %rcx; \ ++ pushq %r8; \ ++ pushq %r9; \ ++ pushq %r10; \ ++ pushq %r11 ++ ++#define restore_common_regs \ ++ popq %r11; \ ++ popq %r10; \ ++ popq %r9; \ ++ popq %r8; \ ++ popq %rcx; \ ++ popq %rsi; \ ++ popq %rdi ++ ++/* Fix up special calling conventions */ ++ENTRY(call_rwsem_down_read_failed) ++ save_common_regs ++ pushq %rdx ++ movq %rax,%rdi ++ call rwsem_down_read_failed ++ popq %rdx ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_down_read_failed) ++ ++ENTRY(call_rwsem_down_write_failed) ++ save_common_regs ++ movq %rax,%rdi ++ call rwsem_down_write_failed ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_down_write_failed) ++ ++ENTRY(call_rwsem_wake) ++ decw %dx /* do nothing if still outstanding active readers */ ++ jnz 1f ++ save_common_regs ++ movq %rax,%rdi ++ call rwsem_wake ++ restore_common_regs ++1: ret ++ ENDPROC(call_rwsem_wake) ++ ++/* Fix up special calling conventions */ ++ENTRY(call_rwsem_downgrade_wake) ++ save_common_regs ++ pushq %rdx ++ movq %rax,%rdi ++ call rwsem_downgrade_wake ++ popq %rdx ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_downgrade_wake) diff --git a/queue-2.6.33/x86-clean-up-rwsem-type-system.patch b/queue-2.6.33/x86-clean-up-rwsem-type-system.patch new file mode 100644 index 00000000000..4e64afcfb7d --- /dev/null +++ b/queue-2.6.33/x86-clean-up-rwsem-type-system.patch @@ -0,0 +1,117 @@ +From 5d0b7235d83eefdafda300656e97d368afcafc9a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 17:57:35 -0800 +Subject: x86: clean up rwsem type system + +From: Linus Torvalds + +commit 5d0b7235d83eefdafda300656e97d368afcafc9a upstream. + +The fast version of the rwsems (the code that uses xadd) has +traditionally only worked on x86-32, and as a result it mixes different +kinds of types wildly - they just all happen to be 32-bit. We have +"long", we have "__s32", and we have "int". + +To make it work on x86-64, the types suddenly matter a lot more. It can +be either a 32-bit or 64-bit signed type, and both work (with the caveat +that a 32-bit counter will only have 15 bits of effective write +counters, so it's limited to 32767 users). But whatever type you +choose, it needs to be used consistently. + +This makes a new 'rwsem_counter_t', that is a 32-bit signed type. For a +64-bit type, you'd need to also update the BIAS values. + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. 
Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -55,6 +55,9 @@ extern asmregparm struct rw_semaphore * + + /* + * the semaphore definition ++ * ++ * The bias values and the counter type needs to be extended to 64 bits ++ * if we want to have more than 32767 potential readers/writers + */ + + #define RWSEM_UNLOCKED_VALUE 0x00000000 +@@ -64,8 +67,10 @@ extern asmregparm struct rw_semaphore * + #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS + #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + ++typedef signed int rwsem_count_t; ++ + struct rw_semaphore { +- signed long count; ++ rwsem_count_t count; + spinlock_t wait_lock; + struct list_head wait_list; + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -121,7 +126,7 @@ static inline void __down_read(struct rw + */ + static inline int __down_read_trylock(struct rw_semaphore *sem) + { +- __s32 result, tmp; ++ rwsem_count_t result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" + " mov %0,%1\n\t" + "1:\n\t" +@@ -143,7 +148,7 @@ static inline int __down_read_trylock(st + */ + static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) + { +- int tmp; ++ rwsem_count_t tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" +@@ -170,9 +175,9 @@ static inline void __down_write(struct r + */ + static inline int __down_write_trylock(struct rw_semaphore *sem) + { +- signed long ret = cmpxchg(&sem->count, +- RWSEM_UNLOCKED_VALUE, +- RWSEM_ACTIVE_WRITE_BIAS); ++ rwsem_count_t ret = cmpxchg(&sem->count, ++ RWSEM_UNLOCKED_VALUE, ++ RWSEM_ACTIVE_WRITE_BIAS); + if (ret == RWSEM_UNLOCKED_VALUE) + return 1; + return 0; +@@ -183,7 +188,7 @@ static inline int __down_write_trylock(s + */ + static inline void __up_read(struct rw_semaphore *sem) + { +- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; ++ rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtracts 1, returns the old value */ +@@ -201,7 +206,7 @@ static inline void __up_read(struct rw_s + */ + static inline void __up_write(struct rw_semaphore *sem) + { +- unsigned long tmp; ++ rwsem_count_t tmp; + asm volatile("# beginning __up_write\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" + /* tries to transition +@@ -245,9 +250,9 @@ static inline void rwsem_atomic_add(int + /* + * implement exchange and add functionality + */ +-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) ++static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem) + { +- int tmp = delta; ++ rwsem_count_t tmp = delta; + + asm volatile(LOCK_PREFIX "xadd %0,%1" + : "+r" (tmp), "+m" (sem->count) diff --git a/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch b/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch new file mode 100644 index 00000000000..96d50d2bab1 --- /dev/null +++ b/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch @@ -0,0 +1,50 @@ +From 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 17 Jan 2010 10:24:07 -0800 +Subject: x86: Fix breakage of UML from the changes in the rwsem system + +From: Linus Torvalds + +commit 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe upstream. 
+ +The patches 5d0b7235d83eefdafda300656e97d368afcafc9a and +bafaecd11df15ad5b1e598adc7736afcd38ee13d broke the UML build: + +On Sun, 17 Jan 2010, Ingo Molnar wrote: +> +> FYI, -tip testing found that these changes break the UML build: +> +> kernel/built-in.o: In function `__up_read': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:192: undefined reference to `call_rwsem_wake' +> kernel/built-in.o: In function `__up_write': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:210: undefined reference to `call_rwsem_wake' +> kernel/built-in.o: In function `__downgrade_write': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:228: undefined reference to `call_rwsem_downgrade_wake' +> kernel/built-in.o: In function `__down_read': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:112: undefined reference to `call_rwsem_down_read_failed' +> kernel/built-in.o: In function `__down_write_nested': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:154: undefined reference to `call_rwsem_down_write_failed' +> collect2: ld returned 1 exit status + +Add lib/rwsem_64.o to the UML subarch objects to fix. + +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/um/sys-x86_64/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/um/sys-x86_64/Makefile ++++ b/arch/um/sys-x86_64/Makefile +@@ -8,7 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt + setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ + sysrq.o ksyms.o tls.o + +-subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o ++subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \ ++ lib/rwsem_64.o + subarch-obj-$(CONFIG_MODULES) += kernel/module.o + + ldt-y = ../sys-i386/ldt.o
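
For reference, below is a minimal user-space sketch of the counter arithmetic the rwsem series above relies on. It is not the kernel code: the bias values and the sign/zero checks are taken from the patches, but the sketch_* helper names are made up for illustration, and gcc's __atomic_fetch_add()/__atomic_add_fetch() builtins stand in for the lock-prefixed inc/xadd instructions in the inline asm. The 64-bit bias values from the x86-64 patch are used; on 32-bit, RWSEM_ACTIVE_MASK would be 0x0000ffffL instead.

/*
 * Sketch only -- models the rwsem bias arithmetic and the sign/zero
 * tests ("jns" / "jz") that decide fast path vs. slow path.  The
 * __atomic builtins stand in for "lock inc" / "lock xadd".
 */
#include <stdio.h>

typedef signed long rwsem_count_t;            /* 64-bit counter, as in the x86-64 patch */

#define RWSEM_ACTIVE_MASK       0xffffffffL   /* 0x0000ffffL on 32-bit */
#define RWSEM_UNLOCKED_VALUE    0x00000000L
#define RWSEM_ACTIVE_BIAS       0x00000001L
#define RWSEM_WAITING_BIAS      (-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_READ_BIAS  RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

struct sketch_rwsem { rwsem_count_t count; };

static void sketch_down_read(struct sketch_rwsem *sem)
{
	/* kernel: LOCK_PREFIX inc, then "jns" -- slow path if the result went negative */
	rwsem_count_t val = __atomic_add_fetch(&sem->count, RWSEM_ACTIVE_READ_BIAS,
					       __ATOMIC_ACQUIRE);
	if (val < 0)
		printf("down_read: would call call_rwsem_down_read_failed\n");
}

static void sketch_down_write(struct sketch_rwsem *sem)
{
	/* kernel: xadd of ACTIVE_WRITE_BIAS, then "test"/"jz" on the OLD value */
	rwsem_count_t old = __atomic_fetch_add(&sem->count, RWSEM_ACTIVE_WRITE_BIAS,
					       __ATOMIC_ACQUIRE);
	if (old != 0)
		printf("down_write: would call call_rwsem_down_write_failed\n");
}

static void sketch_downgrade_write(struct sketch_rwsem *sem)
{
	/* kernel: add -RWSEM_WAITING_BIAS; a still-negative result means waiters to wake */
	rwsem_count_t val = __atomic_add_fetch(&sem->count, -RWSEM_WAITING_BIAS,
					       __ATOMIC_RELEASE);
	if (val < 0)
		printf("downgrade_write: would call call_rwsem_downgrade_wake\n");
}

int main(void)
{
	struct sketch_rwsem sem = { RWSEM_UNLOCKED_VALUE };

	sketch_down_write(&sem);      /* 0 -> 0xffffffff00000001 (one active writer)    */
	sketch_downgrade_write(&sem); /*   -> 0x0000000000000001 (writer becomes reader) */
	sketch_down_read(&sem);       /*   -> 0x0000000000000002 (two active readers)    */
	printf("final count = %#lx\n", (unsigned long)sem.count);
	return 0;
}

It should build with any gcc or clang that provides the __atomic builtins on a 64-bit host (e.g. gcc -O2 rwsem_sketch.c); the point is only to show how a single xadd plus a sign or zero test separates the uncontended fast path from the call_rwsem_* slow paths used in the patches.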