--- /dev/null
+From 462d60577a997aa87c935ae4521bd303733a9f2b Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sat, 30 Jan 2010 16:11:21 -0500
+Subject: fix NFS4 handling of mountpoint stat
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 462d60577a997aa87c935ae4521bd303733a9f2b upstream.
+
+RFC says we need to follow the chain of mounts if there's more
+than one stacked on that point.
+
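+The hunk below open-codes that walk; as a rough self-contained sketch of
+the same idea (2.6.33-era VFS helpers: follow_up(), path_get()/path_put(),
+the three-argument vfs_getattr(); stat_topmost_mount() is only an
+illustrative name, not something this patch adds):
+
+  /* illustration only: climb through every mount stacked on this point */
+  static int stat_topmost_mount(const struct path *start, struct kstat *stat)
+  {
+          struct path path = *start;
+          int err;
+
+          path_get(&path);
+          while (follow_up(&path)) {
+                  /* follow_up() lands on the mountpoint in the parent
+                   * mount; stop once that is an ordinary directory rather
+                   * than the root of yet another stacked mount */
+                  if (path.dentry != path.mnt->mnt_root)
+                          break;
+          }
+          err = vfs_getattr(path.mnt, path.dentry, stat);
+          path_put(&path);
+          return err;
+  }
+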
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfsd/nfs4xdr.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2121,9 +2121,15 @@ out_acl:
+ * and this is the root of a cross-mounted filesystem.
+ */
+ if (ignore_crossmnt == 0 &&
+- exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) {
+- err = vfs_getattr(exp->ex_path.mnt->mnt_parent,
+- exp->ex_path.mnt->mnt_mountpoint, &stat);
++ dentry == exp->ex_path.mnt->mnt_root) {
++ struct path path = exp->ex_path;
++ path_get(&path);
++ while (follow_up(&path)) {
++ if (path.dentry != path.mnt->mnt_root)
++ break;
++ }
++ err = vfs_getattr(path.mnt, path.dentry, &stat);
++ path_put(&path);
+ if (err)
+ goto out_nfserr;
+ }
x86-cacheinfo-remove-numa-dependency-fix-for-amd-fam10h-rev-d1.patch
x86-cacheinfo-enable-l3-cid-only-on-amd.patch
dm-ioctl-introduce-flag-indicating-uevent-was-generated.patch
+x86-32-clean-up-rwsem-inline-asm-statements.patch
+x86-clean-up-rwsem-type-system.patch
+x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch
+x86-64-support-native-xadd-rwsem-implementation.patch
+x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch
+x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch
+fix-nfs4-handling-of-mountpoint-stat.patch
--- /dev/null
+From 59c33fa7791e9948ba467c2b83e307a0d087ab49 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 12 Jan 2010 16:21:09 -0800
+Subject: x86-32: clean up rwsem inline asm statements
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 59c33fa7791e9948ba467c2b83e307a0d087ab49 upstream.
+
+This makes gcc use the right register names and instruction operand sizes
+automatically for the rwsem inline asm statements.
+
+So instead of using "(%%eax)" to specify the memory address that is the
+semaphore, we use "(%1)" or similar. And instead of forcing the operation
+to always be 32-bit, we use "%z0", taking the size from the actual
+semaphore data structure itself.
+
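+A stand-alone illustration of those two points (plain user-space C, not
+the kernel header, so the LOCK_PREFIX macro is spelled out as "lock"):
+
+  #include <stdio.h>
+
+  /* %z0 picks the operand-size suffix from the C type of operand 0, and
+   * the "+m" operand replaces any hard-coded "(%%eax)" addressing */
+  static void atomic_inc_int(int *v)
+  {
+          asm volatile("lock; inc%z0 %0" : "+m" (*v) : : "memory", "cc");
+  }
+
+  static void atomic_inc_long(long *v)
+  {
+          /* same template, but %z0 now emits "incq" on x86-64 because *v
+           * is 8 bytes (some older gccs get this wrong, which is what the
+           * 64-bit follow-up patch has to work around) */
+          asm volatile("lock; inc%z0 %0" : "+m" (*v) : : "memory", "cc");
+  }
+
+  int main(void)
+  {
+          int  i = 0;
+          long l = 0;
+
+          atomic_inc_int(&i);
+          atomic_inc_long(&l);
+          printf("%d %ld\n", i, l);
+          return 0;
+  }
+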
+This doesn't actually matter on x86-32, but if we want to use the same
+inline asm for x86-64, we'll need to have the compiler generate the proper
+64-bit names for the registers (%rax instead of %eax), and if we want to
+use a 64-bit counter too (in order to avoid the 15-bit limit on the
+write counter that limits concurrent users to 32767 threads), we'll need
+to be able to generate instructions with "q" accesses rather than "l".
+
+Since this header currently isn't enabled on x86-64, none of that matters,
+but we do want to use the xadd version of the semaphores rather than have
+to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
+when you have lots of threads all taking page faults, and the fallback
+rwsem code that uses a spinlock performs abysmally badly in that case.
+
+[ hpa: modified the patch to skip size suffixes entirely when they are
+ redundant due to register operands. ]
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/rwsem.h | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/include/asm/rwsem.h
++++ b/arch/x86/include/asm/rwsem.h
+@@ -105,7 +105,7 @@ do { \
+ static inline void __down_read(struct rw_semaphore *sem)
+ {
+ asm volatile("# beginning down_read\n\t"
+- LOCK_PREFIX " incl (%%eax)\n\t"
++ LOCK_PREFIX " inc%z0 (%1)\n\t"
+ /* adds 0x00000001, returns the old value */
+ " jns 1f\n"
+ " call call_rwsem_down_read_failed\n"
+@@ -123,12 +123,12 @@ static inline int __down_read_trylock(st
+ {
+ __s32 result, tmp;
+ asm volatile("# beginning __down_read_trylock\n\t"
+- " movl %0,%1\n\t"
++ " mov %0,%1\n\t"
+ "1:\n\t"
+- " movl %1,%2\n\t"
+- " addl %3,%2\n\t"
++ " mov %1,%2\n\t"
++ " add %3,%2\n\t"
+ " jle 2f\n\t"
+- LOCK_PREFIX " cmpxchgl %2,%0\n\t"
++ LOCK_PREFIX " cmpxchg %2,%0\n\t"
+ " jnz 1b\n\t"
+ "2:\n\t"
+ "# ending __down_read_trylock\n\t"
+@@ -147,9 +147,9 @@ static inline void __down_write_nested(s
+
+ tmp = RWSEM_ACTIVE_WRITE_BIAS;
+ asm volatile("# beginning down_write\n\t"
+- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t"
++ LOCK_PREFIX " xadd %1,(%2)\n\t"
+ /* subtract 0x0000ffff, returns the old value */
+- " testl %%edx,%%edx\n\t"
++ " test %1,%1\n\t"
+ /* was the count 0 before? */
+ " jz 1f\n"
+ " call call_rwsem_down_write_failed\n"
+@@ -185,7 +185,7 @@ static inline void __up_read(struct rw_s
+ {
+ __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+ asm volatile("# beginning __up_read\n\t"
+- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t"
++ LOCK_PREFIX " xadd %1,(%2)\n\t"
+ /* subtracts 1, returns the old value */
+ " jns 1f\n\t"
+ " call call_rwsem_wake\n"
+@@ -201,18 +201,18 @@ static inline void __up_read(struct rw_s
+ */
+ static inline void __up_write(struct rw_semaphore *sem)
+ {
++ unsigned long tmp;
+ asm volatile("# beginning __up_write\n\t"
+- " movl %2,%%edx\n\t"
+- LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t"
++ LOCK_PREFIX " xadd %1,(%2)\n\t"
+ /* tries to transition
+ 0xffff0001 -> 0x00000000 */
+ " jz 1f\n"
+ " call call_rwsem_wake\n"
+ "1:\n\t"
+ "# ending __up_write\n"
+- : "+m" (sem->count)
+- : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
+- : "memory", "cc", "edx");
++ : "+m" (sem->count), "=d" (tmp)
++ : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
++ : "memory", "cc");
+ }
+
+ /*
+@@ -221,7 +221,7 @@ static inline void __up_write(struct rw_
+ static inline void __downgrade_write(struct rw_semaphore *sem)
+ {
+ asm volatile("# beginning __downgrade_write\n\t"
+- LOCK_PREFIX " addl %2,(%%eax)\n\t"
++ LOCK_PREFIX " add%z0 %2,(%1)\n\t"
+ /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
+ " jns 1f\n\t"
+ " call call_rwsem_downgrade_wake\n"
+@@ -237,7 +237,7 @@ static inline void __downgrade_write(str
+ */
+ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+ {
+- asm volatile(LOCK_PREFIX "addl %1,%0"
++ asm volatile(LOCK_PREFIX "add%z0 %1,%0"
+ : "+m" (sem->count)
+ : "ir" (delta));
+ }
--- /dev/null
+From 1838ef1d782f7527e6defe87e180598622d2d071 Mon Sep 17 00:00:00 2001
+From: H. Peter Anvin <hpa@zytor.com>
+Date: Mon, 18 Jan 2010 14:00:34 -0800
+Subject: x86-64, rwsem: 64-bit xadd rwsem implementation
+
+From: H. Peter Anvin <hpa@zytor.com>
+
+commit 1838ef1d782f7527e6defe87e180598622d2d071 upstream.
+
+For x86-64, 32767 threads really is not enough. Change rwsem_count_t
+to a signed long, so that it is 64 bits on x86-64.
+
+This required the following changes to the assembly code:
+
+a) %z0 doesn't work on all versions of gcc! At least gcc 4.4.2 as
+ shipped with Fedora 12 emits "ll" not "q" for 64 bits, even for
+ integer operands. Newer gccs apparently do this correctly, but
+ avoid this problem by using the _ASM_ macros instead of %z.
+b) 64-bit immediates are only allowed in "movq $imm,%reg"
+ constructs... no others. Change some of the constraints to "e",
+ and fix the one case where we would have had to use an invalid
+ immediate -- in that case, we only care about the upper half
+ anyway, so just access the upper half.
+
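+The same two points in stand-alone form (the kernel takes its
+size-selected mnemonics from the _ASM_* macros in <asm/asm.h>;
+MY_ASM_ADD below is only a local stand-in for illustration):
+
+  #include <stdio.h>
+
+  /* suffix chosen by the preprocessor rather than by %z, per point (a) */
+  #ifdef __x86_64__
+  # define MY_ASM_ADD "addq "
+  #else
+  # define MY_ASM_ADD "addl "
+  #endif
+
+  static void counter_add(long *count, long delta)
+  {
+          /* "er" = sign-extendable 32-bit immediate ("e") or register ("r");
+           * a bare "i" could hand the add a 64-bit immediate, which no
+           * instruction except movq accepts, per point (b) above */
+          asm volatile("lock; " MY_ASM_ADD "%1,%0"
+                       : "+m" (*count)
+                       : "er" (delta)
+                       : "memory", "cc");
+  }
+
+  int main(void)
+  {
+          long c = 0;
+
+          counter_add(&c, 1);
+          counter_add(&c, -0x10000);
+          printf("%ld\n", c);
+          return 0;
+  }
+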
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+LKML-Reference: <tip-bafaecd11df15ad5b1e598adc7736afcd38ee13d@git.kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/rwsem.h | 53 ++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 40 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/rwsem.h
++++ b/arch/x86/include/asm/rwsem.h
+@@ -41,6 +41,7 @@
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/lockdep.h>
++#include <asm/asm.h>
+
+ struct rwsem_waiter;
+
+@@ -56,18 +57,24 @@ extern asmregparm struct rw_semaphore *
+ /*
+ * the semaphore definition
+ *
+- * The bias values and the counter type needs to be extended to 64 bits
+- * if we want to have more than 32767 potential readers/writers
++ * The bias values and the counter type limits the number of
++ * potential readers/writers to 32767 for 32 bits and 2147483647
++ * for 64 bits.
+ */
+
+-#define RWSEM_UNLOCKED_VALUE 0x00000000
+-#define RWSEM_ACTIVE_BIAS 0x00000001
+-#define RWSEM_ACTIVE_MASK 0x0000ffff
+-#define RWSEM_WAITING_BIAS (-0x00010000)
++#ifdef CONFIG_X86_64
++# define RWSEM_ACTIVE_MASK 0xffffffffL
++#else
++# define RWSEM_ACTIVE_MASK 0x0000ffffL
++#endif
++
++#define RWSEM_UNLOCKED_VALUE 0x00000000L
++#define RWSEM_ACTIVE_BIAS 0x00000001L
++#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
+ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+ #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+-typedef signed int rwsem_count_t;
++typedef signed long rwsem_count_t;
+
+ struct rw_semaphore {
+ rwsem_count_t count;
+@@ -110,7 +117,7 @@ do { \
+ static inline void __down_read(struct rw_semaphore *sem)
+ {
+ asm volatile("# beginning down_read\n\t"
+- LOCK_PREFIX " inc%z0 (%1)\n\t"
++ LOCK_PREFIX _ASM_INC "(%1)\n\t"
+ /* adds 0x00000001, returns the old value */
+ " jns 1f\n"
+ " call call_rwsem_down_read_failed\n"
+@@ -225,8 +232,25 @@ static inline void __up_write(struct rw_
+ */
+ static inline void __downgrade_write(struct rw_semaphore *sem)
+ {
++#ifdef CONFIG_X86_64
++# if RWSEM_WAITING_BIAS != -0x100000000
++# error "This code assumes RWSEM_WAITING_BIAS == -2^32"
++# endif
++
++ /* 64-bit immediates are special and expensive, and not needed here */
++ asm volatile("# beginning __downgrade_write\n\t"
++ LOCK_PREFIX "incl 4(%1)\n\t"
++ /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */
++ " jns 1f\n\t"
++ " call call_rwsem_downgrade_wake\n"
++ "1:\n\t"
++ "# ending __downgrade_write\n"
++ : "+m" (sem->count)
++ : "a" (sem)
++ : "memory", "cc");
++#else
+ asm volatile("# beginning __downgrade_write\n\t"
+- LOCK_PREFIX " add%z0 %2,(%1)\n\t"
++ LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
+ /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
+ " jns 1f\n\t"
+ " call call_rwsem_downgrade_wake\n"
+@@ -235,22 +259,25 @@ static inline void __downgrade_write(str
+ : "+m" (sem->count)
+ : "a" (sem), "i" (-RWSEM_WAITING_BIAS)
+ : "memory", "cc");
++#endif
+ }
+
+ /*
+ * implement atomic add functionality
+ */
+-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
++static inline void rwsem_atomic_add(rwsem_count_t delta,
++ struct rw_semaphore *sem)
+ {
+- asm volatile(LOCK_PREFIX "add%z0 %1,%0"
++ asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
+ : "+m" (sem->count)
+- : "ir" (delta));
++ : "er" (delta));
+ }
+
+ /*
+ * implement exchange and add functionality
+ */
+-static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem)
++static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
++ struct rw_semaphore *sem)
+ {
+ rwsem_count_t tmp = delta;
+
--- /dev/null
+From 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc Mon Sep 17 00:00:00 2001
+From: Avi Kivity <avi@redhat.com>
+Date: Sat, 13 Feb 2010 10:33:12 +0200
+Subject: x86-64, rwsem: Avoid store forwarding hazard in __downgrade_write
+
+From: Avi Kivity <avi@redhat.com>
+
+commit 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc upstream.
+
+The Intel Architecture Optimization Reference Manual states that a short
+load that follows a long store to the same object will suffer a store
+forwading penalty, particularly if the two accesses use different addresses.
+Trivially, a long load that follows a short store will also suffer a penalty.
+
+__downgrade_write() in rwsem incurs both penalties: the increment operation
+will not be able to reuse a recently-loaded rwsem value, and its result will
+not be reused by any recently-following rwsem operation.
+
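+In stand-alone form (x86-64 only; simplified user-space code, not the
+kernel header), the mixed-width access pattern looks like this:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          long count = 0;
+
+          /* 64-bit RMW of the whole counter, as the xadd in __up_write does */
+          asm volatile("lock; addq %1,%0"
+                       : "+m" (count) : "er" (1L) : "memory", "cc");
+
+          /* 32-bit RMW of bytes 4..7 only, as the old x86-64
+           * __downgrade_write ("incl 4(%1)") did -- a different size and
+           * address for the same object, hence the forwarding stall */
+          asm volatile("lock; incl %0"
+                       : "+m" (*((unsigned int *)&count + 1))
+                       : : "memory", "cc");
+
+          /* the fix: the very same 64-bit add as everything else (the
+           * 2^32 bias no longer fits an imm32, so "er" falls back to a
+           * register operand) */
+          asm volatile("lock; addq %1,%0"
+                       : "+m" (count) : "er" (1L << 32) : "memory", "cc");
+
+          printf("%#lx\n", count);
+          return 0;
+  }
+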
+A comment in the code states that this is because 64-bit immediates are
+special and expensive; but while they are slightly special (only a single
+instruction allows them), they aren't expensive: a test shows that two loops,
+one loading a 32-bit immediate and one loading a 64-bit immediate, both take
+1.5 cycles per iteration.
+
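+One way such a test could look (a rough user-space sketch, not
+necessarily the benchmark that produced the number above; the loop
+overhead is part of what gets measured):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  static inline uint64_t rdtsc(void)
+  {
+          uint32_t lo, hi;
+
+          asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
+          return ((uint64_t)hi << 32) | lo;
+  }
+
+  int main(void)
+  {
+          enum { N = 100 * 1000 * 1000 };
+          uint64_t t0, t1;
+          long sink;
+
+          t0 = rdtsc();
+          for (long i = 0; i < N; i++)
+                  asm volatile("mov $0x12345678, %0" : "=r" (sink));
+          t1 = rdtsc();
+          printf("32-bit immediate: %.2f cycles/iter\n", (double)(t1 - t0) / N);
+
+          t0 = rdtsc();
+          for (long i = 0; i < N; i++)
+                  asm volatile("movabs $0x123456789abcdef0, %0" : "=r" (sink));
+          t1 = rdtsc();
+          printf("64-bit immediate: %.2f cycles/iter\n", (double)(t1 - t0) / N);
+
+          (void)sink;
+          return 0;
+  }
+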
+Fix this by changing __downgrade_write to use the same add instruction on
+i386 and on x86_64, so that it uses the same operand size as all the other
+rwsem functions.
+
+Signed-off-by: Avi Kivity <avi@redhat.com>
+LKML-Reference: <1266049992-17419-1-git-send-email-avi@redhat.com>
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/rwsem.h | 25 +++++--------------------
+ 1 file changed, 5 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/include/asm/rwsem.h
++++ b/arch/x86/include/asm/rwsem.h
+@@ -232,34 +232,19 @@ static inline void __up_write(struct rw_
+ */
+ static inline void __downgrade_write(struct rw_semaphore *sem)
+ {
+-#ifdef CONFIG_X86_64
+-# if RWSEM_WAITING_BIAS != -0x100000000
+-# error "This code assumes RWSEM_WAITING_BIAS == -2^32"
+-# endif
+-
+- /* 64-bit immediates are special and expensive, and not needed here */
+- asm volatile("# beginning __downgrade_write\n\t"
+- LOCK_PREFIX "incl 4(%1)\n\t"
+- /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */
+- " jns 1f\n\t"
+- " call call_rwsem_downgrade_wake\n"
+- "1:\n\t"
+- "# ending __downgrade_write\n"
+- : "+m" (sem->count)
+- : "a" (sem)
+- : "memory", "cc");
+-#else
+ asm volatile("# beginning __downgrade_write\n\t"
+ LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
+- /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
++ /*
++ * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
++ * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
++ */
+ " jns 1f\n\t"
+ " call call_rwsem_downgrade_wake\n"
+ "1:\n\t"
+ "# ending __downgrade_write\n"
+ : "+m" (sem->count)
+- : "a" (sem), "i" (-RWSEM_WAITING_BIAS)
++ : "a" (sem), "er" (-RWSEM_WAITING_BIAS)
+ : "memory", "cc");
+-#endif
+ }
+
+ /*
--- /dev/null
+From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 12 Jan 2010 18:16:42 -0800
+Subject: x86-64: support native xadd rwsem implementation
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit bafaecd11df15ad5b1e598adc7736afcd38ee13d upstream.
+
+This one is much faster than the spinlock based fallback rwsem code,
+with certain artificial benchmarks having shown 300%+ improvement on
+threaded page faults etc.
+
+Again, note the 32767-thread limit here. So this really does need that
+whole "make rwsem_count_t be 64-bit and fix the BIAS values to match"
+extension on top of it, but that is conceptually a totally independent
+issue.
+
+NOT TESTED! The original patch that this all was based on was tested by
+KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the
+cleaned-up series, so caveat emptor..
+
+Also note that it _may_ be a good idea to mark some more registers
+clobbered on x86-64 in the inline asms instead of saving/restoring them.
+They are inline functions, but they are only used in places where there
+are not a lot of live registers _anyway_, so doing for example the
+clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any
+worse, and would make the slow-path code smaller.
+
+(Not that the slow-path really matters to that degree. Saving a few
+unnecessary registers is the _least_ of our problems when we hit the slow
+path. The instruction/cycle counting really only matters in the fast
+path).
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain>
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/Kconfig.cpu | 2 -
+ arch/x86/lib/Makefile | 1
+ arch/x86/lib/rwsem_64.S | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 83 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig.cpu
++++ b/arch/x86/Kconfig.cpu
+@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
+
+ config X86_XADD
+ def_bool y
+- depends on X86_32 && !M386
++ depends on X86_64 || !M386
+
+ config X86_PPRO_FENCE
+ bool "PentiumPro memory ordering errata workaround"
+--- a/arch/x86/lib/Makefile
++++ b/arch/x86/lib/Makefile
+@@ -39,4 +39,5 @@ else
+ lib-y += thunk_64.o clear_page_64.o copy_page_64.o
+ lib-y += memmove_64.o memset_64.o
+ lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
++ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
+ endif
+--- /dev/null
++++ b/arch/x86/lib/rwsem_64.S
+@@ -0,0 +1,81 @@
++/*
++ * x86-64 rwsem wrappers
++ *
++ * This interfaces the inline asm code to the slow-path
++ * C routines. We need to save the call-clobbered regs
++ * that the asm does not mark as clobbered, and move the
++ * argument from %rax to %rdi.
++ *
++ * NOTE! We don't need to save %rax, because the functions
++ * will always return the semaphore pointer in %rax (which
++ * is also the input argument to these helpers)
++ *
++ * The following can clobber %rdx because the asm clobbers it:
++ * call_rwsem_down_write_failed
++ * call_rwsem_wake
++ * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
++ */
++
++#include <linux/linkage.h>
++#include <asm/rwlock.h>
++#include <asm/alternative-asm.h>
++#include <asm/frame.h>
++#include <asm/dwarf2.h>
++
++#define save_common_regs \
++ pushq %rdi; \
++ pushq %rsi; \
++ pushq %rcx; \
++ pushq %r8; \
++ pushq %r9; \
++ pushq %r10; \
++ pushq %r11
++
++#define restore_common_regs \
++ popq %r11; \
++ popq %r10; \
++ popq %r9; \
++ popq %r8; \
++ popq %rcx; \
++ popq %rsi; \
++ popq %rdi
++
++/* Fix up special calling conventions */
++ENTRY(call_rwsem_down_read_failed)
++ save_common_regs
++ pushq %rdx
++ movq %rax,%rdi
++ call rwsem_down_read_failed
++ popq %rdx
++ restore_common_regs
++ ret
++ ENDPROC(call_rwsem_down_read_failed)
++
++ENTRY(call_rwsem_down_write_failed)
++ save_common_regs
++ movq %rax,%rdi
++ call rwsem_down_write_failed
++ restore_common_regs
++ ret
++ ENDPROC(call_rwsem_down_write_failed)
++
++ENTRY(call_rwsem_wake)
++ decw %dx /* do nothing if still outstanding active readers */
++ jnz 1f
++ save_common_regs
++ movq %rax,%rdi
++ call rwsem_wake
++ restore_common_regs
++1: ret
++ ENDPROC(call_rwsem_wake)
++
++/* Fix up special calling conventions */
++ENTRY(call_rwsem_downgrade_wake)
++ save_common_regs
++ pushq %rdx
++ movq %rax,%rdi
++ call rwsem_downgrade_wake
++ popq %rdx
++ restore_common_regs
++ ret
++ ENDPROC(call_rwsem_downgrade_wake)
--- /dev/null
+From 5d0b7235d83eefdafda300656e97d368afcafc9a Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 12 Jan 2010 17:57:35 -0800
+Subject: x86: clean up rwsem type system
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 5d0b7235d83eefdafda300656e97d368afcafc9a upstream.
+
+The fast version of the rwsems (the code that uses xadd) has
+traditionally only worked on x86-32, and as a result it mixes different
+kinds of types wildly - they just all happen to be 32-bit. We have
+"long", we have "__s32", and we have "int".
+
+To make it work on x86-64, the types suddenly matter a lot more. It can
+be either a 32-bit or 64-bit signed type, and both work (with the caveat
+that a 32-bit counter will only have 15 bits of effective write
+counters, so it's limited to 32767 users). But whatever type you
+choose, it needs to be used consistently.
+
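+Spelled out with the 32-bit constants (a small user-space illustration of
+the count encodings the sign-test fast paths rely on):
+
+  #include <stdio.h>
+
+  #define RWSEM_UNLOCKED_VALUE    0x00000000
+  #define RWSEM_ACTIVE_BIAS       0x00000001
+  #define RWSEM_ACTIVE_MASK       0x0000ffff
+  #define RWSEM_WAITING_BIAS      (-0x00010000)
+  #define RWSEM_ACTIVE_READ_BIAS  RWSEM_ACTIVE_BIAS
+  #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+  int main(void)
+  {
+          int c = RWSEM_UNLOCKED_VALUE;
+
+          c += RWSEM_ACTIVE_READ_BIAS;            /* one reader           */
+          c += RWSEM_ACTIVE_READ_BIAS;            /* two readers          */
+          printf("two readers: %#010x\n", c);     /* 0x00000002, positive */
+
+          c = RWSEM_UNLOCKED_VALUE + RWSEM_ACTIVE_WRITE_BIAS;
+          printf("one writer:  %#010x\n", c);     /* 0xffff0001, negative */
+
+          /*
+           * The active holders share the signed word with the negative
+           * waiting bias, so only the low 15 bits are really usable --
+           * hence the 32767 limit mentioned above.
+           */
+          return 0;
+  }
+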
+This makes a new 'rwsem_count_t', which is a 32-bit signed type. For a
+64-bit type, you'd need to also update the BIAS values.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+LKML-Reference: <alpine.LFD.2.00.1001121755220.17145@localhost.localdomain>
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/include/asm/rwsem.h | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/rwsem.h
++++ b/arch/x86/include/asm/rwsem.h
+@@ -55,6 +55,9 @@ extern asmregparm struct rw_semaphore *
+
+ /*
+ * the semaphore definition
++ *
++ * The bias values and the counter type needs to be extended to 64 bits
++ * if we want to have more than 32767 potential readers/writers
+ */
+
+ #define RWSEM_UNLOCKED_VALUE 0x00000000
+@@ -64,8 +67,10 @@ extern asmregparm struct rw_semaphore *
+ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+ #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
++typedef signed int rwsem_count_t;
++
+ struct rw_semaphore {
+- signed long count;
++ rwsem_count_t count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+@@ -121,7 +126,7 @@ static inline void __down_read(struct rw
+ */
+ static inline int __down_read_trylock(struct rw_semaphore *sem)
+ {
+- __s32 result, tmp;
++ rwsem_count_t result, tmp;
+ asm volatile("# beginning __down_read_trylock\n\t"
+ " mov %0,%1\n\t"
+ "1:\n\t"
+@@ -143,7 +148,7 @@ static inline int __down_read_trylock(st
+ */
+ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+ {
+- int tmp;
++ rwsem_count_t tmp;
+
+ tmp = RWSEM_ACTIVE_WRITE_BIAS;
+ asm volatile("# beginning down_write\n\t"
+@@ -170,9 +175,9 @@ static inline void __down_write(struct r
+ */
+ static inline int __down_write_trylock(struct rw_semaphore *sem)
+ {
+- signed long ret = cmpxchg(&sem->count,
+- RWSEM_UNLOCKED_VALUE,
+- RWSEM_ACTIVE_WRITE_BIAS);
++ rwsem_count_t ret = cmpxchg(&sem->count,
++ RWSEM_UNLOCKED_VALUE,
++ RWSEM_ACTIVE_WRITE_BIAS);
+ if (ret == RWSEM_UNLOCKED_VALUE)
+ return 1;
+ return 0;
+@@ -183,7 +188,7 @@ static inline int __down_write_trylock(s
+ */
+ static inline void __up_read(struct rw_semaphore *sem)
+ {
+- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
++ rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS;
+ asm volatile("# beginning __up_read\n\t"
+ LOCK_PREFIX " xadd %1,(%2)\n\t"
+ /* subtracts 1, returns the old value */
+@@ -201,7 +206,7 @@ static inline void __up_read(struct rw_s
+ */
+ static inline void __up_write(struct rw_semaphore *sem)
+ {
+- unsigned long tmp;
++ rwsem_count_t tmp;
+ asm volatile("# beginning __up_write\n\t"
+ LOCK_PREFIX " xadd %1,(%2)\n\t"
+ /* tries to transition
+@@ -245,9 +250,9 @@ static inline void rwsem_atomic_add(int
+ /*
+ * implement exchange and add functionality
+ */
+-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
++static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+ {
+- int tmp = delta;
++ rwsem_count_t tmp = delta;
+
+ asm volatile(LOCK_PREFIX "xadd %0,%1"
+ : "+r" (tmp), "+m" (sem->count)
--- /dev/null
+From 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 17 Jan 2010 10:24:07 -0800
+Subject: x86: Fix breakage of UML from the changes in the rwsem system
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe upstream.
+
+The patches 5d0b7235d83eefdafda300656e97d368afcafc9a and
+bafaecd11df15ad5b1e598adc7736afcd38ee13d broke the UML build:
+
+On Sun, 17 Jan 2010, Ingo Molnar wrote:
+>
+> FYI, -tip testing found that these changes break the UML build:
+>
+> kernel/built-in.o: In function `__up_read':
+> /home/mingo/tip/arch/x86/include/asm/rwsem.h:192: undefined reference to `call_rwsem_wake'
+> kernel/built-in.o: In function `__up_write':
+> /home/mingo/tip/arch/x86/include/asm/rwsem.h:210: undefined reference to `call_rwsem_wake'
+> kernel/built-in.o: In function `__downgrade_write':
+> /home/mingo/tip/arch/x86/include/asm/rwsem.h:228: undefined reference to `call_rwsem_downgrade_wake'
+> kernel/built-in.o: In function `__down_read':
+> /home/mingo/tip/arch/x86/include/asm/rwsem.h:112: undefined reference to `call_rwsem_down_read_failed'
+> kernel/built-in.o: In function `__down_write_nested':
+> /home/mingo/tip/arch/x86/include/asm/rwsem.h:154: undefined reference to `call_rwsem_down_write_failed'
+> collect2: ld returned 1 exit status
+
+Add lib/rwsem_64.o to the UML subarch objects to fix.
+
+LKML-Reference: <alpine.LFD.2.00.1001171023440.13231@localhost.localdomain>
+Signed-off-by: H. Peter Anvin <hpa@zytor.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/um/sys-x86_64/Makefile | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/um/sys-x86_64/Makefile
++++ b/arch/um/sys-x86_64/Makefile
+@@ -8,7 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt
+ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
+ sysrq.o ksyms.o tls.o
+
+-subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
++subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \
++ lib/rwsem_64.o
+ subarch-obj-$(CONFIG_MODULES) += kernel/module.o
+
+ ldt-y = ../sys-i386/ldt.o