From: Greg Kroah-Hartman Date: Mon, 19 Apr 2010 18:31:36 +0000 (-0700) Subject: .33 patches X-Git-Tag: v2.6.32.12~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2bd84889168e07723ef85e919910eaeb6363d1db;p=thirdparty%2Fkernel%2Fstable-queue.git .33 patches --- diff --git a/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch b/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch new file mode 100644 index 00000000000..067edc8510d --- /dev/null +++ b/queue-2.6.33/fix-nfs4-handling-of-mountpoint-stat.patch @@ -0,0 +1,40 @@ +From 462d60577a997aa87c935ae4521bd303733a9f2b Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Sat, 30 Jan 2010 16:11:21 -0500 +Subject: fix NFS4 handling of mountpoint stat + +From: Al Viro + +commit 462d60577a997aa87c935ae4521bd303733a9f2b upstream. + +RFC says we need to follow the chain of mounts if there's more +than one stacked on that point. + +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfsd/nfs4xdr.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -2121,9 +2121,15 @@ out_acl: + * and this is the root of a cross-mounted filesystem. + */ + if (ignore_crossmnt == 0 && +- exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) { +- err = vfs_getattr(exp->ex_path.mnt->mnt_parent, +- exp->ex_path.mnt->mnt_mountpoint, &stat); ++ dentry == exp->ex_path.mnt->mnt_root) { ++ struct path path = exp->ex_path; ++ path_get(&path); ++ while (follow_up(&path)) { ++ if (path.dentry != path.mnt->mnt_root) ++ break; ++ } ++ err = vfs_getattr(path.mnt, path.dentry, &stat); ++ path_put(&path); + if (err) + goto out_nfserr; + } diff --git a/queue-2.6.33/series b/queue-2.6.33/series index 04e7d5cba88..866fa40939c 100644 --- a/queue-2.6.33/series +++ b/queue-2.6.33/series @@ -82,3 +82,10 @@ x86-cacheinfo-calculate-l3-indices.patch x86-cacheinfo-remove-numa-dependency-fix-for-amd-fam10h-rev-d1.patch x86-cacheinfo-enable-l3-cid-only-on-amd.patch dm-ioctl-introduce-flag-indicating-uevent-was-generated.patch +x86-32-clean-up-rwsem-inline-asm-statements.patch +x86-clean-up-rwsem-type-system.patch +x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch +x86-64-support-native-xadd-rwsem-implementation.patch +x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch +x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch +fix-nfs4-handling-of-mountpoint-stat.patch diff --git a/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch b/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch new file mode 100644 index 00000000000..2a212a4232c --- /dev/null +++ b/queue-2.6.33/x86-32-clean-up-rwsem-inline-asm-statements.patch @@ -0,0 +1,133 @@ +From 59c33fa7791e9948ba467c2b83e307a0d087ab49 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 16:21:09 -0800 +Subject: x86-32: clean up rwsem inline asm statements + +From: Linus Torvalds + +commit 59c33fa7791e9948ba467c2b83e307a0d087ab49 upstream. + +This makes gcc use the right register names and instruction operand sizes +automatically for the rwsem inline asm statements. + +So instead of using "(%%eax)" to specify the memory address that is the +semaphore, we use "(%1)" or similar. And instead of forcing the operation +to always be 32-bit, we use "%z0", taking the size from the actual +semaphore data structure itself. 
+ +This doesn't actually matter on x86-32, but if we want to use the same +inline asm for x86-64, we'll need to have the compiler generate the proper +64-bit names for the registers (%rax instead of %eax), and if we want to +use a 64-bit counter too (in order to avoid the 15-bit limit on the +write counter that limits concurrent users to 32767 threads), we'll need +to be able to generate instructions with "q" accesses rather than "l". + +Since this header currently isn't enabled on x86-64, none of that matters, +but we do want to use the xadd version of the semaphores rather than have +to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended +when you have lots of threads all taking page faults, and the fallback +rwsem code that uses a spinlock performs abysmally badly in that case. + +[ hpa: modified the patch to skip size suffixes entirely when they are + redundant due to register operands. ] + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -105,7 +105,7 @@ do { \ + static inline void __down_read(struct rw_semaphore *sem) + { + asm volatile("# beginning down_read\n\t" +- LOCK_PREFIX " incl (%%eax)\n\t" ++ LOCK_PREFIX " inc%z0 (%1)\n\t" + /* adds 0x00000001, returns the old value */ + " jns 1f\n" + " call call_rwsem_down_read_failed\n" +@@ -123,12 +123,12 @@ static inline int __down_read_trylock(st + { + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" +- " movl %0,%1\n\t" ++ " mov %0,%1\n\t" + "1:\n\t" +- " movl %1,%2\n\t" +- " addl %3,%2\n\t" ++ " mov %1,%2\n\t" ++ " add %3,%2\n\t" + " jle 2f\n\t" +- LOCK_PREFIX " cmpxchgl %2,%0\n\t" ++ LOCK_PREFIX " cmpxchg %2,%0\n\t" + " jnz 1b\n\t" + "2:\n\t" + "# ending __down_read_trylock\n\t" +@@ -147,9 +147,9 @@ static inline void __down_write_nested(s + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" +- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtract 0x0000ffff, returns the old value */ +- " testl %%edx,%%edx\n\t" ++ " test %1,%1\n\t" + /* was the count 0 before? 
*/ + " jz 1f\n" + " call call_rwsem_down_write_failed\n" +@@ -185,7 +185,7 @@ static inline void __up_read(struct rw_s + { + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" +- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtracts 1, returns the old value */ + " jns 1f\n\t" + " call call_rwsem_wake\n" +@@ -201,18 +201,18 @@ static inline void __up_read(struct rw_s + */ + static inline void __up_write(struct rw_semaphore *sem) + { ++ unsigned long tmp; + asm volatile("# beginning __up_write\n\t" +- " movl %2,%%edx\n\t" +- LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" ++ LOCK_PREFIX " xadd %1,(%2)\n\t" + /* tries to transition + 0xffff0001 -> 0x00000000 */ + " jz 1f\n" + " call call_rwsem_wake\n" + "1:\n\t" + "# ending __up_write\n" +- : "+m" (sem->count) +- : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) +- : "memory", "cc", "edx"); ++ : "+m" (sem->count), "=d" (tmp) ++ : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS) ++ : "memory", "cc"); + } + + /* +@@ -221,7 +221,7 @@ static inline void __up_write(struct rw_ + static inline void __downgrade_write(struct rw_semaphore *sem) + { + asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX " addl %2,(%%eax)\n\t" ++ LOCK_PREFIX " add%z0 %2,(%1)\n\t" + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" +@@ -237,7 +237,7 @@ static inline void __downgrade_write(str + */ + static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "addl %1,%0" ++ asm volatile(LOCK_PREFIX "add%z0 %1,%0" + : "+m" (sem->count) + : "ir" (delta)); + } diff --git a/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch b/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch new file mode 100644 index 00000000000..48e7b2ee4af --- /dev/null +++ b/queue-2.6.33/x86-64-rwsem-64-bit-xadd-rwsem-implementation.patch @@ -0,0 +1,141 @@ +From 1838ef1d782f7527e6defe87e180598622d2d071 Mon Sep 17 00:00:00 2001 +From: H. Peter Anvin +Date: Mon, 18 Jan 2010 14:00:34 -0800 +Subject: x86-64, rwsem: 64-bit xadd rwsem implementation + +From: H. Peter Anvin + +commit 1838ef1d782f7527e6defe87e180598622d2d071 upstream. + +For x86-64, 32767 threads really is not enough. Change rwsem_count_t +to a signed long, so that it is 64 bits on x86-64. + +This required the following changes to the assembly code: + +a) %z0 doesn't work on all versions of gcc! At least gcc 4.4.2 as + shipped with Fedora 12 emits "ll" not "q" for 64 bits, even for + integer operands. Newer gccs apparently do this correctly, but + avoid this problem by using the _ASM_ macros instead of %z. +b) 64 bits immediates are only allowed in "movq $imm,%reg" + constructs... no others. Change some of the constraints to "e", + and fix the one case where we would have had to use an invalid + immediate -- in that case, we only care about the upper half + anyway, so just access the upper half. + +Signed-off-by: H. 
Peter Anvin +Cc: Linus Torvalds +LKML-Reference: +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 53 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 40 insertions(+), 13 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + + struct rwsem_waiter; + +@@ -56,18 +57,24 @@ extern asmregparm struct rw_semaphore * + /* + * the semaphore definition + * +- * The bias values and the counter type needs to be extended to 64 bits +- * if we want to have more than 32767 potential readers/writers ++ * The bias values and the counter type limits the number of ++ * potential readers/writers to 32767 for 32 bits and 2147483647 ++ * for 64 bits. + */ + +-#define RWSEM_UNLOCKED_VALUE 0x00000000 +-#define RWSEM_ACTIVE_BIAS 0x00000001 +-#define RWSEM_ACTIVE_MASK 0x0000ffff +-#define RWSEM_WAITING_BIAS (-0x00010000) ++#ifdef CONFIG_X86_64 ++# define RWSEM_ACTIVE_MASK 0xffffffffL ++#else ++# define RWSEM_ACTIVE_MASK 0x0000ffffL ++#endif ++ ++#define RWSEM_UNLOCKED_VALUE 0x00000000L ++#define RWSEM_ACTIVE_BIAS 0x00000001L ++#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) + #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS + #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +-typedef signed int rwsem_count_t; ++typedef signed long rwsem_count_t; + + struct rw_semaphore { + rwsem_count_t count; +@@ -110,7 +117,7 @@ do { \ + static inline void __down_read(struct rw_semaphore *sem) + { + asm volatile("# beginning down_read\n\t" +- LOCK_PREFIX " inc%z0 (%1)\n\t" ++ LOCK_PREFIX _ASM_INC "(%1)\n\t" + /* adds 0x00000001, returns the old value */ + " jns 1f\n" + " call call_rwsem_down_read_failed\n" +@@ -225,8 +232,25 @@ static inline void __up_write(struct rw_ + */ + static inline void __downgrade_write(struct rw_semaphore *sem) + { ++#ifdef CONFIG_X86_64 ++# if RWSEM_WAITING_BIAS != -0x100000000 ++# error "This code assumes RWSEM_WAITING_BIAS == -2^32" ++# endif ++ ++ /* 64-bit immediates are special and expensive, and not needed here */ ++ asm volatile("# beginning __downgrade_write\n\t" ++ LOCK_PREFIX "incl 4(%1)\n\t" ++ /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */ ++ " jns 1f\n\t" ++ " call call_rwsem_downgrade_wake\n" ++ "1:\n\t" ++ "# ending __downgrade_write\n" ++ : "+m" (sem->count) ++ : "a" (sem) ++ : "memory", "cc"); ++#else + asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX " add%z0 %2,(%1)\n\t" ++ LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" +@@ -235,22 +259,25 @@ static inline void __downgrade_write(str + : "+m" (sem->count) + : "a" (sem), "i" (-RWSEM_WAITING_BIAS) + : "memory", "cc"); ++#endif + } + + /* + * implement atomic add functionality + */ +-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) ++static inline void rwsem_atomic_add(rwsem_count_t delta, ++ struct rw_semaphore *sem) + { +- asm volatile(LOCK_PREFIX "add%z0 %1,%0" ++ asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" + : "+m" (sem->count) +- : "ir" (delta)); ++ : "er" (delta)); + } + + /* + * implement exchange and add functionality + */ +-static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem) ++static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, ++ struct rw_semaphore *sem) + { + rwsem_count_t tmp = delta; + diff --git a/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch 
b/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch new file mode 100644 index 00000000000..8489e98d16d --- /dev/null +++ b/queue-2.6.33/x86-64-rwsem-avoid-store-forwarding-hazard-in-__downgrade_write.patch @@ -0,0 +1,79 @@ +From 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc Mon Sep 17 00:00:00 2001 +From: Avi Kivity +Date: Sat, 13 Feb 2010 10:33:12 +0200 +Subject: x86-64, rwsem: Avoid store forwarding hazard in __downgrade_write + +From: Avi Kivity + +commit 0d1622d7f526311d87d7da2ee7dd14b73e45d3fc upstream. + +The Intel Architecture Optimization Reference Manual states that a short +load that follows a long store to the same object will suffer a store +forwading penalty, particularly if the two accesses use different addresses. +Trivially, a long load that follows a short store will also suffer a penalty. + +__downgrade_write() in rwsem incurs both penalties: the increment operation +will not be able to reuse a recently-loaded rwsem value, and its result will +not be reused by any recently-following rwsem operation. + +A comment in the code states that this is because 64-bit immediates are +special and expensive; but while they are slightly special (only a single +instruction allows them), they aren't expensive: a test shows that two loops, +one loading a 32-bit immediate and one loading a 64-bit immediate, both take +1.5 cycles per iteration. + +Fix this by changing __downgrade_write to use the same add instruction on +i386 and on x86_64, so that it uses the same operand size as all the other +rwsem functions. + +Signed-off-by: Avi Kivity +LKML-Reference: <1266049992-17419-1-git-send-email-avi@redhat.com> +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 25 +++++-------------------- + 1 file changed, 5 insertions(+), 20 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -232,34 +232,19 @@ static inline void __up_write(struct rw_ + */ + static inline void __downgrade_write(struct rw_semaphore *sem) + { +-#ifdef CONFIG_X86_64 +-# if RWSEM_WAITING_BIAS != -0x100000000 +-# error "This code assumes RWSEM_WAITING_BIAS == -2^32" +-# endif +- +- /* 64-bit immediates are special and expensive, and not needed here */ +- asm volatile("# beginning __downgrade_write\n\t" +- LOCK_PREFIX "incl 4(%1)\n\t" +- /* transitions 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 */ +- " jns 1f\n\t" +- " call call_rwsem_downgrade_wake\n" +- "1:\n\t" +- "# ending __downgrade_write\n" +- : "+m" (sem->count) +- : "a" (sem) +- : "memory", "cc"); +-#else + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" +- /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ ++ /* ++ * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) ++ * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) ++ */ + " jns 1f\n\t" + " call call_rwsem_downgrade_wake\n" + "1:\n\t" + "# ending __downgrade_write\n" + : "+m" (sem->count) +- : "a" (sem), "i" (-RWSEM_WAITING_BIAS) ++ : "a" (sem), "er" (-RWSEM_WAITING_BIAS) + : "memory", "cc"); +-#endif + } + + /* diff --git a/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch b/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch new file mode 100644 index 00000000000..da419ac2a41 --- /dev/null +++ b/queue-2.6.33/x86-64-support-native-xadd-rwsem-implementation.patch @@ -0,0 +1,148 @@ +From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 18:16:42 -0800 +Subject: x86-64: support 
native xadd rwsem implementation + +From: Linus Torvalds + +commit bafaecd11df15ad5b1e598adc7736afcd38ee13d upstream. + +This one is much faster than the spinlock based fallback rwsem code, +with certain artifical benchmarks having shown 300%+ improvement on +threaded page faults etc. + +Again, note the 32767-thread limit here. So this really does need that +whole "make rwsem_count_t be 64-bit and fix the BIAS values to match" +extension on top of it, but that is conceptually a totally independent +issue. + +NOT TESTED! The original patch that this all was based on were tested by +KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the +cleaned-up series, so caveat emptor.. + +Also note that it _may_ be a good idea to mark some more registers +clobbered on x86-64 in the inline asms instead of saving/restoring them. +They are inline functions, but they are only used in places where there +are not a lot of live registers _anyway_, so doing for example the +clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any +worse, and would make the slow-path code smaller. + +(Not that the slow-path really matters to that degree. Saving a few +unnecessary registers is the _least_ of our problems when we hit the slow +path. The instruction/cycle counting really only matters in the fast +path). + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig.cpu | 2 - + arch/x86/lib/Makefile | 1 + arch/x86/lib/rwsem_64.S | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 83 insertions(+), 1 deletion(-) + +--- a/arch/x86/Kconfig.cpu ++++ b/arch/x86/Kconfig.cpu +@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT + + config X86_XADD + def_bool y +- depends on X86_32 && !M386 ++ depends on X86_64 || !M386 + + config X86_PPRO_FENCE + bool "PentiumPro memory ordering errata workaround" +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -39,4 +39,5 @@ else + lib-y += thunk_64.o clear_page_64.o copy_page_64.o + lib-y += memmove_64.o memset_64.o + lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o ++ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o + endif +--- /dev/null ++++ b/arch/x86/lib/rwsem_64.S +@@ -0,0 +1,81 @@ ++/* ++ * x86-64 rwsem wrappers ++ * ++ * This interfaces the inline asm code to the slow-path ++ * C routines. We need to save the call-clobbered regs ++ * that the asm does not mark as clobbered, and move the ++ * argument from %rax to %rdi. ++ * ++ * NOTE! We don't need to save %rax, because the functions ++ * will always return the semaphore pointer in %rax (which ++ * is also the input argument to these helpers) ++ * ++ * The following can clobber %rdx because the asm clobbers it: ++ * call_rwsem_down_write_failed ++ * call_rwsem_wake ++ * but %rdi, %rsi, %rcx, %r8-r11 always need saving. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define save_common_regs \ ++ pushq %rdi; \ ++ pushq %rsi; \ ++ pushq %rcx; \ ++ pushq %r8; \ ++ pushq %r9; \ ++ pushq %r10; \ ++ pushq %r11 ++ ++#define restore_common_regs \ ++ popq %r11; \ ++ popq %r10; \ ++ popq %r9; \ ++ popq %r8; \ ++ popq %rcx; \ ++ popq %rsi; \ ++ popq %rdi ++ ++/* Fix up special calling conventions */ ++ENTRY(call_rwsem_down_read_failed) ++ save_common_regs ++ pushq %rdx ++ movq %rax,%rdi ++ call rwsem_down_read_failed ++ popq %rdx ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_down_read_failed) ++ ++ENTRY(call_rwsem_down_write_failed) ++ save_common_regs ++ movq %rax,%rdi ++ call rwsem_down_write_failed ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_down_write_failed) ++ ++ENTRY(call_rwsem_wake) ++ decw %dx /* do nothing if still outstanding active readers */ ++ jnz 1f ++ save_common_regs ++ movq %rax,%rdi ++ call rwsem_wake ++ restore_common_regs ++1: ret ++ ENDPROC(call_rwsem_wake) ++ ++/* Fix up special calling conventions */ ++ENTRY(call_rwsem_downgrade_wake) ++ save_common_regs ++ pushq %rdx ++ movq %rax,%rdi ++ call rwsem_downgrade_wake ++ popq %rdx ++ restore_common_regs ++ ret ++ ENDPROC(call_rwsem_downgrade_wake) diff --git a/queue-2.6.33/x86-clean-up-rwsem-type-system.patch b/queue-2.6.33/x86-clean-up-rwsem-type-system.patch new file mode 100644 index 00000000000..4e64afcfb7d --- /dev/null +++ b/queue-2.6.33/x86-clean-up-rwsem-type-system.patch @@ -0,0 +1,117 @@ +From 5d0b7235d83eefdafda300656e97d368afcafc9a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 12 Jan 2010 17:57:35 -0800 +Subject: x86: clean up rwsem type system + +From: Linus Torvalds + +commit 5d0b7235d83eefdafda300656e97d368afcafc9a upstream. + +The fast version of the rwsems (the code that uses xadd) has +traditionally only worked on x86-32, and as a result it mixes different +kinds of types wildly - they just all happen to be 32-bit. We have +"long", we have "__s32", and we have "int". + +To make it work on x86-64, the types suddenly matter a lot more. It can +be either a 32-bit or 64-bit signed type, and both work (with the caveat +that a 32-bit counter will only have 15 bits of effective write +counters, so it's limited to 32767 users). But whatever type you +choose, it needs to be used consistently. + +This makes a new 'rwsem_counter_t', that is a 32-bit signed type. For a +64-bit type, you'd need to also update the BIAS values. + +Signed-off-by: Linus Torvalds +LKML-Reference: +Signed-off-by: H. 
Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/rwsem.h | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/rwsem.h ++++ b/arch/x86/include/asm/rwsem.h +@@ -55,6 +55,9 @@ extern asmregparm struct rw_semaphore * + + /* + * the semaphore definition ++ * ++ * The bias values and the counter type needs to be extended to 64 bits ++ * if we want to have more than 32767 potential readers/writers + */ + + #define RWSEM_UNLOCKED_VALUE 0x00000000 +@@ -64,8 +67,10 @@ extern asmregparm struct rw_semaphore * + #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS + #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + ++typedef signed int rwsem_count_t; ++ + struct rw_semaphore { +- signed long count; ++ rwsem_count_t count; + spinlock_t wait_lock; + struct list_head wait_list; + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -121,7 +126,7 @@ static inline void __down_read(struct rw + */ + static inline int __down_read_trylock(struct rw_semaphore *sem) + { +- __s32 result, tmp; ++ rwsem_count_t result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" + " mov %0,%1\n\t" + "1:\n\t" +@@ -143,7 +148,7 @@ static inline int __down_read_trylock(st + */ + static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) + { +- int tmp; ++ rwsem_count_t tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile("# beginning down_write\n\t" +@@ -170,9 +175,9 @@ static inline void __down_write(struct r + */ + static inline int __down_write_trylock(struct rw_semaphore *sem) + { +- signed long ret = cmpxchg(&sem->count, +- RWSEM_UNLOCKED_VALUE, +- RWSEM_ACTIVE_WRITE_BIAS); ++ rwsem_count_t ret = cmpxchg(&sem->count, ++ RWSEM_UNLOCKED_VALUE, ++ RWSEM_ACTIVE_WRITE_BIAS); + if (ret == RWSEM_UNLOCKED_VALUE) + return 1; + return 0; +@@ -183,7 +188,7 @@ static inline int __down_write_trylock(s + */ + static inline void __up_read(struct rw_semaphore *sem) + { +- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; ++ rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" + /* subtracts 1, returns the old value */ +@@ -201,7 +206,7 @@ static inline void __up_read(struct rw_s + */ + static inline void __up_write(struct rw_semaphore *sem) + { +- unsigned long tmp; ++ rwsem_count_t tmp; + asm volatile("# beginning __up_write\n\t" + LOCK_PREFIX " xadd %1,(%2)\n\t" + /* tries to transition +@@ -245,9 +250,9 @@ static inline void rwsem_atomic_add(int + /* + * implement exchange and add functionality + */ +-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) ++static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem) + { +- int tmp = delta; ++ rwsem_count_t tmp = delta; + + asm volatile(LOCK_PREFIX "xadd %0,%1" + : "+r" (tmp), "+m" (sem->count) diff --git a/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch b/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch new file mode 100644 index 00000000000..96d50d2bab1 --- /dev/null +++ b/queue-2.6.33/x86-fix-breakage-of-uml-from-the-changes-in-the-rwsem-system.patch @@ -0,0 +1,50 @@ +From 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 17 Jan 2010 10:24:07 -0800 +Subject: x86: Fix breakage of UML from the changes in the rwsem system + +From: Linus Torvalds + +commit 4126faf0ab7417fbc6eb99fb0fd407e01e9e9dfe upstream. 
+ +The patches 5d0b7235d83eefdafda300656e97d368afcafc9a and +bafaecd11df15ad5b1e598adc7736afcd38ee13d broke the UML build: + +On Sun, 17 Jan 2010, Ingo Molnar wrote: +> +> FYI, -tip testing found that these changes break the UML build: +> +> kernel/built-in.o: In function `__up_read': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:192: undefined reference to `call_rwsem_wake' +> kernel/built-in.o: In function `__up_write': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:210: undefined reference to `call_rwsem_wake' +> kernel/built-in.o: In function `__downgrade_write': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:228: undefined reference to `call_rwsem_downgrade_wake' +> kernel/built-in.o: In function `__down_read': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:112: undefined reference to `call_rwsem_down_read_failed' +> kernel/built-in.o: In function `__down_write_nested': +> /home/mingo/tip/arch/x86/include/asm/rwsem.h:154: undefined reference to `call_rwsem_down_write_failed' +> collect2: ld returned 1 exit status + +Add lib/rwsem_64.o to the UML subarch objects to fix. + +LKML-Reference: +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + arch/um/sys-x86_64/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/um/sys-x86_64/Makefile ++++ b/arch/um/sys-x86_64/Makefile +@@ -8,7 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt + setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ + sysrq.o ksyms.o tls.o + +-subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o ++subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \ ++ lib/rwsem_64.o + subarch-obj-$(CONFIG_MODULES) += kernel/module.o + + ldt-y = ../sys-i386/ldt.o
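
For reference, below is a minimal user-space sketch of the counter arithmetic the rwsem series above relies on. It is not the kernel code: the bias values and the sign/zero checks are taken from the patches, but the sketch_* helper names are made up for illustration, and gcc's __atomic_fetch_add()/__atomic_add_fetch() builtins stand in for the lock-prefixed inc/xadd instructions in the inline asm. The 64-bit bias values from the x86-64 patch are used; on 32-bit, RWSEM_ACTIVE_MASK would be 0x0000ffffL instead.

/*
 * Sketch only -- models the rwsem bias arithmetic and the sign/zero
 * tests ("jns" / "jz") that decide fast path vs. slow path.  The
 * __atomic builtins stand in for "lock inc" / "lock xadd".
 */
#include <stdio.h>

typedef signed long rwsem_count_t;            /* 64-bit counter, as in the x86-64 patch */

#define RWSEM_ACTIVE_MASK       0xffffffffL   /* 0x0000ffffL on 32-bit */
#define RWSEM_UNLOCKED_VALUE    0x00000000L
#define RWSEM_ACTIVE_BIAS       0x00000001L
#define RWSEM_WAITING_BIAS      (-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_READ_BIAS  RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

struct sketch_rwsem { rwsem_count_t count; };

static void sketch_down_read(struct sketch_rwsem *sem)
{
	/* kernel: LOCK_PREFIX inc, then "jns" -- slow path if the result went negative */
	rwsem_count_t val = __atomic_add_fetch(&sem->count, RWSEM_ACTIVE_READ_BIAS,
					       __ATOMIC_ACQUIRE);
	if (val < 0)
		printf("down_read: would call call_rwsem_down_read_failed\n");
}

static void sketch_down_write(struct sketch_rwsem *sem)
{
	/* kernel: xadd of ACTIVE_WRITE_BIAS, then "test"/"jz" on the OLD value */
	rwsem_count_t old = __atomic_fetch_add(&sem->count, RWSEM_ACTIVE_WRITE_BIAS,
					       __ATOMIC_ACQUIRE);
	if (old != 0)
		printf("down_write: would call call_rwsem_down_write_failed\n");
}

static void sketch_downgrade_write(struct sketch_rwsem *sem)
{
	/* kernel: add -RWSEM_WAITING_BIAS; a still-negative result means waiters to wake */
	rwsem_count_t val = __atomic_add_fetch(&sem->count, -RWSEM_WAITING_BIAS,
					       __ATOMIC_RELEASE);
	if (val < 0)
		printf("downgrade_write: would call call_rwsem_downgrade_wake\n");
}

int main(void)
{
	struct sketch_rwsem sem = { RWSEM_UNLOCKED_VALUE };

	sketch_down_write(&sem);      /* 0 -> 0xffffffff00000001 (one active writer)    */
	sketch_downgrade_write(&sem); /*   -> 0x0000000000000001 (writer becomes reader) */
	sketch_down_read(&sem);       /*   -> 0x0000000000000002 (two active readers)    */
	printf("final count = %#lx\n", (unsigned long)sem.count);
	return 0;
}

It should build with any gcc or clang that provides the __atomic builtins on a 64-bit host (e.g. gcc -O2 rwsem_sketch.c); the point is only to show how a single xadd plus a sign or zero test separates the uncontended fast path from the call_rwsem_* slow paths used in the patches.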