From: Greg Kroah-Hartman
Date: Wed, 15 Aug 2012 14:50:31 +0000 (-0700)
Subject: removed an ARM mutex patch. Hopefully someone remembers to resend it later...
X-Git-Tag: v3.5.2~1
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c2c34c780e3259ab662868694090818200b3d2b5;p=thirdparty%2Fkernel%2Fstable-queue.git

removed an ARM mutex patch. Hopefully someone remembers to resend it later...

queue-3.0/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
queue-3.4/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
queue-3.5/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
---

diff --git a/queue-3.0/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch b/queue-3.0/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
deleted file mode 100644
index 8067dd670af..00000000000
--- a/queue-3.0/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
+++ /dev/null
@@ -1,162 +0,0 @@
-From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001
-From: Will Deacon
-Date: Fri, 13 Jul 2012 19:15:40 +0100
-Subject: ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+
-
-From: Will Deacon
-
-commit a76d7bd96d65fa5119adba97e1b58d95f2e78829 upstream.
-
-The open-coded mutex implementation for ARMv6+ cores suffers from a
-severe lack of barriers, so in the uncontended case we don't actually
-protect any accesses performed during the critical section.
-
-Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec
-code but optimised to remove a branch instruction, as the mutex fastpath
-was previously inlined. Now that this is executed out-of-line, we can
-reuse the atomic access code for the locking (in fact, we use the xchg
-code as this produces shorter critical sections).
-
-This patch uses the generic xchg based implementation for mutexes on
-ARMv6+, which introduces barriers to the lock/unlock operations and also
-has the benefit of removing a fair amount of inline assembly code.
-
-Acked-by: Arnd Bergmann
-Acked-by: Nicolas Pitre
-Reported-by: Shan Kang
-Signed-off-by: Will Deacon
-Signed-off-by: Russell King
-Signed-off-by: Greg Kroah-Hartman
-
----
- arch/arm/include/asm/mutex.h | 119 +------------------------------------------
- 1 file changed, 4 insertions(+), 115 deletions(-)
-
---- a/arch/arm/include/asm/mutex.h
-+++ b/arch/arm/include/asm/mutex.h
-@@ -7,121 +7,10 @@
- */
- #ifndef _ASM_MUTEX_H
- #define _ASM_MUTEX_H
--
--#if __LINUX_ARM_ARCH__ < 6
--/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
--# include <asm-generic/mutex-xchg.h>
--#else
--
- /*
-- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
-- * atomic decrement (it is not a reliable atomic decrement but it satisfies
-- * the defined semantics for our purpose, while being smaller and faster
-- * than a real atomic decrement or atomic swap. The idea is to attempt
-- * decrementing the lock value only once. If once decremented it isn't zero,
-- * or if its store-back fails due to a dispute on the exclusive store, we
-- * simply bail out immediately through the slow path where the lock will be
-- * reattempted until it succeeds.
-+ * On pre-ARMv6 hardware this results in a swp-based implementation,
-+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
-+ * accesses instead.
- */
--static inline void
--__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- fail_fn(count);
--}
--
--static inline int
--__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- __res = fail_fn(count);
-- return __res;
--}
--
--/*
-- * Same trick is used for the unlock fast path. However the original value,
-- * rather than the result, is used to test for success in order to have
-- * better generated assembly.
-- */
--static inline void
--__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "ldrex %0, [%3] \n\t"
-- "add %1, %0, #1 \n\t"
-- "strex %2, %1, [%3] "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __orig |= __ex_flag;
-- if (unlikely(__orig != 0))
-- fail_fn(count);
--}
--
--/*
-- * If the unlock was done on a contended lock, or if the unlock simply fails
-- * then the mutex remains locked.
-- */
--#define __mutex_slowpath_needs_to_unlock() 1
--
--/*
-- * For __mutex_fastpath_trylock we use another construct which could be
-- * described as a "single value cmpxchg".
-- *
-- * This provides the needed trylock semantics like cmpxchg would, but it is
-- * lighter and less generic than a true cmpxchg implementation.
-- */
--static inline int
--__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "1: ldrex %0, [%3] \n\t"
-- "subs %1, %0, #1 \n\t"
-- "strexeq %2, %1, [%3] \n\t"
-- "movlt %0, #0 \n\t"
-- "cmpeq %2, #0 \n\t"
-- "bgt 1b "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&count->counter)
-- : "cc", "memory" );
--
-- return __orig;
--}
--
--#endif
-+#include <asm-generic/mutex-xchg.h>
- #endif
diff --git a/queue-3.0/series b/queue-3.0/series
index 60fe12c44bb..88864768473 100644
--- a/queue-3.0/series
+++ b/queue-3.0/series
@@ -3,7 +3,6 @@ sunrpc-return-negative-value-in-case-rpcbind-client-creation-error.patch
 nilfs2-fix-deadlock-issue-between-chcp-and-thaw-ioctls.patch
 pcdp-use-early_ioremap-early_iounmap-to-access-pcdp-table.patch
 mm-fix-wrong-argument-of-migrate_huge_pages-in-soft_offline_huge_page.patch
-arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
 arm-7478-1-errata-extend-workaround-for-erratum-720789.patch
 arm-7479-1-mm-avoid-null-dereference-when-flushing-gate_vma-with-vivt-caches.patch
 mm-mmu_notifier-fix-freed-page-still-mapped-in-secondary-mmu.patch
diff --git a/queue-3.4/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch b/queue-3.4/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
deleted file mode 100644
index 8067dd670af..00000000000
--- a/queue-3.4/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
+++ /dev/null
@@ -1,162 +0,0 @@
-From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001
-From: Will Deacon
-Date: Fri, 13 Jul 2012 19:15:40 +0100
-Subject: ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+
-
-From: Will Deacon
-
-commit a76d7bd96d65fa5119adba97e1b58d95f2e78829 upstream.
-
-The open-coded mutex implementation for ARMv6+ cores suffers from a
-severe lack of barriers, so in the uncontended case we don't actually
-protect any accesses performed during the critical section.
-
-Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec
-code but optimised to remove a branch instruction, as the mutex fastpath
-was previously inlined. Now that this is executed out-of-line, we can
-reuse the atomic access code for the locking (in fact, we use the xchg
-code as this produces shorter critical sections).
-
-This patch uses the generic xchg based implementation for mutexes on
-ARMv6+, which introduces barriers to the lock/unlock operations and also
-has the benefit of removing a fair amount of inline assembly code.
-
-Acked-by: Arnd Bergmann
-Acked-by: Nicolas Pitre
-Reported-by: Shan Kang
-Signed-off-by: Will Deacon
-Signed-off-by: Russell King
-Signed-off-by: Greg Kroah-Hartman
-
----
- arch/arm/include/asm/mutex.h | 119 +------------------------------------------
- 1 file changed, 4 insertions(+), 115 deletions(-)
-
---- a/arch/arm/include/asm/mutex.h
-+++ b/arch/arm/include/asm/mutex.h
-@@ -7,121 +7,10 @@
- */
- #ifndef _ASM_MUTEX_H
- #define _ASM_MUTEX_H
--
--#if __LINUX_ARM_ARCH__ < 6
--/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
--# include <asm-generic/mutex-xchg.h>
--#else
--
- /*
-- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
-- * atomic decrement (it is not a reliable atomic decrement but it satisfies
-- * the defined semantics for our purpose, while being smaller and faster
-- * than a real atomic decrement or atomic swap. The idea is to attempt
-- * decrementing the lock value only once. If once decremented it isn't zero,
-- * or if its store-back fails due to a dispute on the exclusive store, we
-- * simply bail out immediately through the slow path where the lock will be
-- * reattempted until it succeeds.
-+ * On pre-ARMv6 hardware this results in a swp-based implementation,
-+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
-+ * accesses instead.
- */
--static inline void
--__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- fail_fn(count);
--}
--
--static inline int
--__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- __res = fail_fn(count);
-- return __res;
--}
--
--/*
-- * Same trick is used for the unlock fast path. However the original value,
-- * rather than the result, is used to test for success in order to have
-- * better generated assembly.
-- */
--static inline void
--__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "ldrex %0, [%3] \n\t"
-- "add %1, %0, #1 \n\t"
-- "strex %2, %1, [%3] "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __orig |= __ex_flag;
-- if (unlikely(__orig != 0))
-- fail_fn(count);
--}
--
--/*
-- * If the unlock was done on a contended lock, or if the unlock simply fails
-- * then the mutex remains locked.
-- */
--#define __mutex_slowpath_needs_to_unlock() 1
--
--/*
-- * For __mutex_fastpath_trylock we use another construct which could be
-- * described as a "single value cmpxchg".
-- *
-- * This provides the needed trylock semantics like cmpxchg would, but it is
-- * lighter and less generic than a true cmpxchg implementation.
-- */
--static inline int
--__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "1: ldrex %0, [%3] \n\t"
-- "subs %1, %0, #1 \n\t"
-- "strexeq %2, %1, [%3] \n\t"
-- "movlt %0, #0 \n\t"
-- "cmpeq %2, #0 \n\t"
-- "bgt 1b "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&count->counter)
-- : "cc", "memory" );
--
-- return __orig;
--}
--
--#endif
-+#include <asm-generic/mutex-xchg.h>
- #endif
diff --git a/queue-3.4/series b/queue-3.4/series
index a48a0dc2135..4793868e82c 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -7,7 +7,6 @@ media-ene_ir-fix-driver-initialisation.patch
 pcdp-use-early_ioremap-early_iounmap-to-access-pcdp-table.patch
 mm-fix-wrong-argument-of-migrate_huge_pages-in-soft_offline_huge_page.patch
 arm-7466-1-disable-interrupt-before-spinning-endlessly.patch
-arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
 arm-7476-1-vfp-only-clear-vfp-state-for-current-cpu-in-vfp_pm_suspend.patch
 arm-7477-1-vfp-always-save-vfp-state-in-vfp_pm_suspend-on-up.patch
 arm-7478-1-errata-extend-workaround-for-erratum-720789.patch
diff --git a/queue-3.5/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch b/queue-3.5/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
deleted file mode 100644
index 8067dd670af..00000000000
--- a/queue-3.5/arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
+++ /dev/null
@@ -1,162 +0,0 @@
-From a76d7bd96d65fa5119adba97e1b58d95f2e78829 Mon Sep 17 00:00:00 2001
-From: Will Deacon
-Date: Fri, 13 Jul 2012 19:15:40 +0100
-Subject: ARM: 7467/1: mutex: use generic xchg-based implementation for ARMv6+
-
-From: Will Deacon
-
-commit a76d7bd96d65fa5119adba97e1b58d95f2e78829 upstream.
-
-The open-coded mutex implementation for ARMv6+ cores suffers from a
-severe lack of barriers, so in the uncontended case we don't actually
-protect any accesses performed during the critical section.
-
-Furthermore, the code is largely a duplication of the ARMv6+ atomic_dec
-code but optimised to remove a branch instruction, as the mutex fastpath
-was previously inlined. Now that this is executed out-of-line, we can
-reuse the atomic access code for the locking (in fact, we use the xchg
-code as this produces shorter critical sections).
-
-This patch uses the generic xchg based implementation for mutexes on
-ARMv6+, which introduces barriers to the lock/unlock operations and also
-has the benefit of removing a fair amount of inline assembly code.
-
-Acked-by: Arnd Bergmann
-Acked-by: Nicolas Pitre
-Reported-by: Shan Kang
-Signed-off-by: Will Deacon
-Signed-off-by: Russell King
-Signed-off-by: Greg Kroah-Hartman
-
----
- arch/arm/include/asm/mutex.h | 119 +------------------------------------------
- 1 file changed, 4 insertions(+), 115 deletions(-)
-
---- a/arch/arm/include/asm/mutex.h
-+++ b/arch/arm/include/asm/mutex.h
-@@ -7,121 +7,10 @@
- */
- #ifndef _ASM_MUTEX_H
- #define _ASM_MUTEX_H
--
--#if __LINUX_ARM_ARCH__ < 6
--/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
--# include <asm-generic/mutex-xchg.h>
--#else
--
- /*
-- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
-- * atomic decrement (it is not a reliable atomic decrement but it satisfies
-- * the defined semantics for our purpose, while being smaller and faster
-- * than a real atomic decrement or atomic swap. The idea is to attempt
-- * decrementing the lock value only once. If once decremented it isn't zero,
-- * or if its store-back fails due to a dispute on the exclusive store, we
-- * simply bail out immediately through the slow path where the lock will be
-- * reattempted until it succeeds.
-+ * On pre-ARMv6 hardware this results in a swp-based implementation,
-+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
-+ * accesses instead.
- */
--static inline void
--__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- fail_fn(count);
--}
--
--static inline int
--__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res;
--
-- __asm__ (
--
-- "ldrex %0, [%2] \n\t"
-- "sub %0, %0, #1 \n\t"
-- "strex %1, %0, [%2] "
--
-- : "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __res |= __ex_flag;
-- if (unlikely(__res != 0))
-- __res = fail_fn(count);
-- return __res;
--}
--
--/*
-- * Same trick is used for the unlock fast path. However the original value,
-- * rather than the result, is used to test for success in order to have
-- * better generated assembly.
-- */
--static inline void
--__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "ldrex %0, [%3] \n\t"
-- "add %1, %0, #1 \n\t"
-- "strex %2, %1, [%3] "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&(count)->counter)
-- : "cc","memory" );
--
-- __orig |= __ex_flag;
-- if (unlikely(__orig != 0))
-- fail_fn(count);
--}
--
--/*
-- * If the unlock was done on a contended lock, or if the unlock simply fails
-- * then the mutex remains locked.
-- */
--#define __mutex_slowpath_needs_to_unlock() 1
--
--/*
-- * For __mutex_fastpath_trylock we use another construct which could be
-- * described as a "single value cmpxchg".
-- *
-- * This provides the needed trylock semantics like cmpxchg would, but it is
-- * lighter and less generic than a true cmpxchg implementation.
-- */
--static inline int
--__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
--{
-- int __ex_flag, __res, __orig;
--
-- __asm__ (
--
-- "1: ldrex %0, [%3] \n\t"
-- "subs %1, %0, #1 \n\t"
-- "strexeq %2, %1, [%3] \n\t"
-- "movlt %0, #0 \n\t"
-- "cmpeq %2, #0 \n\t"
-- "bgt 1b "
--
-- : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-- : "r" (&count->counter)
-- : "cc", "memory" );
--
-- return __orig;
--}
--
--#endif
-+#include <asm-generic/mutex-xchg.h>
- #endif
diff --git a/queue-3.5/series b/queue-3.5/series
index a342a6f2d36..3089478b907 100644
--- a/queue-3.5/series
+++ b/queue-3.5/series
@@ -17,7 +17,6 @@ memcg-prevent-oom-with-too-many-dirty-pages.patch
 memcg-further-prevent-oom-with-too-many-dirty-pages.patch
 mm-fix-wrong-argument-of-migrate_huge_pages-in-soft_offline_huge_page.patch
 arm-7466-1-disable-interrupt-before-spinning-endlessly.patch
-arm-7467-1-mutex-use-generic-xchg-based-implementation-for-armv6.patch
 arm-7476-1-vfp-only-clear-vfp-state-for-current-cpu-in-vfp_pm_suspend.patch
 arm-7477-1-vfp-always-save-vfp-state-in-vfp_pm_suspend-on-up.patch
 arm-7478-1-errata-extend-workaround-for-erratum-720789.patch
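
The generic implementation the dropped patch would have switched ARMv6+ to builds the whole
mutex fastpath around a single atomic exchange, which carries the full barriers the open-coded
ldrex/strex sequences above were missing. The fragment below is a stand-alone user-space sketch
of that idea using C11 atomics; it is not the kernel header. The names toy_mutex_lock,
toy_mutex_unlock and lock_slowpath are invented for illustration, and the spinning fallback
merely stands in for a real mutex slow path, which would sleep and wake waiters instead.

/*
 * Toy user-space illustration of an xchg-based mutex fastpath (in the
 * spirit of the generic kernel version, but NOT the kernel code).
 * Lock word convention: 1 = unlocked, 0 = locked, -1 = locked/contended.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int count = 1;    /* the lock word */
static long shared;             /* data protected by the lock */

static void lock_slowpath(void)
{
        /*
         * The fastpath saw the lock held: keep exchanging in the
         * "contended" value until we observe 1 (unlocked).  A real
         * mutex slow path would sleep here rather than spin.
         */
        while (atomic_exchange(&count, -1) != 1)
                ;
}

static void toy_mutex_lock(void)
{
        /*
         * Fastpath: one atomic exchange.  atomic_exchange() defaults to
         * sequentially consistent ordering, so it also provides the
         * acquire barrier the old open-coded ARM fastpath lacked.
         */
        if (atomic_exchange(&count, 0) != 1)
                lock_slowpath();
}

static void toy_mutex_unlock(void)
{
        /*
         * Release: the exchange doubles as the release barrier.  A real
         * mutex would also check the old value and wake waiters when it
         * sees the contended state; the spinning waiters here don't need
         * that.
         */
        atomic_exchange(&count, 1);
}

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                toy_mutex_lock();
                shared++;       /* the critical section */
                toy_mutex_unlock();
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, worker, NULL);
        pthread_create(&b, NULL, worker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("shared = %ld (expected 200000)\n", shared);
        return 0;
}

Built with something like cc -O2 -pthread toy_mutex.c, the two workers always leave shared at
exactly 200000: the exchange both serializes ownership of the lock and orders the accesses made
inside the critical section, which is precisely what the barrier-free ARMv6+ fastpath above
failed to guarantee.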