From 736324ceeb199d856976365f16fda5caffc035b1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 3 Oct 2014 12:52:12 -0700
Subject: [PATCH] 3.14-stable patches

added patches:
      powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
      powerpc-add-smp_mb-to-arch_spin_is_locked.patch
      powerpc-perf-fix-abiv2-kernel-backtraces.patch
---
 ...dd-smp_mb-s-to-arch_spin_unlock_wait.patch |  47 +++++
 ...pc-add-smp_mb-to-arch_spin_is_locked.patch | 191 ++++++++++++++++++
 ...rpc-perf-fix-abiv2-kernel-backtraces.patch |  85 ++++++++
 queue-3.14/series                             |   3 +
 4 files changed, 326 insertions(+)
 create mode 100644 queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
 create mode 100644 queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
 create mode 100644 queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch

diff --git a/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch b/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
new file mode 100644
index 00000000000..ba9eb664c07
--- /dev/null
+++ b/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
@@ -0,0 +1,47 @@
+From 78e05b1421fa41ae8457701140933baa5e7d9479 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman
+Date: Thu, 7 Aug 2014 15:36:18 +1000
+Subject: powerpc: Add smp_mb()s to arch_spin_unlock_wait()
+
+From: Michael Ellerman
+
+commit 78e05b1421fa41ae8457701140933baa5e7d9479 upstream.
+
+Similar to the previous commit which described why we need to add a
+barrier to arch_spin_is_locked(), we have a similar problem with
+spin_unlock_wait().
+
+We need a barrier on entry to ensure any spinlock we have previously
+taken is visibly locked prior to the load of lock->slock.
+
+It's also not clear if spin_unlock_wait() is intended to have ACQUIRE
+semantics. For now be conservative and add a barrier on exit to give it
+ACQUIRE semantics.
+
+Signed-off-by: Michael Ellerman
+Signed-off-by: Benjamin Herrenschmidt
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/lib/locks.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/powerpc/lib/locks.c
++++ b/arch/powerpc/lib/locks.c
+@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
+ 
+ void arch_spin_unlock_wait(arch_spinlock_t *lock)
+ {
++	smp_mb();
++
+ 	while (lock->slock) {
+ 		HMT_low();
+ 		if (SHARED_PROCESSOR)
+ 			__spin_yield(lock);
+ 	}
+ 	HMT_medium();
++
++	smp_mb();
+ }
+ 
+ EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch b/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
new file mode 100644
index 00000000000..7e638de6c6a
--- /dev/null
+++ b/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
@@ -0,0 +1,191 @@
+From 51d7d5205d3389a32859f9939f1093f267409929 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman
+Date: Thu, 7 Aug 2014 15:36:17 +1000
+Subject: powerpc: Add smp_mb() to arch_spin_is_locked()
+
+From: Michael Ellerman
+
+commit 51d7d5205d3389a32859f9939f1093f267409929 upstream.
+
+The kernel defines the function spin_is_locked(), which can be used to
+check if a spinlock is currently locked.
+
+Using spin_is_locked() on a lock you don't hold is obviously racy. That
+is, even though you may observe that the lock is unlocked, it may become
+locked at any time.
+
+There is (at least) one exception to that, which is if two locks are
+used as a pair, and the holder of each checks the status of the other
+before doing any update.
+
+Assuming *A and *B are two locks, and *COUNTER is a shared non-atomic
+value:
+
+The first CPU does:
+
+  spin_lock(*A)
+
+  if spin_is_locked(*B)
+    # nothing
+  else
+    smp_mb()
+    LOAD r = *COUNTER
+    r++
+    STORE *COUNTER = r
+
+  spin_unlock(*A)
+
+And the second CPU does:
+
+  spin_lock(*B)
+
+  if spin_is_locked(*A)
+    # nothing
+  else
+    smp_mb()
+    LOAD r = *COUNTER
+    r++
+    STORE *COUNTER = r
+
+  spin_unlock(*B)
+
+Although this is a strange locking construct, it should work.
+
+It seems to be understood, but not documented, that spin_is_locked() is
+not a memory barrier, so in the examples above and below the caller
+inserts its own memory barrier before acting on the result of
+spin_is_locked().
+
+For now we assume spin_is_locked() is implemented as below, and we break
+it out in our examples:
+
+  bool spin_is_locked(*LOCK) {
+    LOAD l = *LOCK
+    return l.locked
+  }
+
+Our intuition is that there should be no problem even if the two code
+sequences run simultaneously such as:
+
+  CPU 0                   CPU 1
+  ==================================================
+  spin_lock(*A)           spin_lock(*B)
+  LOAD b = *B             LOAD a = *A
+  if b.locked # true      if a.locked # true
+  # nothing               # nothing
+  spin_unlock(*A)         spin_unlock(*B)
+
+If one CPU gets the lock before the other then it will do the update and
+the other CPU will back off:
+
+  CPU 0                   CPU 1
+  ==================================================
+  spin_lock(*A)
+  LOAD b = *B
+                          spin_lock(*B)
+  if b.locked # false     LOAD a = *A
+  else                    if a.locked # true
+  smp_mb()                # nothing
+  LOAD r1 = *COUNTER      spin_unlock(*B)
+  r1++
+  STORE *COUNTER = r1
+  spin_unlock(*A)
+
+However in reality spin_lock() itself is not indivisible. On powerpc we
+implement it as a load-and-reserve and store-conditional.
+
+Ignoring the retry logic for the lost reservation case, it boils down to:
+  spin_lock(*LOCK) {
+    LOAD l = *LOCK
+    l.locked = true
+    STORE *LOCK = l
+    ACQUIRE_BARRIER
+  }
+
+The ACQUIRE_BARRIER is required to give spin_lock() ACQUIRE semantics as
+defined in memory-barriers.txt:
+
+  This acts as a one-way permeable barrier. It guarantees that all
+  memory operations after the ACQUIRE operation will appear to happen
+  after the ACQUIRE operation with respect to the other components of
+  the system.
+
+On modern powerpc systems we use lwsync for ACQUIRE_BARRIER. lwsync is
+also known as "lightweight sync", or "sync 1".
+
+As described in Power ISA v2.07 section B.2.1.1, in this scenario the
+lwsync is not the barrier itself. It instead causes the LOAD of *LOCK to
+act as the barrier, preventing any loads or stores in the locked region
+from occurring prior to the load of *LOCK.
+
+Whether this behaviour is in accordance with the definition of ACQUIRE
+semantics in memory-barriers.txt is open to discussion; we may switch to
+a different barrier in future.
+
+What this means in practice is that the following can occur:
+
+  CPU 0                   CPU 1
+  ==================================================
+  LOAD a = *A             LOAD b = *B
+  a.locked = true         b.locked = true
+  LOAD b = *B             LOAD a = *A
+  STORE *A = a            STORE *B = b
+  if b.locked # false     if a.locked # false
+  else                    else
+  smp_mb()                smp_mb()
+  LOAD r1 = *COUNTER      LOAD r2 = *COUNTER
+  r1++                    r2++
+  STORE *COUNTER = r1
+                          STORE *COUNTER = r2   # Lost update
+  spin_unlock(*A)         spin_unlock(*B)
+
+That is, the load of *B can occur prior to the store that makes *A
+visibly locked. And similarly for CPU 1. The result is both CPUs hold
+their lock and believe the other lock is unlocked.
+
+The easiest fix for this is to add a full memory barrier to the start of
+spin_is_locked(), so adding to our previous definition would give us:
+
+  bool spin_is_locked(*LOCK) {
+    smp_mb()
+    LOAD l = *LOCK
+    return l.locked
+  }
+
+The new barrier orders the store to the lock we are locking vs the load
+of the other lock:
+
+  CPU 0                   CPU 1
+  ==================================================
+  LOAD a = *A             LOAD b = *B
+  a.locked = true         b.locked = true
+  STORE *A = a            STORE *B = b
+  smp_mb()                smp_mb()
+  LOAD b = *B             LOAD a = *A
+  if b.locked # true      if a.locked # true
+  # nothing               # nothing
+  spin_unlock(*A)         spin_unlock(*B)
+
+Although the above example is theoretical, there is code similar to this
+example in sem_lock() in ipc/sem.c. This commit in addition to the next
+commit appears to be a fix for crashes we are seeing in that code where
+we believe this race happens in practice.
+
+Signed-off-by: Michael Ellerman
+Signed-off-by: Benjamin Herrenschmidt
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/include/asm/spinlock.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/powerpc/include/asm/spinlock.h
++++ b/arch/powerpc/include/asm/spinlock.h
+@@ -61,6 +61,7 @@ static __always_inline int arch_spin_val
+ 
+ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+ {
++	smp_mb();
+ 	return !arch_spin_value_unlocked(*lock);
+ }
+ 
diff --git a/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch b/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch
new file mode 100644
index 00000000000..94be7bfb454
--- /dev/null
+++ b/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch
@@ -0,0 +1,85 @@
+From 85101af13bb854a6572fa540df7c7201958624b9 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard
+Date: Tue, 26 Aug 2014 12:44:15 +1000
+Subject: powerpc/perf: Fix ABIv2 kernel backtraces
+
+From: Anton Blanchard
+
+commit 85101af13bb854a6572fa540df7c7201958624b9 upstream.
+
+ABIv2 kernels are failing to backtrace through the kernel. An example:
+
+39.30%  readseek2_proce  [kernel.kallsyms]  [k] find_get_entry
+            |
+            --- find_get_entry
+               __GI___libc_read
+
+The problem is in valid_next_sp() where we check that the new stack
+pointer is at least STACK_FRAME_OVERHEAD below the previous one.
+
+ABIv1 has a minimum stack frame size of 112 bytes consisting of 48 bytes
+and 64 bytes of parameter save area. ABIv2 changes that to 32 bytes
+with no parameter save area.
+
+STACK_FRAME_OVERHEAD is in theory the minimum stack frame size,
+but we have over 240 uses of it, some of which assume that it includes
+space for the parameter area.
+
+We need to work through all our stack defines and rationalise them
+but let's fix perf now by creating STACK_FRAME_MIN_SIZE and using
+it in valid_next_sp(). This fixes the issue:
+
+30.64%  readseek2_proce  [kernel.kallsyms]  [k] find_get_entry
+            |
+            --- find_get_entry
+               pagecache_get_page
+               generic_file_read_iter
+               new_sync_read
+               vfs_read
+               sys_read
+               syscall_exit
+               __GI___libc_read
+
+Reported-by: Aneesh Kumar K.V
+Signed-off-by: Anton Blanchard
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/include/asm/ptrace.h |    7 +++++++
+ arch/powerpc/perf/callchain.c     |    2 +-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/ptrace.h
++++ b/arch/powerpc/include/asm/ptrace.h
+@@ -47,6 +47,12 @@
+ 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
+ #define STACK_FRAME_MARKER	12
+ 
++#if defined(_CALL_ELF) && _CALL_ELF == 2
++#define STACK_FRAME_MIN_SIZE	32
++#else
++#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
++#endif
++
+ /* Size of dummy stack frame allocated when calling signal handler. */
+ #define __SIGNAL_FRAMESIZE	128
+ #define __SIGNAL_FRAMESIZE32	64
+@@ -60,6 +66,7 @@
+ #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)
+ #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+ #define STACK_FRAME_MARKER	2
++#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
+ 
+ /* Size of stack frame allocated when calling signal handler. */
+ #define __SIGNAL_FRAMESIZE	64
+--- a/arch/powerpc/perf/callchain.c
++++ b/arch/powerpc/perf/callchain.c
+@@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long s
+ 		return 0;	/* must be 16-byte aligned */
+ 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ 		return 0;
+-	if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
++	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
+ 		return 1;
+ 	/*
+ 	 * sp could decrease when we jump off an interrupt stack
diff --git a/queue-3.14/series b/queue-3.14/series
index 1c6bb7ee210..0520d119fa9 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -188,3 +188,6 @@ mm-slab-initialize-object-alignment-on-cache-creation.patch
 mm-softdirty-keep-bit-when-zapping-file-pte.patch
 sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch
 brcmfmac-handle-if-event-for-p2p_device-interface.patch
+powerpc-perf-fix-abiv2-kernel-backtraces.patch
+powerpc-add-smp_mb-to-arch_spin_is_locked.patch
+powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
-- 
2.47.3
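
For reference only, and not part of the queued patches: the paired-lock
pattern described in the arch_spin_is_locked() changelog above is easier
to follow as ordinary kernel C than as pseudocode. The sketch below is
purely illustrative; the lock, counter, and function names (lock_a,
lock_b, shared_counter, update_counter_from_a/b) are hypothetical, and
the shape is modelled on the sem_lock()-style usage the changelog cites
from ipc/sem.c. The smp_mb() inside the if-branch is the caller-side
barrier from the changelog's example; the queued patch adds a further
full barrier at the start of arch_spin_is_locked() itself, so the store
that takes this CPU's lock is visible before the other lock is loaded.

#include <linux/spinlock.h>
#include <asm/barrier.h>

static DEFINE_SPINLOCK(lock_a);
static DEFINE_SPINLOCK(lock_b);
static unsigned long shared_counter;

/* Path A: take lock A, then peek at lock B before touching the counter. */
static void update_counter_from_a(void)
{
	spin_lock(&lock_a);

	if (!spin_is_locked(&lock_b)) {
		/*
		 * Caller-side barrier, as in the changelog's example:
		 * order the spin_is_locked() result against the counter
		 * access below.
		 */
		smp_mb();
		shared_counter++;
	}
	/* else: do nothing, as in the changelog's example. */

	spin_unlock(&lock_a);
}

/* Path B is symmetric: take lock B, then peek at lock A. */
static void update_counter_from_b(void)
{
	spin_lock(&lock_b);

	if (!spin_is_locked(&lock_a)) {
		smp_mb();
		shared_counter++;
	}

	spin_unlock(&lock_b);
}

Without a full barrier at the start of spin_is_locked(), the store that
takes lock_a and the load of lock_b may be reordered (lwsync only gives
spin_lock() ACQUIRE semantics), so both paths can see the other lock as
free and update shared_counter concurrently, which is the lost update
the changelog describes.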