From 736324ceeb199d856976365f16fda5caffc035b1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 3 Oct 2014 12:52:12 -0700
Subject: [PATCH] 3.14-stable patches

added patches:
      powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
      powerpc-add-smp_mb-to-arch_spin_is_locked.patch
      powerpc-perf-fix-abiv2-kernel-backtraces.patch
---
 ...dd-smp_mb-s-to-arch_spin_unlock_wait.patch |  47 +++++
 ...pc-add-smp_mb-to-arch_spin_is_locked.patch | 191 ++++++++++++++++++
 ...rpc-perf-fix-abiv2-kernel-backtraces.patch |  85 ++++++++
 queue-3.14/series                             |   3 +
 4 files changed, 326 insertions(+)
 create mode 100644 queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
 create mode 100644 queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
 create mode 100644 queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch

diff --git a/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch b/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
new file mode 100644
index 00000000000..ba9eb664c07
--- /dev/null
+++ b/queue-3.14/powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
@@ -0,0 +1,47 @@
+From 78e05b1421fa41ae8457701140933baa5e7d9479 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman
+Date: Thu, 7 Aug 2014 15:36:18 +1000
+Subject: powerpc: Add smp_mb()s to arch_spin_unlock_wait()
+
+From: Michael Ellerman
+
+commit 78e05b1421fa41ae8457701140933baa5e7d9479 upstream.
+
+Similar to the previous commit which described why we need to add a
+barrier to arch_spin_is_locked(), we have a similar problem with
+spin_unlock_wait().
+
+We need a barrier on entry to ensure any spinlock we have previously
+taken is visibly locked prior to the load of lock->slock.
+
+It's also not clear if spin_unlock_wait() is intended to have ACQUIRE
+semantics. For now be conservative and add a barrier on exit to give it
+ACQUIRE semantics.
+
+Signed-off-by: Michael Ellerman
+Signed-off-by: Benjamin Herrenschmidt
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/lib/locks.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/powerpc/lib/locks.c
++++ b/arch/powerpc/lib/locks.c
+@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
+ 
+ void arch_spin_unlock_wait(arch_spinlock_t *lock)
+ {
++	smp_mb();
++
+ 	while (lock->slock) {
+ 		HMT_low();
+ 		if (SHARED_PROCESSOR)
+ 			__spin_yield(lock);
+ 	}
+ 	HMT_medium();
++
++	smp_mb();
+ }
+ 
+ EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch b/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
new file mode 100644
index 00000000000..7e638de6c6a
--- /dev/null
+++ b/queue-3.14/powerpc-add-smp_mb-to-arch_spin_is_locked.patch
@@ -0,0 +1,191 @@
+From 51d7d5205d3389a32859f9939f1093f267409929 Mon Sep 17 00:00:00 2001
+From: Michael Ellerman
+Date: Thu, 7 Aug 2014 15:36:17 +1000
+Subject: powerpc: Add smp_mb() to arch_spin_is_locked()
+
+From: Michael Ellerman
+
+commit 51d7d5205d3389a32859f9939f1093f267409929 upstream.
+
+The kernel defines the function spin_is_locked(), which can be used to
+check if a spinlock is currently locked.
+
+Using spin_is_locked() on a lock you don't hold is obviously racy. That
+is, even though you may observe that the lock is unlocked, it may become
+locked at any time.
+
+There is (at least) one exception to that, which is if two locks are
+used as a pair, and the holder of each checks the status of the other
+before doing any update.
+
+Assuming *A and *B are two locks, and *COUNTER is a shared non-atomic
+value:
+
+The first CPU does:
+
+  spin_lock(*A)
+
+  if spin_is_locked(*B)
+    # nothing
+  else
+    smp_mb()
+    LOAD r = *COUNTER
+    r++
+    STORE *COUNTER = r
+
+  spin_unlock(*A)
+
+And the second CPU does:
+
+  spin_lock(*B)
+
+  if spin_is_locked(*A)
+    # nothing
+  else
+    smp_mb()
+    LOAD r = *COUNTER
+    r++
+    STORE *COUNTER = r
+
+  spin_unlock(*B)
+
+Although this is a strange locking construct, it should work.
+
+It seems to be understood, but not documented, that spin_is_locked() is
+not a memory barrier, so in the examples above and below the caller
+inserts its own memory barrier before acting on the result of
+spin_is_locked().
+
+For now we assume spin_is_locked() is implemented as below, and we break
+it out in our examples:
+
+  bool spin_is_locked(*LOCK) {
+    LOAD l = *LOCK
+    return l.locked
+  }
+
+Our intuition is that there should be no problem even if the two code
+sequences run simultaneously such as:
+
+  CPU 0                   CPU 1
+  ==================================================
+  spin_lock(*A)           spin_lock(*B)
+  LOAD b = *B             LOAD a = *A
+  if b.locked # true      if a.locked # true
+  # nothing               # nothing
+  spin_unlock(*A)         spin_unlock(*B)
+
+If one CPU gets the lock before the other then it will do the update and
+the other CPU will back off:
+
+  CPU 0                   CPU 1
+  ==================================================
+  spin_lock(*A)
+  LOAD b = *B
+                          spin_lock(*B)
+  if b.locked # false     LOAD a = *A
+  else                    if a.locked # true
+  smp_mb()                # nothing
+  LOAD r1 = *COUNTER      spin_unlock(*B)
+  r1++
+  STORE *COUNTER = r1
+  spin_unlock(*A)
+
+However in reality spin_lock() itself is not indivisible. On powerpc we
+implement it as a load-and-reserve and store-conditional.
+
+Ignoring the retry logic for the lost reservation case, it boils down to:
+  spin_lock(*LOCK) {
+    LOAD l = *LOCK
+    l.locked = true
+    STORE *LOCK = l
+    ACQUIRE_BARRIER
+  }
+
+The ACQUIRE_BARRIER is required to give spin_lock() ACQUIRE semantics as
+defined in memory-barriers.txt:
+
+  This acts as a one-way permeable barrier. It guarantees that all
+  memory operations after the ACQUIRE operation will appear to happen
+  after the ACQUIRE operation with respect to the other components of
+  the system.
+
+On modern powerpc systems we use lwsync for ACQUIRE_BARRIER. lwsync is
+also known as "lightweight sync", or "sync 1".
+
+As described in Power ISA v2.07 section B.2.1.1, in this scenario the
+lwsync is not the barrier itself. It instead causes the LOAD of *LOCK to
+act as the barrier, preventing any loads or stores in the locked region
+from occurring prior to the load of *LOCK.
+
+Whether this behaviour is in accordance with the definition of ACQUIRE
+semantics in memory-barriers.txt is open to discussion; we may switch to
+a different barrier in future.
+
+What this means in practice is that the following can occur:
+
+  CPU 0                   CPU 1
+  ==================================================
+  LOAD a = *A             LOAD b = *B
+  a.locked = true         b.locked = true
+  LOAD b = *B             LOAD a = *A
+  STORE *A = a            STORE *B = b
+  if b.locked # false     if a.locked # false
+  else                    else
+  smp_mb()                smp_mb()
+  LOAD r1 = *COUNTER      LOAD r2 = *COUNTER
+  r1++                    r2++
+  STORE *COUNTER = r1
+                          STORE *COUNTER = r2   # Lost update
+  spin_unlock(*A)         spin_unlock(*B)
+
+That is, the load of *B can occur prior to the store that makes *A
+visibly locked. And similarly for CPU 1. The result is both CPUs hold
+their lock and believe the other lock is unlocked.
+
+The easiest fix for this is to add a full memory barrier to the start of
+spin_is_locked(), so adding to our previous definition would give us:
+
+  bool spin_is_locked(*LOCK) {
+    smp_mb()
+    LOAD l = *LOCK
+    return l.locked
+  }
+
+The new barrier orders the store to the lock we are locking vs the load
+of the other lock:
+
+  CPU 0                   CPU 1
+  ==================================================
+  LOAD a = *A             LOAD b = *B
+  a.locked = true         b.locked = true
+  STORE *A = a            STORE *B = b
+  smp_mb()                smp_mb()
+  LOAD b = *B             LOAD a = *A
+  if b.locked # true      if a.locked # true
+  # nothing               # nothing
+  spin_unlock(*A)         spin_unlock(*B)
+
+Although the above example is theoretical, there is code similar to this
+example in sem_lock() in ipc/sem.c. This commit in addition to the next
+commit appears to be a fix for crashes we are seeing in that code where
+we believe this race happens in practice.
+
+Signed-off-by: Michael Ellerman
+Signed-off-by: Benjamin Herrenschmidt
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/include/asm/spinlock.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/powerpc/include/asm/spinlock.h
++++ b/arch/powerpc/include/asm/spinlock.h
+@@ -61,6 +61,7 @@ static __always_inline int arch_spin_val
+ 
+ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+ {
++	smp_mb();
+ 	return !arch_spin_value_unlocked(*lock);
+ }
+ 
diff --git a/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch b/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch
new file mode 100644
index 00000000000..94be7bfb454
--- /dev/null
+++ b/queue-3.14/powerpc-perf-fix-abiv2-kernel-backtraces.patch
@@ -0,0 +1,85 @@
+From 85101af13bb854a6572fa540df7c7201958624b9 Mon Sep 17 00:00:00 2001
+From: Anton Blanchard
+Date: Tue, 26 Aug 2014 12:44:15 +1000
+Subject: powerpc/perf: Fix ABIv2 kernel backtraces
+
+From: Anton Blanchard
+
+commit 85101af13bb854a6572fa540df7c7201958624b9 upstream.
+
+ABIv2 kernels are failing to backtrace through the kernel. An example:
+
+39.30%  readseek2_proce  [kernel.kallsyms]  [k] find_get_entry
+            |
+            --- find_get_entry
+               __GI___libc_read
+
+The problem is in valid_next_sp() where we check that the new stack
+pointer is at least STACK_FRAME_OVERHEAD below the previous one.
+
+ABIv1 has a minimum stack frame size of 112 bytes consisting of 48 bytes
+and 64 bytes of parameter save area. ABIv2 changes that to 32 bytes
+with no parameter save area.
+
+STACK_FRAME_OVERHEAD is in theory the minimum stack frame size,
+but we have over 240 uses of it, some of which assume that it includes
+space for the parameter area.
+
+We need to work through all our stack defines and rationalise them
+but let's fix perf now by creating STACK_FRAME_MIN_SIZE and using
+it in valid_next_sp(). This fixes the issue:
+
+30.64%  readseek2_proce  [kernel.kallsyms]  [k] find_get_entry
+            |
+            --- find_get_entry
+               pagecache_get_page
+               generic_file_read_iter
+               new_sync_read
+               vfs_read
+               sys_read
+               syscall_exit
+               __GI___libc_read
+
+Reported-by: Aneesh Kumar K.V
+Signed-off-by: Anton Blanchard
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/powerpc/include/asm/ptrace.h |    7 +++++++
+ arch/powerpc/perf/callchain.c     |    2 +-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/include/asm/ptrace.h
++++ b/arch/powerpc/include/asm/ptrace.h
+@@ -47,6 +47,12 @@
+ 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
+ #define STACK_FRAME_MARKER	12
+ 
++#if defined(_CALL_ELF) && _CALL_ELF == 2
++#define STACK_FRAME_MIN_SIZE	32
++#else
++#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
++#endif
++
+ /* Size of dummy stack frame allocated when calling signal handler. */
+ #define __SIGNAL_FRAMESIZE	128
+ #define __SIGNAL_FRAMESIZE32	64
+@@ -60,6 +66,7 @@
+ #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)
+ #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+ #define STACK_FRAME_MARKER	2
++#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
+ 
+ /* Size of stack frame allocated when calling signal handler. */
+ #define __SIGNAL_FRAMESIZE	64
+--- a/arch/powerpc/perf/callchain.c
++++ b/arch/powerpc/perf/callchain.c
+@@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long s
+ 		return 0;	/* must be 16-byte aligned */
+ 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ 		return 0;
+-	if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
++	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
+ 		return 1;
+ 	/*
+ 	 * sp could decrease when we jump off an interrupt stack
diff --git a/queue-3.14/series b/queue-3.14/series
index 1c6bb7ee210..0520d119fa9 100644
--- a/queue-3.14/series
+++ b/queue-3.14/series
@@ -188,3 +188,6 @@ mm-slab-initialize-object-alignment-on-cache-creation.patch
 mm-softdirty-keep-bit-when-zapping-file-pte.patch
 sched-fix-unreleased-llc_shared_mask-bit-during-cpu-hotplug.patch
 brcmfmac-handle-if-event-for-p2p_device-interface.patch
+powerpc-perf-fix-abiv2-kernel-backtraces.patch
+powerpc-add-smp_mb-to-arch_spin_is_locked.patch
+powerpc-add-smp_mb-s-to-arch_spin_unlock_wait.patch
-- 
2.47.3
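
For reference only, and not part of the queued patches: the paired-lock
pattern described in the arch_spin_is_locked() changelog above is easier
to follow as ordinary kernel C than as pseudocode. The sketch below is
purely illustrative; the lock, counter, and function names (lock_a,
lock_b, shared_counter, update_counter_from_a/b) are hypothetical, and
the shape is modelled on the sem_lock()-style usage the changelog cites
from ipc/sem.c. The smp_mb() inside the if-branch is the caller-side
barrier from the changelog's example; the queued patch adds a further
full barrier at the start of arch_spin_is_locked() itself, so the store
that takes this CPU's lock is visible before the other lock is loaded.

#include <linux/spinlock.h>
#include <asm/barrier.h>

static DEFINE_SPINLOCK(lock_a);
static DEFINE_SPINLOCK(lock_b);
static unsigned long shared_counter;

/* Path A: take lock A, then peek at lock B before touching the counter. */
static void update_counter_from_a(void)
{
	spin_lock(&lock_a);

	if (!spin_is_locked(&lock_b)) {
		/*
		 * Caller-side barrier, as in the changelog's example:
		 * order the spin_is_locked() result against the counter
		 * access below.
		 */
		smp_mb();
		shared_counter++;
	}
	/* else: do nothing, as in the changelog's example. */

	spin_unlock(&lock_a);
}

/* Path B is symmetric: take lock B, then peek at lock A. */
static void update_counter_from_b(void)
{
	spin_lock(&lock_b);

	if (!spin_is_locked(&lock_a)) {
		smp_mb();
		shared_counter++;
	}

	spin_unlock(&lock_b);
}

Without a full barrier at the start of spin_is_locked(), the store that
takes lock_a and the load of lock_b may be reordered (lwsync only gives
spin_lock() ACQUIRE semantics), so both paths can see the other lock as
free and update shared_counter concurrently, which is the lost update
the changelog describes.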