From f862eefec0b68e099a9fa58d3761ffb10bad97e1 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 26 Sep 2013 13:24:53 -0400
Subject: tile: use a more conservative __my_cpu_offset in CONFIG_PREEMPT

From: Chris Metcalf <cmetcalf@tilera.com>

commit f862eefec0b68e099a9fa58d3761ffb10bad97e1 upstream.

It turns out the kernel relies on barrier() to force a reload of the
percpu offset value. Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a fake
stack read to hazard against barrier(), forcing gcc to know that it
must reread "tp" and recompute anything based on "tp" after a barrier.

This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.

A similar fix for ARMv7 was made in June in change 509eb76ebf97.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/tile/include/asm/percpu.h |   34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
 #ifndef _ASM_TILE_PERCPU_H
 #define _ASM_TILE_PERCPU_H
 
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long tp;
+	register unsigned long *sp asm("sp");
+	asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+	return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
 
 #include <asm-generic/percpu.h>
 