From f862eefec0b68e099a9fa58d3761ffb10bad97e1 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 26 Sep 2013 13:24:53 -0400
Subject: tile: use a more conservative __my_cpu_offset in CONFIG_PREEMPT

From: Chris Metcalf <cmetcalf@tilera.com>

commit f862eefec0b68e099a9fa58d3761ffb10bad97e1 upstream.

It turns out the kernel relies on barrier() to force a reload of the
percpu offset value. Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a fake
stack read to hazard against barrier(), forcing gcc to know that it
must reread "tp" and recompute anything based on "tp" after a barrier.

This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.

A similar fix for ARMv7 was made in June in change 509eb76ebf97.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
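
A minimal sketch of the hazard idiom the patch relies on (not part of
the patch; x86-64 is assumed, and the names read_sp_hazard and dummy
are illustrative): a non-volatile asm that takes a dummy memory input
can still be CSE'd by gcc when no memory clobber intervenes, but
barrier()'s "memory" clobber forces it to be re-issued afterwards. The
patch's "U" (*sp) fake stack read plays the same role for the Tile
"tp" register; the 'U' constraint is a Tile variant of 'm'.

    /* Build with: gcc -O2 demo.c   (x86-64 assumed, illustrative only) */
    #include <stdio.h>

    #define barrier() __asm__ __volatile__("" ::: "memory")

    static unsigned long dummy;    /* hypothetical hazard slot */

    /*
     * Non-volatile asm with a dummy "m" input: gcc may fold two such
     * reads into one when nothing clobbers memory in between, but must
     * re-issue the asm after barrier(), whose "memory" clobber
     * invalidates the "m" operand.
     */
    static inline unsigned long read_sp_hazard(void)
    {
            unsigned long v;
            __asm__("mov %%rsp, %0" : "=r" (v) : "m" (dummy));
            return v;
    }

    int main(void)
    {
            unsigned long a = read_sp_hazard();    /* first read */
            barrier();                             /* memory clobber */
            unsigned long b = read_sp_hazard();    /* re-issued, not CSE'd */
            printf("%#lx %#lx\n", a, b);
            return 0;
    }

Without the "m" (dummy) input, gcc at -O2 would be free to reuse the
first read's result for the second, which is exactly the caching the
patch wants to keep between barriers but break across them.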
 arch/tile/include/asm/percpu.h |   34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
 #ifndef _ASM_TILE_PERCPU_H
 #define _ASM_TILE_PERCPU_H
 
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long tp;
+	register unsigned long *sp asm("sp");
+	asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+	return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
 
 #include <asm-generic/percpu.h>
 
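
For the failure mode being fixed, a hedged user-space analogue (names
are illustrative; a pthread stands in for preemption-driven CPU
migration): barrier() can only force reloads of memory-backed values,
which is why its "memory" clobber never refreshed anything derived
from the old register-variable __my_cpu_offset, and why the fake stack
read is needed to tie the "tp" read into memory ordering at all.

    /* Build with: gcc -O2 -pthread hang_demo.c   (illustrative only) */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    #define barrier() __asm__ __volatile__("" ::: "memory")

    static unsigned long offset;    /* stands in for the percpu offset */

    /* "Migration": another thread changes the offset while we spin. */
    static void *migrate(void *arg)
    {
            (void)arg;
            usleep(10000);
            offset = 1;
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, migrate, NULL);
            /*
             * Because "offset" is memory-backed, barrier() forces it
             * to be reloaded each iteration and the loop terminates.
             * A value cached from a register variable has no such
             * memory hazard, so the pre-patch slub retry loop could
             * spin on a stale percpu offset forever -- the observed hang.
             */
            while (offset == 0)
                    barrier();
            pthread_join(t, NULL);
            printf("offset reloaded: %lu\n", offset);
            return 0;
    }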