From 398aa66827155ef52bab58bebd24597d90968929 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 8 Jul 2010 10:59:16 +0100
Subject: [PATCH] ARM: 6212/1: atomic ops: add memory constraints to inline
 asm

Currently, the 32-bit and 64-bit atomic operations on ARM do not
include memory constraints in the inline assembly blocks. In the
case of barrier-less operations [for example, atomic_add], this
means that the compiler may constant fold values which have actually
been modified by a call to an atomic operation.

This issue can be observed in the atomic64_test routine in
<kernel root>/lib/atomic64_test.c:

00000000 <test_atomic64>:
   0:	e1a0c00d 	mov	ip, sp
   4:	e92dd830 	push	{r4, r5, fp, ip, lr, pc}
   8:	e24cb004 	sub	fp, ip, #4
   c:	e24dd008 	sub	sp, sp, #8
  10:	e24b3014 	sub	r3, fp, #20
  14:	e30d000d 	movw	r0, #53261	; 0xd00d
  18:	e3011337 	movw	r1, #4919	; 0x1337
  1c:	e34c0001 	movt	r0, #49153	; 0xc001
  20:	e34a1aa3 	movt	r1, #43683	; 0xaaa3
  24:	e16300f8 	strd	r0, [r3, #-8]!
  28:	e30c0afe 	movw	r0, #51966	; 0xcafe
  2c:	e30b1eef 	movw	r1, #48879	; 0xbeef
  30:	e34d0eaf 	movt	r0, #57007	; 0xdeaf
  34:	e34d1ead 	movt	r1, #57005	; 0xdead
  38:	e1b34f9f 	ldrexd	r4, [r3]
  3c:	e1a34f90 	strexd	r4, r0, [r3]
  40:	e3340000 	teq	r4, #0
  44:	1afffffb 	bne	38 <test_atomic64+0x38>
  48:	e59f0004 	ldr	r0, [pc, #4]	; 54 <test_atomic64+0x54>
  4c:	e3a0101e 	mov	r1, #30
  50:	ebfffffe 	bl	0 <__bug>
  54:	00000000 	.word	0x00000000

The atomic64_set (0x38-0x44) writes to the atomic64_t, but the
compiler doesn't see this, assumes the test condition is always
false and generates an unconditional branch to __bug. The rest of the
test is optimised away.

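As a rough C-level reduction of the failing sequence (reconstructed
from the disassembly above; the exact source in lib/atomic64_test.c
may differ slightly):

  atomic64_t v = ATOMIC64_INIT(0xaaa31337c001d00dLL);

  /* 0x38-0x44 above: the ldrexd/strexd loop */
  atomic64_set(&v, 0xdeadbeefdeafcafeLL);

  /* Without a memory constraint, GCC still sees the initial value of
   * v, so this check is constant folded into an unconditional __bug
   * call. */
  BUG_ON(atomic64_read(&v) != 0xdeadbeefdeafcafeLL);
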
This patch adds suitable memory constraints to the atomic operations on ARM
to ensure that the compiler is informed of the correct data hazards. We have
to use the "Qo" constraints to avoid hitting the GCC anomaly described at
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44492 , where the compiler
makes assumptions about the writeback in the addressing mode used by the
inline assembly. These constraints forbid the use of auto{inc,dec} addressing
modes, so it doesn't matter if we don't use the operand exactly once.

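For reference, atomic_add() ends up looking like this after the first
hunk below (the local declarations are reconstructed from context; the
same "+Qo" (v->counter) pattern is applied to every operation):

  static inline void atomic_add(int i, atomic_t *v)
  {
  	unsigned long tmp;
  	int result;

  	/* "+Qo" (v->counter) tells GCC the counter is both read and
  	 * written, while ruling out auto{inc,dec} addressing modes.
  	 * Note the input operand indices shift up by one as a result. */
  	__asm__ __volatile__("@ atomic_add\n"
  "1:	ldrex	%0, [%3]\n"
  "	add	%0, %0, %4\n"
  "	strex	%1, %0, [%3]\n"
  "	teq	%1, #0\n"
  "	bne	1b"
  	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
  	: "r" (&v->counter), "Ir" (i)
  	: "cc");
  }
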
Cc: stable@kernel.org
Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/atomic.h |  132 ++++++++++++++++++++--------------------
 1 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index e9e56c0..7e79503 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -40,12 +40,12 @@ static inline void atomic_add(int i, atomic_t *v)
 	int result;
 
 	__asm__ __volatile__("@ atomic_add\n"
-"1:	ldrex	%0, [%2]\n"
-"	add	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 }
@@ -58,12 +58,12 @@ static inline int atomic_add_return(int i, atomic_t *v)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic_add_return\n"
-"1:	ldrex	%0, [%2]\n"
-"	add	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
@@ -78,12 +78,12 @@ static inline void atomic_sub(int i, atomic_t *v)
 	int result;
 
 	__asm__ __volatile__("@ atomic_sub\n"
-"1:	ldrex	%0, [%2]\n"
-"	sub	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	sub	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 }
@@ -96,12 +96,12 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic_sub_return\n"
-"1:	ldrex	%0, [%2]\n"
-"	sub	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	sub	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "Ir" (i)
 	: "cc");
 
@@ -118,11 +118,11 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 
 	do {
 		__asm__ __volatile__("@ atomic_cmpxchg\n"
-		"ldrex	%1, [%2]\n"
+		"ldrex	%1, [%3]\n"
 		"mov	%0, #0\n"
-		"teq	%1, %3\n"
-		"strexeq %0, %4, [%2]\n"
-		    : "=&r" (res), "=&r" (oldval)
+		"teq	%1, %4\n"
+		"strexeq %0, %5, [%3]\n"
+		    : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
 		    : "r" (&ptr->counter), "Ir" (old), "r" (new)
 		    : "cc");
 	} while (res);
@@ -137,12 +137,12 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 	unsigned long tmp, tmp2;
 
 	__asm__ __volatile__("@ atomic_clear_mask\n"
-"1:	ldrex	%0, [%2]\n"
-"	bic	%0, %0, %3\n"
-"	strex	%1, %0, [%2]\n"
+"1:	ldrex	%0, [%3]\n"
+"	bic	%0, %0, %4\n"
+"	strex	%1, %0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (tmp), "=&r" (tmp2)
+	: "=&r" (tmp), "=&r" (tmp2), "+Qo" (*addr)
 	: "r" (addr), "Ir" (mask)
 	: "cc");
 }
@@ -249,7 +249,7 @@ static inline u64 atomic64_read(atomic64_t *v)
 	__asm__ __volatile__("@ atomic64_read\n"
 "	ldrexd	%0, %H0, [%1]"
 	: "=&r" (result)
-	: "r" (&v->counter)
+	: "r" (&v->counter), "Qo" (v->counter)
 	);
 
 	return result;
@@ -260,11 +260,11 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 	u64 tmp;
 
 	__asm__ __volatile__("@ atomic64_set\n"
-"1:	ldrexd	%0, %H0, [%1]\n"
-"	strexd	%0, %2, %H2, [%1]\n"
+"1:	ldrexd	%0, %H0, [%2]\n"
+"	strexd	%0, %3, %H3, [%2]\n"
 "	teq	%0, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
+	: "=&r" (tmp), "=Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -275,13 +275,13 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	unsigned long tmp;
 
 	__asm__ __volatile__("@ atomic64_add\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	adds	%0, %0, %3\n"
-"	adc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	adds	%0, %0, %4\n"
+"	adc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -294,13 +294,13 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_add_return\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	adds	%0, %0, %3\n"
-"	adc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	adds	%0, %0, %4\n"
+"	adc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 
@@ -315,13 +315,13 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	unsigned long tmp;
 
 	__asm__ __volatile__("@ atomic64_sub\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	subs	%0, %0, %3\n"
-"	sbc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	subs	%0, %0, %4\n"
+"	sbc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
@@ -334,13 +334,13 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_sub_return\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	subs	%0, %0, %3\n"
-"	sbc	%H0, %H0, %H3\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	subs	%0, %0, %4\n"
+"	sbc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 
@@ -358,12 +358,12 @@ static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new)
 
 	do {
 		__asm__ __volatile__("@ atomic64_cmpxchg\n"
-		"ldrexd	%1, %H1, [%2]\n"
+		"ldrexd	%1, %H1, [%3]\n"
 		"mov	%0, #0\n"
-		"teq	%1, %3\n"
-		"teqeq	%H1, %H3\n"
-		"strexdeq %0, %4, %H4, [%2]"
-		: "=&r" (res), "=&r" (oldval)
+		"teq	%1, %4\n"
+		"teqeq	%H1, %H4\n"
+		"strexdeq %0, %5, %H5, [%3]"
+		: "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
 		: "r" (&ptr->counter), "r" (old), "r" (new)
 		: "cc");
 	} while (res);
@@ -381,11 +381,11 @@ static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
-"	strexd	%1, %3, %H3, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	strexd	%1, %4, %H4, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (ptr->counter)
 	: "r" (&ptr->counter), "r" (new)
 	: "cc");
 
@@ -402,16 +402,16 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
-"1:	ldrexd	%0, %H0, [%2]\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, #1\n"
 "	sbc	%H0, %H0, #0\n"
 "	teq	%H0, #0\n"
 "	bmi	2f\n"
-"	strexd	%1, %0, %H0, [%2]\n"
+"	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b\n"
 "2:"
-	: "=&r" (result), "=&r" (tmp)
+	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter)
 	: "cc");
 
@@ -429,18 +429,18 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
 	smp_mb();
 
 	__asm__ __volatile__("@ atomic64_add_unless\n"
-"1:	ldrexd	%0, %H0, [%3]\n"
-"	teq	%0, %4\n"
-"	teqeq	%H0, %H4\n"
+"1:	ldrexd	%0, %H0, [%4]\n"
+"	teq	%0, %5\n"
+"	teqeq	%H0, %H5\n"
 "	moveq	%1, #0\n"
 "	beq	2f\n"
-"	adds	%0, %0, %5\n"
-"	adc	%H0, %H0, %H5\n"
-"	strexd	%2, %0, %H0, [%3]\n"
+"	adds	%0, %0, %6\n"
+"	adc	%H0, %H0, %H6\n"
+"	strexd	%2, %0, %H0, [%4]\n"
 "	teq	%2, #0\n"
 "	bne	1b\n"
 "2:"
-	: "=&r" (val), "+r" (ret), "=&r" (tmp)
+	: "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (u), "r" (a)
 	: "cc");
 
-- 
1.7.6.2