7ce78e23 | 1 | /* TILE atomics. |
f1717362 | 2 | Copyright (C) 2011-2016 Free Software Foundation, Inc. |
7ce78e23 | 3 | Contributed by Walter Lee (walt@tilera.com) |
4 | ||
5 | This file is free software; you can redistribute it and/or modify it | |
6 | under the terms of the GNU General Public License as published by the | |
7 | Free Software Foundation; either version 3, or (at your option) any | |
8 | later version. | |
9 | ||
10 | This file is distributed in the hope that it will be useful, but | |
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | General Public License for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
41766a5f | 24 | #include "tconfig.h" |
7ce78e23 | 25 | #include "coretypes.h" |
26 | #include "atomic.h" | |
27 | ||
41766a5f | 28 | #define bool unsigned char |
29 | ||
7ce78e23 | 30 | /* This code should be inlined by the compiler, but for now support |
31 | it as out-of-line methods in libgcc. */ | |
32 | ||
c8d04645 | 33 | static inline void |
7ce78e23 | 34 | pre_atomic_barrier (int model) |
35 | { | |
36 | switch ((enum memmodel) model) | |
37 | { | |
38 | case MEMMODEL_RELEASE: | |
39 | case MEMMODEL_ACQ_REL: | |
40 | case MEMMODEL_SEQ_CST: | |
41 | __atomic_thread_fence (model); | |
42 | break; | |
43 | default: | |
44 | break; | |
45 | } | |
46 | return; | |
47 | } | |
48 | ||
c8d04645 | 49 | static inline void |
7ce78e23 | 50 | post_atomic_barrier (int model) |
51 | { | |
52 | switch ((enum memmodel) model) | |
53 | { | |
54 | case MEMMODEL_ACQUIRE: | |
55 | case MEMMODEL_ACQ_REL: | |
56 | case MEMMODEL_SEQ_CST: | |
57 | __atomic_thread_fence (model); | |
58 | break; | |
59 | default: | |
60 | break; | |
61 | } | |
62 | return; | |
63 | } | |
64 | ||
/* Marks parameters an instantiated method accepts but does not use.  */
#define __unused __attribute__((unused))

/* Expand to one out-of-line "fetch and <op>" method.  PROTO is the full
   function prototype; TOP and BOTTOM are the barrier statements emitted
   before and after the operation.  The result is whatever
   arch_atomic_<opname>(p, i) returns — the value previously stored at
   *p, given how the do-and-fetch variants below recombine it.  */
#define __fetch_and_do(proto, type, size, opname, top, bottom)	\
proto								\
{								\
  top;								\
  type rv = arch_atomic_##opname(p, i);				\
  bottom;							\
  return rv;							\
}

/* Instantiate a C11-style __atomic_fetch_<op>_<size> method, with
   pre/post barriers derived from the memory-model argument.  */
#define __atomic_fetch_and_do(type, size, opname)			\
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
		 type, size, opname,					\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\
81 | ||
7ce78e23 | 82 | __atomic_fetch_and_do (int, 4, add) |
83 | __atomic_fetch_and_do (int, 4, sub) | |
84 | __atomic_fetch_and_do (int, 4, or) | |
85 | __atomic_fetch_and_do (int, 4, and) | |
86 | __atomic_fetch_and_do (int, 4, xor) | |
87 | __atomic_fetch_and_do (int, 4, nand) | |
88 | __atomic_fetch_and_do (long long, 8, add) | |
89 | __atomic_fetch_and_do (long long, 8, sub) | |
90 | __atomic_fetch_and_do (long long, 8, or) | |
91 | __atomic_fetch_and_do (long long, 8, and) | |
92 | __atomic_fetch_and_do (long long, 8, xor) | |
93 | __atomic_fetch_and_do (long long, 8, nand) | |
fcb4fe71 | 94 | |
c8d04645 | 95 | #define __sync_fetch_and_do(type, size, opname) \ |
96 | __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \ | |
97 | type, size, opname, \ | |
98 | arch_atomic_write_barrier(), \ | |
99 | arch_atomic_read_barrier()) | |
100 | ||
101 | __sync_fetch_and_do (int, 4, add) | |
102 | __sync_fetch_and_do (int, 4, sub) | |
103 | __sync_fetch_and_do (int, 4, or) | |
104 | __sync_fetch_and_do (int, 4, and) | |
105 | __sync_fetch_and_do (int, 4, xor) | |
106 | __sync_fetch_and_do (int, 4, nand) | |
107 | __sync_fetch_and_do (long long, 8, add) | |
108 | __sync_fetch_and_do (long long, 8, sub) | |
109 | __sync_fetch_and_do (long long, 8, or) | |
110 | __sync_fetch_and_do (long long, 8, and) | |
111 | __sync_fetch_and_do (long long, 8, xor) | |
112 | __sync_fetch_and_do (long long, 8, nand) | |
113 | ||
/* Expand to one "op and fetch" method.  The architecture primitive
   returns the OLD value, so the new value is recomputed locally as
   OP2 (old OP i) — e.g. nand passes op='&' and op2='~'.  OP and OP2
   must therefore match the arch_atomic_<opname> operation exactly.  */
#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom) \
proto									\
{									\
  top;									\
  type rv = op2 (arch_atomic_##opname(p, i) op i);			\
  bottom;								\
  return rv;								\
}

/* Instantiate a C11-style __atomic_<op>_fetch_<size> method, with
   pre/post barriers derived from the memory-model argument.  */
#define __atomic_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
		 type, size, opname, op, op2,				\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\
128 | ||
fcb4fe71 | 129 | __atomic_do_and_fetch (int, 4, add, +, ) |
130 | __atomic_do_and_fetch (int, 4, sub, -, ) | |
131 | __atomic_do_and_fetch (int, 4, or, |, ) | |
132 | __atomic_do_and_fetch (int, 4, and, &, ) | |
133 | __atomic_do_and_fetch (int, 4, xor, |, ) | |
134 | __atomic_do_and_fetch (int, 4, nand, &, ~) | |
135 | __atomic_do_and_fetch (long long, 8, add, +, ) | |
136 | __atomic_do_and_fetch (long long, 8, sub, -, ) | |
137 | __atomic_do_and_fetch (long long, 8, or, |, ) | |
138 | __atomic_do_and_fetch (long long, 8, and, &, ) | |
139 | __atomic_do_and_fetch (long long, 8, xor, |, ) | |
140 | __atomic_do_and_fetch (long long, 8, nand, &, ~) | |
141 | ||
c8d04645 | 142 | #define __sync_do_and_fetch(type, size, opname, op, op2) \ |
143 | __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \ | |
144 | type, size, opname, op, op2, \ | |
145 | arch_atomic_write_barrier(), \ | |
146 | arch_atomic_read_barrier()) \ | |
147 | ||
148 | __sync_do_and_fetch (int, 4, add, +, ) | |
149 | __sync_do_and_fetch (int, 4, sub, -, ) | |
150 | __sync_do_and_fetch (int, 4, or, |, ) | |
151 | __sync_do_and_fetch (int, 4, and, &, ) | |
152 | __sync_do_and_fetch (int, 4, xor, |, ) | |
153 | __sync_do_and_fetch (int, 4, nand, &, ~) | |
154 | __sync_do_and_fetch (long long, 8, add, +, ) | |
155 | __sync_do_and_fetch (long long, 8, sub, -, ) | |
156 | __sync_do_and_fetch (long long, 8, or, |, ) | |
157 | __sync_do_and_fetch (long long, 8, and, &, ) | |
158 | __sync_do_and_fetch (long long, 8, xor, |, ) | |
159 | __sync_do_and_fetch (long long, 8, nand, &, ~) | |
160 | ||
7ce78e23 | 161 | #define __atomic_exchange_methods(type, size) \ |
162 | bool \ | |
163 | __atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \ | |
164 | type newval, bool weak __unused, \ | |
165 | int models, int modelf __unused) \ | |
166 | { \ | |
167 | type oldval = *oldvalp; \ | |
168 | pre_atomic_barrier(models); \ | |
3d14844b | 169 | type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \ |
7ce78e23 | 170 | post_atomic_barrier(models); \ |
171 | bool success = (retval == oldval); \ | |
172 | *oldvalp = retval; \ | |
173 | return success; \ | |
174 | } \ | |
175 | \ | |
176 | type \ | |
177 | __atomic_exchange_##size(volatile type* ptr, type val, int model) \ | |
178 | { \ | |
179 | pre_atomic_barrier(model); \ | |
3d14844b | 180 | type retval = arch_atomic_exchange(ptr, val); \ |
7ce78e23 | 181 | post_atomic_barrier(model); \ |
182 | return retval; \ | |
183 | } | |
fcb4fe71 | 184 | |
7ce78e23 | 185 | __atomic_exchange_methods (int, 4) |
186 | __atomic_exchange_methods (long long, 8) | |
187 | ||
c8d04645 | 188 | #define __sync_exchange_methods(type, size) \ |
189 | type \ | |
190 | __sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval) \ | |
191 | { \ | |
192 | arch_atomic_write_barrier(); \ | |
193 | type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \ | |
194 | arch_atomic_read_barrier(); \ | |
195 | return retval; \ | |
196 | } \ | |
197 | \ | |
198 | bool \ | |
199 | __sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \ | |
200 | { \ | |
201 | arch_atomic_write_barrier(); \ | |
202 | bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \ | |
203 | arch_atomic_read_barrier(); \ | |
204 | return retval; \ | |
205 | } \ | |
206 | \ | |
207 | type \ | |
208 | __sync_lock_test_and_set_##size(type* ptr, type val) \ | |
209 | { \ | |
210 | type retval = arch_atomic_exchange(ptr, val); \ | |
211 | arch_atomic_acquire_barrier_value(retval); \ | |
212 | return retval; \ | |
213 | } | |
214 | ||
215 | __sync_exchange_methods (int, 4) | |
216 | __sync_exchange_methods (long long, 8) | |
217 | ||
47bfa8ec | 218 | #ifdef __LITTLE_ENDIAN__ |
219 | #define BIT_OFFSET(n, type) ((n) * 8) | |
220 | #else | |
221 | #define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8) | |
222 | #endif | |
223 | ||
7ce78e23 | 224 | /* Subword methods require the same approach for both TILEPro and |
225 | TILE-Gx. We load the background data for the word, insert the | |
226 | desired subword piece, then compare-and-exchange it into place. */ | |
227 | #define u8 unsigned char | |
228 | #define u16 unsigned short | |
fcb4fe71 | 229 | |
c8d04645 | 230 | #define __subword_cmpxchg_body(type, size, ptr, guess, val) \ |
231 | ({ \ | |
232 | unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \ | |
233 | const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \ | |
234 | const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \ | |
235 | const unsigned int bgmask = ~(valmask << shift); \ | |
236 | unsigned int oldword = *p; \ | |
237 | type oldval = (oldword >> shift) & valmask; \ | |
238 | if (__builtin_expect((oldval == guess), 1)) { \ | |
239 | unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \ | |
240 | oldword = arch_atomic_val_compare_and_exchange(p, oldword, word); \ | |
241 | oldval = (oldword >> shift) & valmask; \ | |
242 | } \ | |
243 | oldval; \ | |
244 | }) \ | |
245 | ||
7ce78e23 | 246 | #define __atomic_subword_cmpxchg(type, size) \ |
247 | \ | |
248 | bool \ | |
c8d04645 | 249 | __atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr, \ |
7ce78e23 | 250 | type val, bool weak __unused, int models, \ |
251 | int modelf __unused) \ | |
252 | { \ | |
253 | pre_atomic_barrier(models); \ | |
c8d04645 | 254 | type guess = *guess_ptr; \ |
255 | type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \ | |
7ce78e23 | 256 | post_atomic_barrier(models); \ |
c8d04645 | 257 | bool success = (oldval == guess); \ |
258 | *guess_ptr = oldval; \ | |
7ce78e23 | 259 | return success; \ |
260 | } | |
fcb4fe71 | 261 | |
7ce78e23 | 262 | __atomic_subword_cmpxchg (u8, 1) |
263 | __atomic_subword_cmpxchg (u16, 2) | |
fcb4fe71 | 264 | |
c8d04645 | 265 | #define __sync_subword_cmpxchg(type, size) \ |
266 | \ | |
267 | type \ | |
268 | __sync_val_compare_and_swap_##size(type* ptr, type guess, type val) \ | |
269 | { \ | |
270 | arch_atomic_write_barrier(); \ | |
271 | type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \ | |
272 | arch_atomic_read_barrier(); \ | |
273 | return oldval; \ | |
274 | } \ | |
275 | \ | |
276 | bool \ | |
277 | __sync_bool_compare_and_swap_##size(type* ptr, type guess, type val) \ | |
278 | { \ | |
279 | type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val); \ | |
280 | return oldval == guess; \ | |
281 | } | |
282 | ||
283 | __sync_subword_cmpxchg (u8, 1) | |
284 | __sync_subword_cmpxchg (u16, 2) | |
285 | ||
7ce78e23 | 286 | /* For the atomic-update subword methods, we use the same approach as |
287 | above, but we retry until we succeed if the compare-and-exchange | |
288 | fails. */ | |
c8d04645 | 289 | #define __subword(type, proto, top, expr, bottom) \ |
7ce78e23 | 290 | proto \ |
291 | { \ | |
292 | top \ | |
293 | unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \ | |
47bfa8ec | 294 | const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \ |
7ce78e23 | 295 | const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \ |
296 | const unsigned int bgmask = ~(valmask << shift); \ | |
297 | unsigned int oldword, xword = *p; \ | |
298 | type val, oldval; \ | |
299 | do { \ | |
300 | oldword = xword; \ | |
301 | oldval = (oldword >> shift) & valmask; \ | |
302 | val = expr; \ | |
303 | unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \ | |
3d14844b | 304 | xword = arch_atomic_val_compare_and_exchange(p, oldword, word); \ |
7ce78e23 | 305 | } while (__builtin_expect(xword != oldword, 0)); \ |
306 | bottom \ | |
307 | } | |
fcb4fe71 | 308 | |
7ce78e23 | 309 | #define __atomic_subword_fetch(type, funcname, expr, retval) \ |
c8d04645 | 310 | __subword(type, \ |
311 | type __atomic_ ## funcname(volatile type *ptr, type i, int model), \ | |
312 | pre_atomic_barrier(model);, \ | |
313 | expr, \ | |
314 | post_atomic_barrier(model); return retval;) | |
fcb4fe71 | 315 | |
7ce78e23 | 316 | __atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval) |
317 | __atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval) | |
318 | __atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval) | |
319 | __atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval) | |
320 | __atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval) | |
321 | __atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval) | |
fcb4fe71 | 322 | |
7ce78e23 | 323 | __atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval) |
324 | __atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval) | |
325 | __atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval) | |
326 | __atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval) | |
327 | __atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval) | |
328 | __atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval) | |
fcb4fe71 | 329 | |
7ce78e23 | 330 | __atomic_subword_fetch (u8, add_fetch_1, oldval + i, val) |
331 | __atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val) | |
332 | __atomic_subword_fetch (u8, or_fetch_1, oldval | i, val) | |
333 | __atomic_subword_fetch (u8, and_fetch_1, oldval & i, val) | |
334 | __atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val) | |
335 | __atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val) | |
fcb4fe71 | 336 | |
7ce78e23 | 337 | __atomic_subword_fetch (u16, add_fetch_2, oldval + i, val) |
338 | __atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val) | |
339 | __atomic_subword_fetch (u16, or_fetch_2, oldval | i, val) | |
340 | __atomic_subword_fetch (u16, and_fetch_2, oldval & i, val) | |
341 | __atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val) | |
342 | __atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val) | |
fcb4fe71 | 343 | |
c8d04645 | 344 | #define __sync_subword_fetch(type, funcname, expr, retval) \ |
345 | __subword(type, \ | |
346 | type __sync_ ## funcname(type *ptr, type i), \ | |
347 | arch_atomic_read_barrier();, \ | |
348 | expr, \ | |
349 | arch_atomic_write_barrier(); return retval;) | |
350 | ||
351 | __sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval) | |
352 | __sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval) | |
353 | __sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval) | |
354 | __sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval) | |
355 | __sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval) | |
356 | __sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval) | |
357 | ||
358 | __sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval) | |
359 | __sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval) | |
360 | __sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval) | |
361 | __sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval) | |
362 | __sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval) | |
363 | __sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval) | |
364 | ||
365 | __sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val) | |
366 | __sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val) | |
367 | __sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val) | |
368 | __sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val) | |
369 | __sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val) | |
370 | __sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val) | |
371 | ||
372 | __sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val) | |
373 | __sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val) | |
374 | __sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val) | |
375 | __sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val) | |
376 | __sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val) | |
377 | __sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val) | |
378 | ||
7ce78e23 | 379 | #define __atomic_subword_lock(type, size) \ |
c8d04645 | 380 | __subword(type, \ |
381 | type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \ | |
382 | pre_atomic_barrier(model);, \ | |
383 | nval, \ | |
384 | post_atomic_barrier(model); return oldval;) | |
fcb4fe71 | 385 | |
7ce78e23 | 386 | __atomic_subword_lock (u8, 1) |
387 | __atomic_subword_lock (u16, 2) | |
c8d04645 | 388 | |
/* Legacy subword __sync_lock_test_and_set: no leading barrier (the
   operation is only an acquire barrier), with the trailing acquire
   provided by arch_atomic_acquire_barrier_value on the old value.  */
#define __sync_subword_lock(type, size)					\
  __subword(type,							\
	    type __sync_lock_test_and_set_##size(type* ptr, type nval), \
	    ,								\
	    nval,							\
	    arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)