/* Macros for atomic functionality for tile.
   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */


/* Provides macros for common atomic functionality.  */

#ifndef _ATOMIC_H_
#define _ATOMIC_H_

#ifdef __tilegx__
/* Atomic instruction macros

   The macros provided by atomic.h simplify access to the TILE-Gx
   architecture's atomic instructions.  The architecture provides a
   variety of atomic instructions, including "exchange", "compare and
   exchange", "fetch and ADD", "fetch and AND", "fetch and OR", and
   "fetch and ADD if greater than or equal to zero".

   No barrier or fence semantics are implied by any of the atomic
   instructions for manipulating memory; you must specify the barriers
   that you wish explicitly, using the provided macros.

   Any integral 32- or 64-bit value can be used as the argument
   to these macros, such as "int", "long long", "unsigned long", etc.
   The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data.
   The "exchange" and "compare and exchange" macros may also take
   pointer values.  We use the pseudo-type "VAL" in the documentation
   to indicate the use of an appropriate type.  */
#else
/* Atomic instruction macros

   The macros provided by atomic.h simplify access to the Tile
   architecture's atomic instructions.  Since the architecture
   supports test-and-set as its only in-silicon atomic operation, many
   of the operations provided by this header are implemented as
   fast-path calls to Linux emulation routines.

   Using the kernel for atomic operations allows userspace to take
   advantage of the kernel's existing atomic-integer support (managed
   by a distributed array of locks).  The kernel provides proper
   ordering among simultaneous atomic operations on different cores,
   and guarantees a process cannot be context-switched partway
   through an atomic operation.  By virtue of sharing the kernel
   atomic implementation, the userspace atomic operations
   are compatible with the atomic methods provided by the kernel's
   futex() syscall API.  Note that these operations never cause Linux
   kernel scheduling, and are in fact invisible to the kernel; they
   simply act as regular function calls but with an elevated privilege
   level.  Note that the kernel's distributed lock array is hashed by
   using only VA bits from the atomic value's address (to avoid the
   performance hit of page table locking and multiple page-table
   lookups to get the PA) and only the VA bits that are below page
   granularity (to properly lock simultaneous accesses to the same
   page mapped at different VAs).  As a result, simultaneous atomic
   operations on values whose addresses are at the same offset on a
   page will contend in the kernel for the same lock array element.

   No barrier or fence semantics are implied by any of the atomic
   instructions for manipulating memory; you must specify the barriers
   that you wish explicitly, using the provided macros.

   Any integral 32- or 64-bit value can be used as the argument
   to these macros, such as "int", "long long", "unsigned long", etc.
   The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data.
   The "exchange" and "compare and exchange" macros may also take
   pointer values.  We use the pseudo-type "VAL" in the documentation
   to indicate the use of an appropriate type.

   The 32-bit routines are implemented using a single kernel fast
   syscall, as is the 64-bit compare-and-exchange.  The other 64-bit
   routines are implemented by looping over the 64-bit
   compare-and-exchange routine, and so may be less efficient.  */
#endif

#ifdef __tilegx__
#include <arch/spr_def.h>
#else
#include <asm/unistd.h>
#endif


/* 32-bit integer compare-and-exchange.  */
static __inline __attribute__ ((always_inline))
  int arch_atomic_val_compare_and_exchange_4 (volatile int *mem,
                                              int oldval, int newval)
{
#ifdef __tilegx__
  __insn_mtspr (SPR_CMPEXCH_VALUE, oldval);
  return __insn_cmpexch4 (mem, newval);
#else
  int result;
  __asm__ __volatile__ ("swint1":"=R00" (result),
                        "=m" (*mem):"R10" (__NR_FAST_cmpxchg), "R00" (mem),
                        "R01" (oldval), "R02" (newval), "m" (*mem):"r20",
                        "r21", "r22", "r23", "r24", "r25", "r26", "r27",
                        "r28", "r29", "memory");
  return result;
#endif
}

/* 64-bit integer compare-and-exchange.  */
static __inline __attribute__ ((always_inline))
  long long arch_atomic_val_compare_and_exchange_8 (volatile long long
                                                    *mem, long long oldval,
                                                    long long newval)
{
#ifdef __tilegx__
  __insn_mtspr (SPR_CMPEXCH_VALUE, oldval);
  return __insn_cmpexch (mem, newval);
#else
  unsigned int result_lo, result_hi;
  unsigned int oldval_lo = oldval & 0xffffffffu, oldval_hi = oldval >> 32;
  unsigned int newval_lo = newval & 0xffffffffu, newval_hi = newval >> 32;
  __asm__ __volatile__ ("swint1":"=R00" (result_lo), "=R01" (result_hi),
                        "=m" (*mem):"R10" (__NR_FAST_cmpxchg64), "R00" (mem),
                        "R02" (oldval_lo), "R03" (oldval_hi),
                        "R04" (newval_lo), "R05" (newval_hi),
                        "m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25",
                        "r26", "r27", "r28", "r29", "memory");
  return ((long long) result_hi) << 32 | result_lo;
#endif
}

/* This non-existent symbol is called for sizes other than "4" and "8",
   indicating a bug in the caller.  */
extern int __arch_atomic_error_bad_argument_size (void)
  __attribute__ ((warning ("sizeof atomic argument not 4 or 8")));


#define arch_atomic_val_compare_and_exchange(mem, o, n) \
  __extension__ ({ \
    (__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \
      ((sizeof(*(mem)) == 8) ? \
       arch_atomic_val_compare_and_exchange_8( \
         (volatile long long*)(mem), (__typeof((o)-(o)))(o), \
         (__typeof((n)-(n)))(n)) : \
       (sizeof(*(mem)) == 4) ? \
       arch_atomic_val_compare_and_exchange_4( \
         (volatile int*)(mem), (__typeof((o)-(o)))(o), \
         (__typeof((n)-(n)))(n)) : \
       __arch_atomic_error_bad_argument_size()); \
  })

#define arch_atomic_bool_compare_and_exchange(mem, o, n) \
  __extension__ ({ \
    __typeof(o) __o = (o); \
    __builtin_expect( \
      __o == arch_atomic_val_compare_and_exchange((mem), __o, (n)), 1); \
  })
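
/* Illustrative usage sketch (editor's addition, not part of the original
   header): replace a hypothetical shared word "shared_val" with 20 only
   if it still holds 10.

     int prev = arch_atomic_val_compare_and_exchange (&shared_val, 10, 20);
     int updated = (prev == 10);

   or, when only success or failure is needed:

     int updated = arch_atomic_bool_compare_and_exchange (&shared_val, 10, 20);
*/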


/* Loop with compare_and_exchange until we guess the correct value.
   Normally "expr" will be an expression using __old and __value.  */
#define __arch_atomic_update_cmpxchg(mem, value, expr) \
  __extension__ ({ \
    __typeof(value) __value = (value); \
    __typeof(*(mem)) *__mem = (mem), __old = *__mem, __guess; \
    do { \
      __guess = __old; \
      __old = arch_atomic_val_compare_and_exchange(__mem, __old, (expr)); \
    } while (__builtin_expect(__old != __guess, 0)); \
    __old; \
  })
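
/* Illustrative sketch (editor's addition, not part of the original
   header): within "expr", __old names the value currently in memory and
   __value names the caller's argument, so an atomic maximum could be
   written as

     #define my_atomic_max(mem, value) \
       __arch_atomic_update_cmpxchg(mem, value, \
                                    __old > __value ? __old : __value)

   where "my_atomic_max" is a hypothetical name; arch_atomic_xor and
   arch_atomic_nand below are built in exactly this way.  */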

#ifdef __tilegx__

/* Generic atomic op with 8- or 4-byte variant.
   The _mask, _addend, and _expr arguments are ignored on tilegx.  */
#define __arch_atomic_update(mem, value, op, _mask, _addend, _expr) \
  __extension__ ({ \
    ((__typeof(*(mem))) \
     ((sizeof(*(mem)) == 8) ? (__typeof(*(mem)-*(mem)))__insn_##op( \
        (volatile void *)(mem), \
        (long long)(__typeof((value)-(value)))(value)) : \
      (sizeof(*(mem)) == 4) ? (int)__insn_##op##4( \
        (volatile void *)(mem), \
        (int)(__typeof((value)-(value)))(value)) : \
      __arch_atomic_error_bad_argument_size())); \
  })

#else

/* This uses TILEPro's fast syscall support to atomically compute:

   int old = *ptr;
   *ptr = (old & mask) + addend;
   return old;

   This primitive can be used for atomic exchange, add, or, and.
   Only 32-bit support is provided.  */
static __inline __attribute__ ((always_inline))
  int
  __arch_atomic_update_4 (volatile int *mem, int mask, int addend)
{
  int result;
  __asm__ __volatile__ ("swint1":"=R00" (result),
                        "=m" (*mem):"R10" (__NR_FAST_atomic_update),
                        "R00" (mem), "R01" (mask), "R02" (addend),
                        "m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25",
                        "r26", "r27", "r28", "r29", "memory");
  return result;
}
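
/* Illustrative sketch (editor's addition, not part of the original
   header): the (mask, addend) pair encodes the desired operation, as the
   generic macros below use it.  Writing the caller's operand as "v" or "m":

     exchange v:   mask = 0,    addend = v    (*ptr becomes v)
     add v:        mask = -1,   addend = v    (*ptr becomes *ptr + v)
     and with m:   mask = m,    addend = 0    (*ptr becomes *ptr & m)
     or with m:    mask = ~m,   addend = m    (*ptr becomes (*ptr & ~m) + m,
                                               which equals *ptr | m)

   In every case the old value of *ptr is returned.  */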

/* Generic atomic op with 8- or 4-byte variant.
   The _op argument is ignored on tilepro.  */
#define __arch_atomic_update(mem, value, _op, mask, addend, expr) \
  __extension__ ({ \
    (__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \
      ((sizeof(*(mem)) == 8) ? \
       __arch_atomic_update_cmpxchg((mem), (value), (expr)) : \
       (sizeof(*(mem)) == 4) ? \
       __arch_atomic_update_4((volatile int*)(mem), \
                              (__typeof((mask)-(mask)))(mask), \
                              (__typeof((addend)-(addend)))(addend)) : \
       __arch_atomic_error_bad_argument_size()); \
  })

#endif /* __tilegx__ */


#define arch_atomic_exchange(mem, newvalue) \
  __arch_atomic_update(mem, newvalue, exch, 0, newvalue, __value)

#define arch_atomic_add(mem, value) \
  __arch_atomic_update(mem, value, fetchadd, -1, value, __old + __value)

#define arch_atomic_sub(mem, value) arch_atomic_add((mem), -(value))

#define arch_atomic_increment(mem) arch_atomic_add((mem), 1)

#define arch_atomic_decrement(mem) arch_atomic_add((mem), -1)

#define arch_atomic_and(mem, mask) \
  __arch_atomic_update(mem, mask, fetchand, mask, 0, __old & __value)

#define arch_atomic_or(mem, mask) \
  __arch_atomic_update(mem, mask, fetchor, ~mask, mask, __old | __value)

#define arch_atomic_xor(mem, mask) \
  __arch_atomic_update_cmpxchg(mem, mask, __old ^ __value)

#define arch_atomic_nand(mem, mask) \
  __arch_atomic_update_cmpxchg(mem, mask, ~(__old & __value))
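
/* Illustrative usage sketch (editor's addition, not part of the original
   header): these macros return the value the memory location held before
   the update, so a hypothetical reference count "refcount" could be
   managed as

     arch_atomic_increment (&refcount);
     if (arch_atomic_decrement (&refcount) == 1)
       free_the_object ();     // hypothetical cleanup routine

   where the comparison against 1 uses the pre-decrement value.  */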

#define arch_atomic_bit_set(mem, bit) \
  __extension__ ({ \
    __typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \
    __mask & arch_atomic_or((mem), __mask); \
  })

#define arch_atomic_bit_clear(mem, bit) \
  __extension__ ({ \
    __typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \
    __mask & arch_atomic_and((mem), ~__mask); \
  })
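
/* Illustrative usage sketch (editor's addition, not part of the original
   header): both macros return the previous state of the chosen bit
   (nonzero if it was already set), so a hypothetical flag word
   "init_flags" could be claimed exactly once with

     if (!arch_atomic_bit_set (&init_flags, 3))
       run_one_time_initialization ();   // hypothetical; bit 3 was clear

   and later released with arch_atomic_bit_clear (&init_flags, 3).  */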

#ifdef __tilegx__
/* Atomically store a new value to memory.
   Note that you can freely use types of any size here, unlike the
   other atomic routines, which require 32- or 64-bit types.
   This accessor is provided for compatibility with TILEPro, which
   required an explicit atomic operation for stores that needed
   to be atomic with respect to other atomic methods in this header.  */
#define arch_atomic_write(mem, value) ((void) (*(mem) = (value)))
#else
#define arch_atomic_write(mem, value) \
  do { \
    __typeof(mem) __aw_mem = (mem); \
    __typeof(value) __aw_val = (value); \
    unsigned int *__aw_mem32, __aw_intval, __aw_val32, __aw_off, __aw_mask; \
    __aw_intval = (__typeof((value) - (value)))__aw_val; \
    switch (sizeof(*__aw_mem)) { \
    case 8: \
      __arch_atomic_update_cmpxchg(__aw_mem, __aw_val, __value); \
      break; \
    case 4: \
      __arch_atomic_update_4((int *)__aw_mem, 0, __aw_intval); \
      break; \
    case 2: \
      __aw_off = 8 * ((long)__aw_mem & 0x2); \
      __aw_mask = 0xffffU << __aw_off; \
      __aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x2); \
      __aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
      __arch_atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
                                   (__old & ~__aw_mask) | __value); \
      break; \
    case 1: \
      __aw_off = 8 * ((long)__aw_mem & 0x3); \
      __aw_mask = 0xffU << __aw_off; \
      __aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x3); \
      __aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
      __arch_atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
                                   (__old & ~__aw_mask) | __value); \
      break; \
    } \
  } while (0)
#endif
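
/* Illustrative usage sketch (editor's addition, not part of the original
   header): on TILEPro, a store to a field that other code updates with
   the atomic macros above should go through arch_atomic_write so that it
   stays atomic with respect to those operations, e.g. for a hypothetical
   shared "unsigned short state" field:

     arch_atomic_write (&state, 1);

   On TILE-Gx this compiles to an ordinary store.  */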

/* Compiler barrier.

   This macro prevents loads or stores from being moved by the compiler
   across the macro.  Any loaded value that was loaded before this
   macro must then be reloaded by the compiler.  */
#define arch_atomic_compiler_barrier() __asm__ __volatile__("" ::: "memory")

/* Full memory barrier.

   This macro has the semantics of arch_atomic_compiler_barrier(), but also
   ensures that previous stores are visible to other cores, and that
   all previous loaded values have been placed into their target
   register on this core.  */
#define arch_atomic_full_barrier() __insn_mf()

/* Read memory barrier.

   Ensure that all reads by this processor that occurred prior to the
   read memory barrier have completed, and that no reads that occur
   after the read memory barrier on this processor are initiated
   before the barrier.

   On current TILE chips a read barrier is implemented as a full barrier,
   but this may not be true in later versions of the architecture.

   See also arch_atomic_acquire_barrier() for the appropriate idiom to use
   to ensure no reads are lifted above an atomic lock instruction.  */
#define arch_atomic_read_barrier() arch_atomic_full_barrier()

/* Write memory barrier.

   Ensure that all writes by this processor that occurred prior to the
   write memory barrier have completed, and that no writes that occur
   after the write memory barrier on this processor are initiated
   before the barrier.

   On current TILE chips a write barrier is implemented as a full barrier,
   but this may not be true in later versions of the architecture.

   See also arch_atomic_release_barrier() for the appropriate idiom to use
   to ensure all writes are complete prior to an atomic unlock instruction.  */
#define arch_atomic_write_barrier() arch_atomic_full_barrier()

/* Lock acquisition barrier.

   Ensure that no load operations that follow this macro in the
   program can issue prior to the barrier.  Without such a barrier,
   the compiler can reorder them to issue earlier, or the hardware can
   issue them speculatively.  The latter is not currently done in the
   Tile microarchitecture, but using this operation improves
   portability to future implementations.

   This operation is intended to be used as part of the "acquire"
   path for locking, that is, when entering a critical section.
   This should be done after the atomic operation that actually
   acquires the lock, and in conjunction with a "control dependency"
   that checks the atomic operation result to see if the lock was
   in fact acquired.  See the arch_atomic_read_barrier() macro
   for a heavier-weight barrier to use in certain unusual constructs,
   or arch_atomic_acquire_barrier_value() if no control dependency exists.  */
#define arch_atomic_acquire_barrier() arch_atomic_compiler_barrier()
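
/* Illustrative sketch (editor's addition, not part of the original
   header): the "acquire" idiom described above, for a hypothetical
   spinlock word holding 0 when free and 1 when held.

     static __inline void
     my_spin_lock (int *lock)
     {
       while (arch_atomic_exchange (lock, 1) != 0)   // control dependency
         ;
       arch_atomic_acquire_barrier ();               // after the atomic op
     }

   The name "my_spin_lock" is hypothetical, not part of this header.  */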

/* Lock release barrier.

   Ensure that no store operations that precede this macro in the
   program complete subsequent to the barrier.  Without such a
   barrier, the compiler can reorder stores to issue later, or stores
   can be still outstanding in the memory network.

   This operation is intended to be used as part of the "release" path
   for locking, that is, when leaving a critical section.  This should
   be done before the operation (such as a store of zero) that
   actually releases the lock.  */
#define arch_atomic_release_barrier() arch_atomic_write_barrier()
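
/* Illustrative sketch (editor's addition, not part of the original
   header): the matching "release" idiom for a hypothetical spinlock word
   holding 0 when free and 1 when held, issued before the store that
   frees the lock.

     static __inline void
     my_spin_unlock (int *lock)
     {
       arch_atomic_release_barrier ();   // critical-section writes complete
       arch_atomic_write (lock, 0);      // the store that releases the lock
     }

   The name "my_spin_unlock" is hypothetical, not part of this header.  */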

/* Barrier until the read of a particular value is complete.

   This is occasionally useful when constructing certain locking
   scenarios.  For example, you might write a routine that issues an
   atomic instruction to enter a critical section, then reads one or
   more values within the critical section without checking to see if
   the critical section was in fact acquired, and only later checks
   the atomic instruction result to see if the lock was acquired.  If
   so, the routine could properly release the lock and know that the
   values that were read were valid.

   In this scenario, it is required to wait for the result of the
   atomic instruction, even if the value itself is not checked.  This
   guarantees that if the atomic instruction succeeded in taking the lock,
   the lock was held before any reads in the critical section issued.  */
#define arch_atomic_acquire_barrier_value(val) \
  __asm__ __volatile__("move %0, %0" :: "r"(val))
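
/* Illustrative sketch (editor's addition, not part of the original
   header): the scenario described above, with a hypothetical lock word
   "lock" and a hypothetical shared datum "shared_data".

     int prev = arch_atomic_exchange (&lock, 1);
     arch_atomic_acquire_barrier_value (prev);   // wait for the exchange result
     int snapshot = shared_data;                 // read inside the critical section
     if (prev == 0)                              // only now check the result
       use (snapshot);                           // hypothetical consumer

   The value barrier ensures the exchange result was in hand, and hence
   the lock actually held, before the read of shared_data could issue.  */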

/* Access the given variable in memory exactly once.

   In some contexts, an algorithm may need to force access to memory,
   since otherwise the compiler may think it can optimize away a
   memory load or store; for example, in a loop when polling memory to
   see if another cpu has updated it yet.  Generally this is only
   required for certain very carefully hand-tuned algorithms; using it
   unnecessarily may result in performance losses.

   A related use of this macro is to ensure that the compiler does not
   rematerialize the value of "x" by reloading it from memory
   unexpectedly; the "volatile" marking will prevent the compiler from
   being able to rematerialize.  This is helpful if an algorithm needs
   to read a variable without locking, but needs it to have the same
   value if it ends up being used several times within the algorithm.

   Note that multiple uses of this macro are guaranteed to be ordered,
   i.e. the compiler will not reorder stores or loads that are wrapped
   in arch_atomic_access_once().  */
#define arch_atomic_access_once(x) (*(volatile __typeof(x) *)&(x))
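
/* Illustrative usage sketch (editor's addition, not part of the original
   header): polling a hypothetical flag "done_flag" that another cpu will
   set, forcing a fresh load on every iteration.

     while (arch_atomic_access_once (done_flag) == 0)
       ;

   Without arch_atomic_access_once the compiler could hoist the load out
   of the loop and spin forever on a stale value.  */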



#endif /* !_ATOMIC_H_ */