libgcc/config/tilepro/atomic.c

   1 /* TILE atomics.
   2    Copyright (C) 2011-2022 Free Software Foundation, Inc.
   3    Contributed by Walter Lee (walt@tilera.com)
   4
   5    This file is free software; you can redistribute it and/or modify it
   6    under the terms of the GNU General Public License as published by the
   7    Free Software Foundation; either version 3, or (at your option) any
   8    later version.
   9
  10    This file is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    Under Section 7 of GPL version 3, you are granted additional
  16    permissions described in the GCC Runtime Library Exception, version
  17    3.1, as published by the Free Software Foundation.
  18
  19    You should have received a copy of the GNU General Public License and
  20    a copy of the GCC Runtime Library Exception along with this program;
  21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22    <http://www.gnu.org/licenses/>.  */
  23
  24 #include "tconfig.h"
  25 #include "coretypes.h"
  26 #include "atomic.h"
  27
  28 #define bool unsigned char
  29
  30 /* This code should be inlined by the compiler, but for now support
  31    it as out-of-line methods in libgcc.  */
  32
  33 static inline void
  34 pre_atomic_barrier (int model)
  35 {
  36   switch (model)
  37     {
  38     case __ATOMIC_RELEASE:
  39     case __ATOMIC_ACQ_REL:
  40     case __ATOMIC_SEQ_CST:
  41       __atomic_thread_fence (model);
  42       break;
  43     default:
  44       break;
  45     }
  46   return;
  47 }
  48
  49 static inline void
  50 post_atomic_barrier (int model)
  51 {
  52   switch (model)
  53     {
  54     case __ATOMIC_ACQUIRE:
  55     case __ATOMIC_ACQ_REL:
  56     case __ATOMIC_SEQ_CST:
  57       __atomic_thread_fence (model);
  58       break;
  59     default:
  60       break;
  61     }
  62   return;
  63 }
  64
  65 #define __unused __attribute__((unused))
  66
  67 #define __fetch_and_do(proto, type, size, opname, top, bottom)  \
  68 proto                                                           \
  69 {                                                               \
  70   top;                                                          \
  71   type rv = arch_atomic_##opname(p, i);                         \
  72   bottom;                                                       \
  73   return rv;                                                    \
  74 }
  75
  76 #define __atomic_fetch_and_do(type, size, opname)                       \
  77   __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
  78                  type, size, opname,                                    \
  79                  pre_atomic_barrier(model),                             \
  80                  post_atomic_barrier(model))                            \
  81
  82 __atomic_fetch_and_do (int, 4, add)
  83 __atomic_fetch_and_do (int, 4, sub)
  84 __atomic_fetch_and_do (int, 4, or)
  85 __atomic_fetch_and_do (int, 4, and)
  86 __atomic_fetch_and_do (int, 4, xor)
  87 __atomic_fetch_and_do (int, 4, nand)
  88 __atomic_fetch_and_do (long long, 8, add)
  89 __atomic_fetch_and_do (long long, 8, sub)
  90 __atomic_fetch_and_do (long long, 8, or)
  91 __atomic_fetch_and_do (long long, 8, and)
  92 __atomic_fetch_and_do (long long, 8, xor)
  93 __atomic_fetch_and_do (long long, 8, nand)
  94
  95 #define __sync_fetch_and_do(type, size, opname)                         \
  96   __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
  97                  type, size, opname,                                    \
  98                  arch_atomic_write_barrier(),                           \
  99                  arch_atomic_read_barrier())
 100
 101 __sync_fetch_and_do (int, 4, add)
 102 __sync_fetch_and_do (int, 4, sub)
 103 __sync_fetch_and_do (int, 4, or)
 104 __sync_fetch_and_do (int, 4, and)
 105 __sync_fetch_and_do (int, 4, xor)
 106 __sync_fetch_and_do (int, 4, nand)
 107 __sync_fetch_and_do (long long, 8, add)
 108 __sync_fetch_and_do (long long, 8, sub)
 109 __sync_fetch_and_do (long long, 8, or)
 110 __sync_fetch_and_do (long long, 8, and)
 111 __sync_fetch_and_do (long long, 8, xor)
 112 __sync_fetch_and_do (long long, 8, nand)
 113
 114 #define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom) \
 115 proto                                                                   \
 116 {                                                                       \
 117   top;                                                                  \
 118   type rv = op2 (arch_atomic_##opname(p, i) op i);                      \
 119   bottom;                                                               \
 120   return rv;                                                            \
 121 }
 122
 123 #define __atomic_do_and_fetch(type, size, opname, op, op2)              \
 124   __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
 125                  type, size, opname, op, op2,                           \
 126                  pre_atomic_barrier(model),                             \
 127                  post_atomic_barrier(model))                            \
 128
 129 __atomic_do_and_fetch (int, 4, add, +, )
 130 __atomic_do_and_fetch (int, 4, sub, -, )
 131 __atomic_do_and_fetch (int, 4, or, |, )
 132 __atomic_do_and_fetch (int, 4, and, &, )
 133 __atomic_do_and_fetch (int, 4, xor, |, )
 134 __atomic_do_and_fetch (int, 4, nand, &, ~)
 135 __atomic_do_and_fetch (long long, 8, add, +, )
 136 __atomic_do_and_fetch (long long, 8, sub, -, )
 137 __atomic_do_and_fetch (long long, 8, or, |, )
 138 __atomic_do_and_fetch (long long, 8, and, &, )
 139 __atomic_do_and_fetch (long long, 8, xor, |, )
 140 __atomic_do_and_fetch (long long, 8, nand, &, ~)
 141
 142 #define __sync_do_and_fetch(type, size, opname, op, op2)                \
 143   __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
 144                  type, size, opname, op, op2,                           \
 145                  arch_atomic_write_barrier(),                           \
 146                  arch_atomic_read_barrier())                            \
 147
 148 __sync_do_and_fetch (int, 4, add, +, )
 149 __sync_do_and_fetch (int, 4, sub, -, )
 150 __sync_do_and_fetch (int, 4, or, |, )
 151 __sync_do_and_fetch (int, 4, and, &, )
 152 __sync_do_and_fetch (int, 4, xor, |, )
 153 __sync_do_and_fetch (int, 4, nand, &, ~)
 154 __sync_do_and_fetch (long long, 8, add, +, )
 155 __sync_do_and_fetch (long long, 8, sub, -, )
 156 __sync_do_and_fetch (long long, 8, or, |, )
 157 __sync_do_and_fetch (long long, 8, and, &, )
 158 __sync_do_and_fetch (long long, 8, xor, |, )
 159 __sync_do_and_fetch (long long, 8, nand, &, ~)
 160
 161 #define __atomic_exchange_methods(type, size)                           \
 162 bool                                                                    \
 163 __atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp,     \
 164                                  type newval, bool weak __unused,       \
 165                                  int models, int modelf __unused)       \
 166 {                                                                       \
 167   type oldval = *oldvalp;                                               \
 168   pre_atomic_barrier(models);                                           \
 169   type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
 170   post_atomic_barrier(models);                                          \
 171   bool success = (retval == oldval);                                    \
 172   *oldvalp = retval;                                                    \
 173   return success;                                                       \
 174 }                                                                       \
 175                                                                         \
 176 type                                                                    \
 177 __atomic_exchange_##size(volatile type* ptr, type val, int model)       \
 178 {                                                                       \
 179   pre_atomic_barrier(model);                                            \
 180   type retval = arch_atomic_exchange(ptr, val);                         \
 181   post_atomic_barrier(model);                                           \
 182   return retval;                                                        \
 183 }
 184
 185 __atomic_exchange_methods (int, 4)
 186 __atomic_exchange_methods (long long, 8)
 187
 188 #define __sync_exchange_methods(type, size)                             \
 189 type                                                                    \
 190 __sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval) \
 191 {                                                                       \
 192   arch_atomic_write_barrier();                                          \
 193   type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
 194   arch_atomic_read_barrier();                                           \
 195   return retval;                                                        \
 196 }                                                                       \
 197                                                                         \
 198 bool                                                                    \
 199 __sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
 200 {                                                                       \
 201   arch_atomic_write_barrier();                                          \
 202   bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
 203   arch_atomic_read_barrier();                                           \
 204   return retval;                                                        \
 205 }                                                                       \
 206                                                                         \
 207 type                                                                    \
 208 __sync_lock_test_and_set_##size(type* ptr, type val)                    \
 209 {                                                                       \
 210   type retval = arch_atomic_exchange(ptr, val);                         \
 211   arch_atomic_acquire_barrier_value(retval);                            \
 212   return retval;                                                        \
 213 }
 214
 215 __sync_exchange_methods (int, 4)
 216 __sync_exchange_methods (long long, 8)
 217
 218 #ifdef __LITTLE_ENDIAN__
 219 #define BIT_OFFSET(n, type) ((n) * 8)
 220 #else
 221 #define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
 222 #endif
 223
 224 /* Subword methods require the same approach for both TILEPro and
 225    TILE-Gx.  We load the background data for the word, insert the
 226    desired subword piece, then compare-and-exchange it into place.  */
 227 #define u8 unsigned char
 228 #define u16 unsigned short
 229
 230 #define __subword_cmpxchg_body(type, size, ptr, guess, val)             \
 231   ({                                                                    \
 232     unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);      \
 233     const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);       \
 234     const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;         \
 235     const unsigned int bgmask = ~(valmask << shift);                    \
 236     unsigned int oldword = *p;                                          \
 237     type oldval = (oldword >> shift) & valmask;                         \
 238     if (__builtin_expect((oldval == guess), 1)) {                       \
 239       unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
 240       oldword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
 241       oldval = (oldword >> shift) & valmask;                            \
 242     }                                                                   \
 243     oldval;                                                             \
 244   })                                                                    \
 245
 246 #define __atomic_subword_cmpxchg(type, size)                            \
 247                                                                         \
 248 bool                                                                    \
 249 __atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr,   \
 250                                  type val, bool weak __unused, int models, \
 251                                  int modelf __unused)                   \
 252 {                                                                       \
 253   pre_atomic_barrier(models);                                           \
 254   type guess = *guess_ptr;                                              \
 255   type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);    \
 256   post_atomic_barrier(models);                                          \
 257   bool success = (oldval == guess);                                     \
 258   *guess_ptr = oldval;                                                  \
 259   return success;                                                       \
 260 }
 261
 262 __atomic_subword_cmpxchg (u8, 1)
 263 __atomic_subword_cmpxchg (u16, 2)
 264
 265 #define __sync_subword_cmpxchg(type, size)                              \
 266                                                                         \
 267 type                                                                    \
 268 __sync_val_compare_and_swap_##size(type* ptr, type guess, type val)     \
 269 {                                                                       \
 270   arch_atomic_write_barrier();                                          \
 271   type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);    \
 272   arch_atomic_read_barrier();                                           \
 273   return oldval;                                                        \
 274 }                                                                       \
 275                                                                         \
 276 bool                                                                    \
 277 __sync_bool_compare_and_swap_##size(type* ptr, type guess, type val)    \
 278 {                                                                       \
 279   type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val);    \
 280   return oldval == guess;                                               \
 281 }
 282
 283 __sync_subword_cmpxchg (u8, 1)
 284 __sync_subword_cmpxchg (u16, 2)
 285
 286 /* For the atomic-update subword methods, we use the same approach as
 287    above, but we retry until we succeed if the compare-and-exchange
 288    fails.  */
 289 #define __subword(type, proto, top, expr, bottom)                       \
 290 proto                                                                   \
 291 {                                                                       \
 292   top                                                                   \
 293   unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);        \
 294   const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);         \
 295   const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;           \
 296   const unsigned int bgmask = ~(valmask << shift);                      \
 297   unsigned int oldword, xword = *p;                                     \
 298   type val, oldval;                                                     \
 299   do {                                                                  \
 300     oldword = xword;                                                    \
 301     oldval = (oldword >> shift) & valmask;                              \
 302     val = expr;                                                         \
 303     unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
 304     xword = arch_atomic_val_compare_and_exchange(p, oldword, word);     \
 305   } while (__builtin_expect(xword != oldword, 0));                      \
 306   bottom                                                                \
 307 }
 308
 309 #define __atomic_subword_fetch(type, funcname, expr, retval)            \
 310   __subword(type,                                                       \
 311             type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
 312             pre_atomic_barrier(model);,                                 \
 313             expr,                                                       \
 314             post_atomic_barrier(model); return retval;)
 315
 316 __atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
 317 __atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
 318 __atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
 319 __atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
 320 __atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
 321 __atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)
 322
 323 __atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
 324 __atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
 325 __atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
 326 __atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
 327 __atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
 328 __atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)
 329
 330 __atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
 331 __atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
 332 __atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
 333 __atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
 334 __atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
 335 __atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)
 336
 337 __atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
 338 __atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
 339 __atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
 340 __atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
 341 __atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
 342 __atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)
 343
 344 #define __sync_subword_fetch(type, funcname, expr, retval)      \
 345   __subword(type,                                               \
 346             type __sync_ ## funcname(type *ptr, type i),        \
 347             arch_atomic_read_barrier();,                        \
 348             expr,                                               \
 349             arch_atomic_write_barrier(); return retval;)
 350
 351 __sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
 352 __sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
 353 __sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
 354 __sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
 355 __sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
 356 __sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)
 357
 358 __sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
 359 __sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
 360 __sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
 361 __sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
 362 __sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
 363 __sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)
 364
 365 __sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
 366 __sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
 367 __sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
 368 __sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
 369 __sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
 370 __sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)
 371
 372 __sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
 373 __sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
 374 __sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
 375 __sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
 376 __sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
 377 __sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)
 378
 379 #define __atomic_subword_lock(type, size)                               \
 380   __subword(type,                                                       \
 381             type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
 382             pre_atomic_barrier(model);,                                 \
 383             nval,                                                       \
 384             post_atomic_barrier(model); return oldval;)
 385
 386 __atomic_subword_lock (u8, 1)
 387 __atomic_subword_lock (u16, 2)
 388
 389 #define __sync_subword_lock(type, size)                                 \
 390   __subword(type,                                                       \
 391             type __sync_lock_test_and_set_##size(type* ptr, type nval), \
 392             ,                                                           \
 393             nval,                                                       \
 394             arch_atomic_acquire_barrier_value(oldval); return oldval;)
 395
 396 __sync_subword_lock (u8, 1)
 397 __sync_subword_lock (u16, 2)