]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
c2ba9709 | 19 | |
748086b7 JJ |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/compatibility.h | |
26 | * @brief Compatibility layer, mostly concerned with atomic operations. | |
27 | * This file is a GNU parallel extension to the Standard C++ Library. | |
28 | */ | |
29 | ||
30 | // Written by Felix Putze. | |
31 | ||
32 | #ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H | |
33 | #define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 | |
34 | ||
35 | #include <parallel/types.h> | |
36 | #include <parallel/base.h> | |
37 | ||
38 | #if defined(__SUNPRO_CC) && defined(__sparc) | |
39 | #include <sys/atomic.h> | |
40 | #endif | |
41 | ||
cccc4450 | 42 | #if !defined(_WIN32) || defined (__CYGWIN__) |
c2ba9709 JS |
43 | #include <sched.h> |
44 | #endif | |
45 | ||
46 | #if defined(_MSC_VER) | |
47 | #include <Windows.h> | |
48 | #include <intrin.h> | |
49 | #undef max | |
50 | #undef min | |
51 | #endif | |
52 | ||
cccc4450 DS |
53 | #ifdef __MINGW32__ |
54 | // Including <windows.h> will drag in all the windows32 names. Since | |
55 | // that can cause user code portability problems, we just declare the | |
56 | // one needed function here. | |
57 | extern "C" | |
58 | __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long); | |
59 | #endif | |
60 | ||
c2ba9709 JS |
61 | namespace __gnu_parallel |
62 | { | |
63 | #if defined(__ICC) | |
1acba85b JS |
64 | template<typename _MustBeInt = int> |
65 | int32 __faa32(int32* __x, int32 __inc) | |
c2ba9709 JS |
66 | { |
67 | asm volatile("lock xadd %0,%1" | |
15ac3c72 JS |
68 | : "=__r" (__inc), "=__m" (*__x) |
69 | : "0" (__inc) | |
70 | : "memory"); | |
1acba85b | 71 | return __inc; |
c2ba9709 JS |
72 | } |
73 | #if defined(__x86_64) | |
1acba85b JS |
74 | template<typename _MustBeInt = int> |
75 | int64 __faa64(int64* __x, int64 __inc) | |
c2ba9709 JS |
76 | { |
77 | asm volatile("lock xadd %0,%1" | |
15ac3c72 JS |
78 | : "=__r" (__inc), "=__m" (*__x) |
79 | : "0" (__inc) | |
80 | : "memory"); | |
1acba85b | 81 | return __inc; |
c2ba9709 JS |
82 | } |
83 | #endif | |
84 | #endif | |
85 | ||
86 | // atomic functions only work on integers | |
87 | ||
88 | /** @brief Add a value to a variable, atomically. | |
89 | * | |
90 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
91 | * @param __ptr Pointer to a 32-bit signed integer. |
92 | * @param __addend Value to add. | |
c2ba9709 | 93 | */ |
6df548d2 | 94 | inline int32 |
1acba85b | 95 | __fetch_and_add_32(volatile int32* __ptr, int32 __addend) |
c2ba9709 | 96 | { |
15ac3c72 | 97 | #if defined(__ICC) //x86 version |
1acba85b | 98 | return _InterlockedExchangeAdd((void*)__ptr, __addend); |
15ac3c72 | 99 | #elif defined(__ECC) //IA-64 version |
1acba85b | 100 | return _InterlockedExchangeAdd((void*)__ptr, __addend); |
c2ba9709 | 101 | #elif defined(__ICL) || defined(_MSC_VER) |
1acba85b | 102 | return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr), |
15ac3c72 | 103 | __addend); |
c2ba9709 | 104 | #elif defined(__GNUC__) |
1acba85b | 105 | return __sync_fetch_and_add(__ptr, __addend); |
c2ba9709 | 106 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 107 | volatile int32 __before, __after; |
c2ba9709 JS |
108 | do |
109 | { | |
15ac3c72 JS |
110 | __before = *__ptr; |
111 | __after = __before + __addend; | |
1acba85b | 112 | } while (atomic_cas_32((volatile unsigned int*)__ptr, __before, |
15ac3c72 | 113 | __after) != __before); |
1acba85b | 114 | return __before; |
15ac3c72 | 115 | #else //fallback, slow |
1acba85b JS |
116 | #pragma message("slow __fetch_and_add_32") |
117 | int32 __res; | |
c2ba9709 JS |
118 | #pragma omp critical |
119 | { | |
1acba85b JS |
120 | __res = *__ptr; |
121 | *(__ptr) += __addend; | |
c2ba9709 | 122 | } |
1acba85b | 123 | return __res; |
c2ba9709 JS |
124 | #endif |
125 | } | |
126 | ||
127 | /** @brief Add a value to a variable, atomically. | |
128 | * | |
129 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
130 | * @param __ptr Pointer to a 64-bit signed integer. |
131 | * @param __addend Value to add. | |
c2ba9709 | 132 | */ |
6df548d2 | 133 | inline int64 |
1acba85b | 134 | __fetch_and_add_64(volatile int64* __ptr, int64 __addend) |
c2ba9709 | 135 | { |
15ac3c72 | 136 | #if defined(__ICC) && defined(__x86_64) //x86 version |
1acba85b | 137 | return __faa64<int>((int64*)__ptr, __addend); |
15ac3c72 | 138 | #elif defined(__ECC) //IA-64 version |
1acba85b | 139 | return _InterlockedExchangeAdd64((void*)__ptr, __addend); |
c2ba9709 JS |
140 | #elif defined(__ICL) || defined(_MSC_VER) |
141 | #ifndef _WIN64 | |
15ac3c72 | 142 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case |
c2ba9709 JS |
143 | return 0; |
144 | #else | |
1acba85b | 145 | return _InterlockedExchangeAdd64(__ptr, __addend); |
c2ba9709 JS |
146 | #endif |
147 | #elif defined(__GNUC__) && defined(__x86_64) | |
1acba85b | 148 | return __sync_fetch_and_add(__ptr, __addend); |
15ac3c72 | 149 | #elif defined(__GNUC__) && defined(__i386) && \ |
c2ba9709 | 150 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) |
1acba85b | 151 | return __sync_fetch_and_add(__ptr, __addend); |
c2ba9709 | 152 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 153 | volatile int64 __before, __after; |
c2ba9709 JS |
154 | do |
155 | { | |
15ac3c72 JS |
156 | __before = *__ptr; |
157 | __after = __before + __addend; | |
1acba85b | 158 | } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before, |
15ac3c72 | 159 | __after) != __before); |
1acba85b | 160 | return __before; |
15ac3c72 | 161 | #else //fallback, slow |
c2ba9709 | 162 | #if defined(__GNUC__) && defined(__i386) |
1acba85b | 163 | // XXX doesn'__t work with -march=native |
c2ba9709 JS |
164 | //#warning "please compile with -march=i686 or better" |
165 | #endif | |
1acba85b JS |
166 | #pragma message("slow __fetch_and_add_64") |
167 | int64 __res; | |
c2ba9709 JS |
168 | #pragma omp critical |
169 | { | |
1acba85b JS |
170 | __res = *__ptr; |
171 | *(__ptr) += __addend; | |
c2ba9709 | 172 | } |
1acba85b | 173 | return __res; |
c2ba9709 JS |
174 | #endif |
175 | } | |
176 | ||
177 | /** @brief Add a value to a variable, atomically. | |
178 | * | |
179 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
180 | * @param __ptr Pointer to a signed integer. |
181 | * @param __addend Value to add. | |
c2ba9709 | 182 | */ |
1acba85b JS |
183 | template<typename _Tp> |
184 | inline _Tp | |
185 | __fetch_and_add(volatile _Tp* __ptr, _Tp __addend) | |
c2ba9709 | 186 | { |
1acba85b JS |
187 | if (sizeof(_Tp) == sizeof(int32)) |
188 | return (_Tp)__fetch_and_add_32((volatile int32*) __ptr, (int32)__addend); | |
189 | else if (sizeof(_Tp) == sizeof(int64)) | |
190 | return (_Tp)__fetch_and_add_64((volatile int64*) __ptr, (int64)__addend); | |
c2ba9709 JS |
191 | else |
192 | _GLIBCXX_PARALLEL_ASSERT(false); | |
193 | } | |
194 | ||
195 | ||
196 | #if defined(__ICC) | |
197 | ||
1acba85b | 198 | template<typename _MustBeInt = int> |
6df548d2 | 199 | inline int32 |
1acba85b | 200 | __cas32(volatile int32* __ptr, int32 __old, int32 __nw) |
c2ba9709 | 201 | { |
1acba85b | 202 | int32 __before; |
c2ba9709 | 203 | __asm__ __volatile__("lock; cmpxchgl %1,%2" |
15ac3c72 JS |
204 | : "=a"(__before) |
205 | : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), | |
206 | "0"(__old) | |
207 | : "memory"); | |
1acba85b | 208 | return __before; |
c2ba9709 JS |
209 | } |
210 | ||
211 | #if defined(__x86_64) | |
1acba85b | 212 | template<typename _MustBeInt = int> |
6df548d2 | 213 | inline int64 |
1acba85b | 214 | __cas64(volatile int64 *__ptr, int64 __old, int64 __nw) |
c2ba9709 | 215 | { |
1acba85b | 216 | int64 __before; |
c2ba9709 | 217 | __asm__ __volatile__("lock; cmpxchgq %1,%2" |
15ac3c72 JS |
218 | : "=a"(__before) |
219 | : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), | |
220 | "0"(__old) | |
221 | : "memory"); | |
1acba85b | 222 | return __before; |
c2ba9709 JS |
223 | } |
224 | #endif | |
225 | ||
226 | #endif | |
227 | ||
1acba85b JS |
228 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
229 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
230 | * |
231 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
232 | * @param __ptr Pointer to 32-bit signed integer. |
233 | * @param __comparand Compare value. | |
234 | * @param __replacement Replacement value. | |
c2ba9709 JS |
235 | */ |
236 | inline bool | |
15ac3c72 JS |
237 | __compare_and_swap_32(volatile int32* __ptr, int32 __comparand, |
238 | int32 __replacement) | |
c2ba9709 | 239 | { |
15ac3c72 | 240 | #if defined(__ICC) //x86 version |
1acba85b | 241 | return _InterlockedCompareExchange((void*)__ptr, __replacement, |
15ac3c72 JS |
242 | __comparand) == __comparand; |
243 | #elif defined(__ECC) //IA-64 version | |
1acba85b | 244 | return _InterlockedCompareExchange((void*)__ptr, __replacement, |
15ac3c72 | 245 | __comparand) == __comparand; |
c2ba9709 | 246 | #elif defined(__ICL) || defined(_MSC_VER) |
15ac3c72 JS |
247 | return _InterlockedCompareExchange( |
248 | reinterpret_cast<volatile long*>(__ptr), | |
249 | __replacement, __comparand) | |
250 | == __comparand; | |
c2ba9709 | 251 | #elif defined(__GNUC__) |
1acba85b | 252 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
c2ba9709 | 253 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 254 | return atomic_cas_32((volatile unsigned int*)__ptr, __comparand, |
15ac3c72 | 255 | __replacement) == __comparand; |
c2ba9709 | 256 | #else |
1acba85b JS |
257 | #pragma message("slow __compare_and_swap_32") |
258 | bool __res = false; | |
c2ba9709 JS |
259 | #pragma omp critical |
260 | { | |
1acba85b | 261 | if (*__ptr == __comparand) |
15ac3c72 JS |
262 | { |
263 | *__ptr = __replacement; | |
264 | __res = true; | |
265 | } | |
c2ba9709 | 266 | } |
1acba85b | 267 | return __res; |
c2ba9709 JS |
268 | #endif |
269 | } | |
270 | ||
1acba85b JS |
271 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
272 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
273 | * |
274 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
275 | * @param __ptr Pointer to 64-bit signed integer. |
276 | * @param __comparand Compare value. | |
277 | * @param __replacement Replacement value. | |
c2ba9709 JS |
278 | */ |
279 | inline bool | |
15ac3c72 JS |
280 | __compare_and_swap_64(volatile int64* __ptr, int64 __comparand, |
281 | int64 __replacement) | |
c2ba9709 | 282 | { |
15ac3c72 | 283 | #if defined(__ICC) && defined(__x86_64) //x86 version |
1acba85b | 284 | return __cas64<int>(__ptr, __comparand, __replacement) == __comparand; |
15ac3c72 | 285 | #elif defined(__ECC) //IA-64 version |
1acba85b | 286 | return _InterlockedCompareExchange64((void*)__ptr, __replacement, |
15ac3c72 | 287 | __comparand) == __comparand; |
c2ba9709 JS |
288 | #elif defined(__ICL) || defined(_MSC_VER) |
289 | #ifndef _WIN64 | |
15ac3c72 | 290 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case |
c2ba9709 JS |
291 | return 0; |
292 | #else | |
1acba85b | 293 | return _InterlockedCompareExchange64(__ptr, __replacement, |
15ac3c72 | 294 | __comparand) == __comparand; |
c2ba9709 JS |
295 | #endif |
296 | ||
297 | #elif defined(__GNUC__) && defined(__x86_64) | |
1acba85b | 298 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
15ac3c72 | 299 | #elif defined(__GNUC__) && defined(__i386) && \ |
c2ba9709 | 300 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) |
1acba85b | 301 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
c2ba9709 | 302 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 303 | return atomic_cas_64((volatile unsigned long long*)__ptr, |
15ac3c72 | 304 | __comparand, __replacement) == __comparand; |
c2ba9709 JS |
305 | #else |
306 | #if defined(__GNUC__) && defined(__i386) | |
307 | // XXX -march=native | |
308 | //#warning "please compile with -march=i686 or better" | |
309 | #endif | |
1acba85b JS |
310 | #pragma message("slow __compare_and_swap_64") |
311 | bool __res = false; | |
c2ba9709 JS |
312 | #pragma omp critical |
313 | { | |
1acba85b | 314 | if (*__ptr == __comparand) |
15ac3c72 JS |
315 | { |
316 | *__ptr = __replacement; | |
317 | __res = true; | |
318 | } | |
c2ba9709 | 319 | } |
1acba85b | 320 | return __res; |
c2ba9709 JS |
321 | #endif |
322 | } | |
323 | ||
1acba85b JS |
324 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
325 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
326 | * |
327 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
328 | * @param __ptr Pointer to signed integer. |
329 | * @param __comparand Compare value. | |
330 | * @param __replacement Replacement value. */ | |
331 | template<typename _Tp> | |
c2ba9709 | 332 | inline bool |
1acba85b | 333 | __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement) |
c2ba9709 | 334 | { |
1acba85b | 335 | if (sizeof(_Tp) == sizeof(int32)) |
15ac3c72 JS |
336 | return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, |
337 | (int32)__replacement); | |
1acba85b | 338 | else if (sizeof(_Tp) == sizeof(int64)) |
15ac3c72 JS |
339 | return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, |
340 | (int64)__replacement); | |
c2ba9709 JS |
341 | else |
342 | _GLIBCXX_PARALLEL_ASSERT(false); | |
343 | } | |
344 | ||
/** @brief Give up the processor to another thread without waiting
 *  for the current time slice to end.
 *
 *  Calls @c sched_yield(), except on native Windows (@c _WIN32
 *  without @c __CYGWIN__), where @c Sleep(0) is called instead.
 */
inline void
__yield()
{
#if !defined (_WIN32) || defined (__CYGWIN__)
  sched_yield();
#else
  Sleep(0);
#endif
}
356 | } // end namespace | |
357 | ||
cbcd1e45 | 358 | #endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */ |