]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
c2ba9709 | 19 | |
748086b7 JJ |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/compatibility.h | |
26 | * @brief Compatibility layer, mostly concerned with atomic operations. | |
27 | * This file is a GNU parallel extension to the Standard C++ Library. | |
28 | */ | |
29 | ||
30 | // Written by Felix Putze. | |
31 | ||
32 | #ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H | |
33 | #define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 | |
34 | ||
35 | #include <parallel/types.h> | |
36 | #include <parallel/base.h> | |
37 | ||
38 | #if defined(__SUNPRO_CC) && defined(__sparc) | |
39 | #include <sys/atomic.h> | |
40 | #endif | |
41 | ||
cccc4450 | 42 | #if !defined(_WIN32) || defined (__CYGWIN__) |
c2ba9709 JS |
43 | #include <sched.h> |
44 | #endif | |
45 | ||
46 | #if defined(_MSC_VER) | |
47 | #include <Windows.h> | |
48 | #include <intrin.h> | |
49 | #undef max | |
50 | #undef min | |
51 | #endif | |
52 | ||
#ifdef __MINGW32__
// Pulling in <windows.h> would expose all of the Win32 names to user
// code, which can cause portability problems there.  Only the single
// function that is needed gets declared instead.
extern "C"
__attribute__((dllimport)) void __attribute__((stdcall)) Sleep(unsigned long);
#endif
60 | ||
c2ba9709 JS |
61 | namespace __gnu_parallel |
62 | { | |
63 | #if defined(__ICC) | |
1acba85b JS |
64 | template<typename _MustBeInt = int> |
65 | int32 __faa32(int32* __x, int32 __inc) | |
c2ba9709 JS |
66 | { |
67 | asm volatile("lock xadd %0,%1" | |
1acba85b JS |
68 | : "=__r" (__inc), "=__m" (*__x) |
69 | : "0" (__inc) | |
c2ba9709 | 70 | : "memory"); |
1acba85b | 71 | return __inc; |
c2ba9709 JS |
72 | } |
73 | #if defined(__x86_64) | |
1acba85b JS |
74 | template<typename _MustBeInt = int> |
75 | int64 __faa64(int64* __x, int64 __inc) | |
c2ba9709 JS |
76 | { |
77 | asm volatile("lock xadd %0,%1" | |
1acba85b JS |
78 | : "=__r" (__inc), "=__m" (*__x) |
79 | : "0" (__inc) | |
c2ba9709 | 80 | : "memory"); |
1acba85b | 81 | return __inc; |
c2ba9709 JS |
82 | } |
83 | #endif | |
84 | #endif | |
85 | ||
86 | // atomic functions only work on integers | |
87 | ||
88 | /** @brief Add a value to a variable, atomically. | |
89 | * | |
90 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
91 | * @param __ptr Pointer to a 32-bit signed integer. |
92 | * @param __addend Value to add. | |
c2ba9709 | 93 | */ |
6df548d2 | 94 | inline int32 |
1acba85b | 95 | __fetch_and_add_32(volatile int32* __ptr, int32 __addend) |
c2ba9709 JS |
96 | { |
97 | #if defined(__ICC) //x86 version | |
1acba85b | 98 | return _InterlockedExchangeAdd((void*)__ptr, __addend); |
c2ba9709 | 99 | #elif defined(__ECC) //IA-64 version |
1acba85b | 100 | return _InterlockedExchangeAdd((void*)__ptr, __addend); |
c2ba9709 | 101 | #elif defined(__ICL) || defined(_MSC_VER) |
1acba85b JS |
102 | return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr), |
103 | __addend); | |
c2ba9709 | 104 | #elif defined(__GNUC__) |
1acba85b | 105 | return __sync_fetch_and_add(__ptr, __addend); |
c2ba9709 | 106 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 107 | volatile int32 __before, __after; |
c2ba9709 JS |
108 | do |
109 | { | |
1acba85b JS |
110 | __before = *__ptr; |
111 | __after = __before + __addend; | |
112 | } while (atomic_cas_32((volatile unsigned int*)__ptr, __before, | |
113 | __after) != __before); | |
114 | return __before; | |
c2ba9709 | 115 | #else //fallback, slow |
1acba85b JS |
116 | #pragma message("slow __fetch_and_add_32") |
117 | int32 __res; | |
c2ba9709 JS |
118 | #pragma omp critical |
119 | { | |
1acba85b JS |
120 | __res = *__ptr; |
121 | *(__ptr) += __addend; | |
c2ba9709 | 122 | } |
1acba85b | 123 | return __res; |
c2ba9709 JS |
124 | #endif |
125 | } | |
126 | ||
127 | /** @brief Add a value to a variable, atomically. | |
128 | * | |
129 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
130 | * @param __ptr Pointer to a 64-bit signed integer. |
131 | * @param __addend Value to add. | |
c2ba9709 | 132 | */ |
6df548d2 | 133 | inline int64 |
1acba85b | 134 | __fetch_and_add_64(volatile int64* __ptr, int64 __addend) |
c2ba9709 JS |
135 | { |
136 | #if defined(__ICC) && defined(__x86_64) //x86 version | |
1acba85b | 137 | return __faa64<int>((int64*)__ptr, __addend); |
c2ba9709 | 138 | #elif defined(__ECC) //IA-64 version |
1acba85b | 139 | return _InterlockedExchangeAdd64((void*)__ptr, __addend); |
c2ba9709 JS |
140 | #elif defined(__ICL) || defined(_MSC_VER) |
141 | #ifndef _WIN64 | |
142 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case | |
143 | return 0; | |
144 | #else | |
1acba85b | 145 | return _InterlockedExchangeAdd64(__ptr, __addend); |
c2ba9709 JS |
146 | #endif |
147 | #elif defined(__GNUC__) && defined(__x86_64) | |
1acba85b | 148 | return __sync_fetch_and_add(__ptr, __addend); |
c2ba9709 JS |
149 | #elif defined(__GNUC__) && defined(__i386) && \ |
150 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) | |
1acba85b | 151 | return __sync_fetch_and_add(__ptr, __addend); |
c2ba9709 | 152 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b | 153 | volatile int64 __before, __after; |
c2ba9709 JS |
154 | do |
155 | { | |
1acba85b JS |
156 | __before = *__ptr; |
157 | __after = __before + __addend; | |
158 | } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before, | |
159 | __after) != __before); | |
160 | return __before; | |
c2ba9709 JS |
161 | #else //fallback, slow |
162 | #if defined(__GNUC__) && defined(__i386) | |
1acba85b | 163 | // XXX doesn'__t work with -march=native |
c2ba9709 JS |
164 | //#warning "please compile with -march=i686 or better" |
165 | #endif | |
1acba85b JS |
166 | #pragma message("slow __fetch_and_add_64") |
167 | int64 __res; | |
c2ba9709 JS |
168 | #pragma omp critical |
169 | { | |
1acba85b JS |
170 | __res = *__ptr; |
171 | *(__ptr) += __addend; | |
c2ba9709 | 172 | } |
1acba85b | 173 | return __res; |
c2ba9709 JS |
174 | #endif |
175 | } | |
176 | ||
177 | /** @brief Add a value to a variable, atomically. | |
178 | * | |
179 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
180 | * @param __ptr Pointer to a signed integer. |
181 | * @param __addend Value to add. | |
c2ba9709 | 182 | */ |
1acba85b JS |
183 | template<typename _Tp> |
184 | inline _Tp | |
185 | __fetch_and_add(volatile _Tp* __ptr, _Tp __addend) | |
c2ba9709 | 186 | { |
1acba85b JS |
187 | if (sizeof(_Tp) == sizeof(int32)) |
188 | return (_Tp)__fetch_and_add_32((volatile int32*) __ptr, (int32)__addend); | |
189 | else if (sizeof(_Tp) == sizeof(int64)) | |
190 | return (_Tp)__fetch_and_add_64((volatile int64*) __ptr, (int64)__addend); | |
c2ba9709 JS |
191 | else |
192 | _GLIBCXX_PARALLEL_ASSERT(false); | |
193 | } | |
194 | ||
195 | ||
196 | #if defined(__ICC) | |
197 | ||
1acba85b | 198 | template<typename _MustBeInt = int> |
6df548d2 | 199 | inline int32 |
1acba85b | 200 | __cas32(volatile int32* __ptr, int32 __old, int32 __nw) |
c2ba9709 | 201 | { |
1acba85b | 202 | int32 __before; |
c2ba9709 | 203 | __asm__ __volatile__("lock; cmpxchgl %1,%2" |
1acba85b JS |
204 | : "=a"(__before) |
205 | : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old) | |
c2ba9709 | 206 | : "memory"); |
1acba85b | 207 | return __before; |
c2ba9709 JS |
208 | } |
209 | ||
210 | #if defined(__x86_64) | |
1acba85b | 211 | template<typename _MustBeInt = int> |
6df548d2 | 212 | inline int64 |
1acba85b | 213 | __cas64(volatile int64 *__ptr, int64 __old, int64 __nw) |
c2ba9709 | 214 | { |
1acba85b | 215 | int64 __before; |
c2ba9709 | 216 | __asm__ __volatile__("lock; cmpxchgq %1,%2" |
1acba85b JS |
217 | : "=a"(__before) |
218 | : "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old) | |
c2ba9709 | 219 | : "memory"); |
1acba85b | 220 | return __before; |
c2ba9709 JS |
221 | } |
222 | #endif | |
223 | ||
224 | #endif | |
225 | ||
1acba85b JS |
226 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
227 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
228 | * |
229 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
230 | * @param __ptr Pointer to 32-bit signed integer. |
231 | * @param __comparand Compare value. | |
232 | * @param __replacement Replacement value. | |
c2ba9709 JS |
233 | */ |
234 | inline bool | |
1acba85b | 235 | __compare_and_swap_32(volatile int32* __ptr, int32 __comparand, int32 __replacement) |
c2ba9709 JS |
236 | { |
237 | #if defined(__ICC) //x86 version | |
1acba85b JS |
238 | return _InterlockedCompareExchange((void*)__ptr, __replacement, |
239 | __comparand) == __comparand; | |
c2ba9709 | 240 | #elif defined(__ECC) //IA-64 version |
1acba85b JS |
241 | return _InterlockedCompareExchange((void*)__ptr, __replacement, |
242 | __comparand) == __comparand; | |
c2ba9709 | 243 | #elif defined(__ICL) || defined(_MSC_VER) |
1acba85b JS |
244 | return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr), |
245 | __replacement, __comparand) == __comparand; | |
c2ba9709 | 246 | #elif defined(__GNUC__) |
1acba85b | 247 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
c2ba9709 | 248 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b JS |
249 | return atomic_cas_32((volatile unsigned int*)__ptr, __comparand, |
250 | __replacement) == __comparand; | |
c2ba9709 | 251 | #else |
1acba85b JS |
252 | #pragma message("slow __compare_and_swap_32") |
253 | bool __res = false; | |
c2ba9709 JS |
254 | #pragma omp critical |
255 | { | |
1acba85b | 256 | if (*__ptr == __comparand) |
c2ba9709 | 257 | { |
1acba85b JS |
258 | *__ptr = __replacement; |
259 | __res = true; | |
c2ba9709 JS |
260 | } |
261 | } | |
1acba85b | 262 | return __res; |
c2ba9709 JS |
263 | #endif |
264 | } | |
265 | ||
1acba85b JS |
266 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
267 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
268 | * |
269 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
270 | * @param __ptr Pointer to 64-bit signed integer. |
271 | * @param __comparand Compare value. | |
272 | * @param __replacement Replacement value. | |
c2ba9709 JS |
273 | */ |
274 | inline bool | |
1acba85b | 275 | __compare_and_swap_64(volatile int64* __ptr, int64 __comparand, int64 __replacement) |
c2ba9709 JS |
276 | { |
277 | #if defined(__ICC) && defined(__x86_64) //x86 version | |
1acba85b | 278 | return __cas64<int>(__ptr, __comparand, __replacement) == __comparand; |
c2ba9709 | 279 | #elif defined(__ECC) //IA-64 version |
1acba85b JS |
280 | return _InterlockedCompareExchange64((void*)__ptr, __replacement, |
281 | __comparand) == __comparand; | |
c2ba9709 JS |
282 | #elif defined(__ICL) || defined(_MSC_VER) |
283 | #ifndef _WIN64 | |
284 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case | |
285 | return 0; | |
286 | #else | |
1acba85b JS |
287 | return _InterlockedCompareExchange64(__ptr, __replacement, |
288 | __comparand) == __comparand; | |
c2ba9709 JS |
289 | #endif |
290 | ||
291 | #elif defined(__GNUC__) && defined(__x86_64) | |
1acba85b | 292 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
c2ba9709 JS |
293 | #elif defined(__GNUC__) && defined(__i386) && \ |
294 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) | |
1acba85b | 295 | return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); |
c2ba9709 | 296 | #elif defined(__SUNPRO_CC) && defined(__sparc) |
1acba85b JS |
297 | return atomic_cas_64((volatile unsigned long long*)__ptr, |
298 | __comparand, __replacement) == __comparand; | |
c2ba9709 JS |
299 | #else |
300 | #if defined(__GNUC__) && defined(__i386) | |
301 | // XXX -march=native | |
302 | //#warning "please compile with -march=i686 or better" | |
303 | #endif | |
1acba85b JS |
304 | #pragma message("slow __compare_and_swap_64") |
305 | bool __res = false; | |
c2ba9709 JS |
306 | #pragma omp critical |
307 | { | |
1acba85b | 308 | if (*__ptr == __comparand) |
c2ba9709 | 309 | { |
1acba85b JS |
310 | *__ptr = __replacement; |
311 | __res = true; | |
c2ba9709 JS |
312 | } |
313 | } | |
1acba85b | 314 | return __res; |
c2ba9709 JS |
315 | #endif |
316 | } | |
317 | ||
1acba85b JS |
318 | /** @brief Compare @__c *__ptr and @__c __comparand. If equal, let @__c |
319 | * *__ptr=__replacement and return @__c true, return @__c false otherwise. | |
c2ba9709 JS |
320 | * |
321 | * Implementation is heavily platform-dependent. | |
1acba85b JS |
322 | * @param __ptr Pointer to signed integer. |
323 | * @param __comparand Compare value. | |
324 | * @param __replacement Replacement value. */ | |
325 | template<typename _Tp> | |
c2ba9709 | 326 | inline bool |
1acba85b | 327 | __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement) |
c2ba9709 | 328 | { |
1acba85b JS |
329 | if (sizeof(_Tp) == sizeof(int32)) |
330 | return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, (int32)__replacement); | |
331 | else if (sizeof(_Tp) == sizeof(int64)) | |
332 | return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, (int64)__replacement); | |
c2ba9709 JS |
333 | else |
334 | _GLIBCXX_PARALLEL_ASSERT(false); | |
335 | } | |
336 | ||
/** @brief Hand the processor over to another thread instead of waiting
 *  for the current time slice to run out.
 *
 *  Calls @c sched_yield() everywhere except on native Win32 (i.e.
 *  @c _WIN32 without Cygwin), where @c Sleep(0) is used.
 */
inline void
__yield()
{
#if !defined (_WIN32) || defined (__CYGWIN__)
  sched_yield();
#else
  Sleep(0);
#endif
}
348 | } // end namespace | |
349 | ||
cbcd1e45 | 350 | #endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */ |