]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
c2ba9709 | 19 | |
748086b7 JJ |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/compatibility.h | |
26 | * @brief Compatibility layer, mostly concerned with atomic operations. | |
27 | * This file is a GNU parallel extension to the Standard C++ Library. | |
28 | */ | |
29 | ||
30 | // Written by Felix Putze. | |
31 | ||
32 | #ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H | |
33 | #define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1 | |
34 | ||
35 | #include <parallel/types.h> | |
36 | #include <parallel/base.h> | |
37 | ||
38 | #if defined(__SUNPRO_CC) && defined(__sparc) | |
39 | #include <sys/atomic.h> | |
40 | #endif | |
41 | ||
cccc4450 | 42 | #if !defined(_WIN32) || defined (__CYGWIN__) |
c2ba9709 JS |
43 | #include <sched.h> |
44 | #endif | |
45 | ||
46 | #if defined(_MSC_VER) | |
47 | #include <Windows.h> | |
48 | #include <intrin.h> | |
49 | #undef max | |
50 | #undef min | |
51 | #endif | |
52 | ||
cccc4450 DS |
53 | #ifdef __MINGW32__ |
54 | // Including <windows.h> will drag in all the windows32 names. Since | |
55 | // that can cause user code portability problems, we just declare the | |
56 | // one needed function here. | |
57 | extern "C" | |
58 | __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long); | |
59 | #endif | |
60 | ||
c2ba9709 JS |
61 | namespace __gnu_parallel |
62 | { | |
63 | #if defined(__ICC) | |
64 | template<typename must_be_int = int> | |
6df548d2 | 65 | int32 faa32(int32* x, int32 inc) |
c2ba9709 JS |
66 | { |
67 | asm volatile("lock xadd %0,%1" | |
68 | : "=r" (inc), "=m" (*x) | |
69 | : "0" (inc) | |
70 | : "memory"); | |
71 | return inc; | |
72 | } | |
73 | #if defined(__x86_64) | |
74 | template<typename must_be_int = int> | |
6df548d2 | 75 | int64 faa64(int64* x, int64 inc) |
c2ba9709 JS |
76 | { |
77 | asm volatile("lock xadd %0,%1" | |
78 | : "=r" (inc), "=m" (*x) | |
79 | : "0" (inc) | |
80 | : "memory"); | |
81 | return inc; | |
82 | } | |
83 | #endif | |
84 | #endif | |
85 | ||
86 | // atomic functions only work on integers | |
87 | ||
88 | /** @brief Add a value to a variable, atomically. | |
89 | * | |
90 | * Implementation is heavily platform-dependent. | |
91 | * @param ptr Pointer to a 32-bit signed integer. | |
92 | * @param addend Value to add. | |
93 | */ | |
6df548d2 PC |
94 | inline int32 |
95 | fetch_and_add_32(volatile int32* ptr, int32 addend) | |
c2ba9709 JS |
96 | { |
97 | #if defined(__ICC) //x86 version | |
98 | return _InterlockedExchangeAdd((void*)ptr, addend); | |
99 | #elif defined(__ECC) //IA-64 version | |
100 | return _InterlockedExchangeAdd((void*)ptr, addend); | |
101 | #elif defined(__ICL) || defined(_MSC_VER) | |
5817ff8e PC |
102 | return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr), |
103 | addend); | |
c2ba9709 JS |
104 | #elif defined(__GNUC__) |
105 | return __sync_fetch_and_add(ptr, addend); | |
106 | #elif defined(__SUNPRO_CC) && defined(__sparc) | |
6df548d2 | 107 | volatile int32 before, after; |
c2ba9709 JS |
108 | do |
109 | { | |
110 | before = *ptr; | |
111 | after = before + addend; | |
5817ff8e PC |
112 | } while (atomic_cas_32((volatile unsigned int*)ptr, before, |
113 | after) != before); | |
c2ba9709 JS |
114 | return before; |
115 | #else //fallback, slow | |
116 | #pragma message("slow fetch_and_add_32") | |
6df548d2 | 117 | int32 res; |
c2ba9709 JS |
118 | #pragma omp critical |
119 | { | |
120 | res = *ptr; | |
121 | *(ptr) += addend; | |
122 | } | |
123 | return res; | |
124 | #endif | |
125 | } | |
126 | ||
127 | /** @brief Add a value to a variable, atomically. | |
128 | * | |
129 | * Implementation is heavily platform-dependent. | |
130 | * @param ptr Pointer to a 64-bit signed integer. | |
131 | * @param addend Value to add. | |
132 | */ | |
6df548d2 PC |
133 | inline int64 |
134 | fetch_and_add_64(volatile int64* ptr, int64 addend) | |
c2ba9709 JS |
135 | { |
136 | #if defined(__ICC) && defined(__x86_64) //x86 version | |
6df548d2 | 137 | return faa64<int>((int64*)ptr, addend); |
c2ba9709 JS |
138 | #elif defined(__ECC) //IA-64 version |
139 | return _InterlockedExchangeAdd64((void*)ptr, addend); | |
140 | #elif defined(__ICL) || defined(_MSC_VER) | |
141 | #ifndef _WIN64 | |
142 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case | |
143 | return 0; | |
144 | #else | |
145 | return _InterlockedExchangeAdd64(ptr, addend); | |
146 | #endif | |
147 | #elif defined(__GNUC__) && defined(__x86_64) | |
148 | return __sync_fetch_and_add(ptr, addend); | |
149 | #elif defined(__GNUC__) && defined(__i386) && \ | |
150 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) | |
151 | return __sync_fetch_and_add(ptr, addend); | |
152 | #elif defined(__SUNPRO_CC) && defined(__sparc) | |
6df548d2 | 153 | volatile int64 before, after; |
c2ba9709 JS |
154 | do |
155 | { | |
156 | before = *ptr; | |
157 | after = before + addend; | |
5817ff8e PC |
158 | } while (atomic_cas_64((volatile unsigned long long*)ptr, before, |
159 | after) != before); | |
c2ba9709 JS |
160 | return before; |
161 | #else //fallback, slow | |
162 | #if defined(__GNUC__) && defined(__i386) | |
163 | // XXX doesn't work with -march=native | |
164 | //#warning "please compile with -march=i686 or better" | |
165 | #endif | |
166 | #pragma message("slow fetch_and_add_64") | |
6df548d2 | 167 | int64 res; |
c2ba9709 JS |
168 | #pragma omp critical |
169 | { | |
170 | res = *ptr; | |
171 | *(ptr) += addend; | |
172 | } | |
173 | return res; | |
174 | #endif | |
175 | } | |
176 | ||
177 | /** @brief Add a value to a variable, atomically. | |
178 | * | |
179 | * Implementation is heavily platform-dependent. | |
180 | * @param ptr Pointer to a signed integer. | |
181 | * @param addend Value to add. | |
182 | */ | |
183 | template<typename T> | |
184 | inline T | |
185 | fetch_and_add(volatile T* ptr, T addend) | |
186 | { | |
6df548d2 PC |
187 | if (sizeof(T) == sizeof(int32)) |
188 | return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend); | |
189 | else if (sizeof(T) == sizeof(int64)) | |
190 | return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend); | |
c2ba9709 JS |
191 | else |
192 | _GLIBCXX_PARALLEL_ASSERT(false); | |
193 | } | |
194 | ||
195 | ||
196 | #if defined(__ICC) | |
197 | ||
198 | template<typename must_be_int = int> | |
6df548d2 PC |
199 | inline int32 |
200 | cas32(volatile int32* ptr, int32 old, int32 nw) | |
c2ba9709 | 201 | { |
6df548d2 | 202 | int32 before; |
c2ba9709 JS |
203 | __asm__ __volatile__("lock; cmpxchgl %1,%2" |
204 | : "=a"(before) | |
205 | : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) | |
206 | : "memory"); | |
207 | return before; | |
208 | } | |
209 | ||
210 | #if defined(__x86_64) | |
211 | template<typename must_be_int = int> | |
6df548d2 PC |
212 | inline int64 |
213 | cas64(volatile int64 *ptr, int64 old, int64 nw) | |
c2ba9709 | 214 | { |
6df548d2 | 215 | int64 before; |
c2ba9709 JS |
216 | __asm__ __volatile__("lock; cmpxchgq %1,%2" |
217 | : "=a"(before) | |
218 | : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old) | |
219 | : "memory"); | |
220 | return before; | |
221 | } | |
222 | #endif | |
223 | ||
224 | #endif | |
225 | ||
226 | /** @brief Compare @c *ptr and @c comparand. If equal, let @c | |
227 | * *ptr=replacement and return @c true, return @c false otherwise. | |
228 | * | |
229 | * Implementation is heavily platform-dependent. | |
230 | * @param ptr Pointer to 32-bit signed integer. | |
231 | * @param comparand Compare value. | |
232 | * @param replacement Replacement value. | |
233 | */ | |
234 | inline bool | |
6df548d2 | 235 | compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement) |
c2ba9709 JS |
236 | { |
237 | #if defined(__ICC) //x86 version | |
5817ff8e PC |
238 | return _InterlockedCompareExchange((void*)ptr, replacement, |
239 | comparand) == comparand; | |
c2ba9709 | 240 | #elif defined(__ECC) //IA-64 version |
5817ff8e PC |
241 | return _InterlockedCompareExchange((void*)ptr, replacement, |
242 | comparand) == comparand; | |
c2ba9709 | 243 | #elif defined(__ICL) || defined(_MSC_VER) |
5817ff8e PC |
244 | return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), |
245 | replacement, comparand) == comparand; | |
c2ba9709 JS |
246 | #elif defined(__GNUC__) |
247 | return __sync_bool_compare_and_swap(ptr, comparand, replacement); | |
248 | #elif defined(__SUNPRO_CC) && defined(__sparc) | |
5817ff8e PC |
249 | return atomic_cas_32((volatile unsigned int*)ptr, comparand, |
250 | replacement) == comparand; | |
c2ba9709 JS |
251 | #else |
252 | #pragma message("slow compare_and_swap_32") | |
253 | bool res = false; | |
254 | #pragma omp critical | |
255 | { | |
256 | if (*ptr == comparand) | |
257 | { | |
258 | *ptr = replacement; | |
259 | res = true; | |
260 | } | |
261 | } | |
262 | return res; | |
263 | #endif | |
264 | } | |
265 | ||
266 | /** @brief Compare @c *ptr and @c comparand. If equal, let @c | |
267 | * *ptr=replacement and return @c true, return @c false otherwise. | |
268 | * | |
269 | * Implementation is heavily platform-dependent. | |
270 | * @param ptr Pointer to 64-bit signed integer. | |
271 | * @param comparand Compare value. | |
272 | * @param replacement Replacement value. | |
273 | */ | |
274 | inline bool | |
6df548d2 | 275 | compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement) |
c2ba9709 JS |
276 | { |
277 | #if defined(__ICC) && defined(__x86_64) //x86 version | |
278 | return cas64<int>(ptr, comparand, replacement) == comparand; | |
279 | #elif defined(__ECC) //IA-64 version | |
5817ff8e PC |
280 | return _InterlockedCompareExchange64((void*)ptr, replacement, |
281 | comparand) == comparand; | |
c2ba9709 JS |
282 | #elif defined(__ICL) || defined(_MSC_VER) |
283 | #ifndef _WIN64 | |
284 | _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case | |
285 | return 0; | |
286 | #else | |
5817ff8e PC |
287 | return _InterlockedCompareExchange64(ptr, replacement, |
288 | comparand) == comparand; | |
c2ba9709 JS |
289 | #endif |
290 | ||
291 | #elif defined(__GNUC__) && defined(__x86_64) | |
292 | return __sync_bool_compare_and_swap(ptr, comparand, replacement); | |
293 | #elif defined(__GNUC__) && defined(__i386) && \ | |
294 | (defined(__i686) || defined(__pentium4) || defined(__athlon)) | |
295 | return __sync_bool_compare_and_swap(ptr, comparand, replacement); | |
296 | #elif defined(__SUNPRO_CC) && defined(__sparc) | |
5817ff8e PC |
297 | return atomic_cas_64((volatile unsigned long long*)ptr, |
298 | comparand, replacement) == comparand; | |
c2ba9709 JS |
299 | #else |
300 | #if defined(__GNUC__) && defined(__i386) | |
301 | // XXX -march=native | |
302 | //#warning "please compile with -march=i686 or better" | |
303 | #endif | |
304 | #pragma message("slow compare_and_swap_64") | |
305 | bool res = false; | |
306 | #pragma omp critical | |
307 | { | |
308 | if (*ptr == comparand) | |
309 | { | |
310 | *ptr = replacement; | |
311 | res = true; | |
312 | } | |
313 | } | |
314 | return res; | |
315 | #endif | |
316 | } | |
317 | ||
318 | /** @brief Compare @c *ptr and @c comparand. If equal, let @c | |
319 | * *ptr=replacement and return @c true, return @c false otherwise. | |
320 | * | |
321 | * Implementation is heavily platform-dependent. | |
322 | * @param ptr Pointer to signed integer. | |
323 | * @param comparand Compare value. | |
324 | * @param replacement Replacement value. */ | |
325 | template<typename T> | |
326 | inline bool | |
327 | compare_and_swap(volatile T* ptr, T comparand, T replacement) | |
328 | { | |
6df548d2 PC |
329 | if (sizeof(T) == sizeof(int32)) |
330 | return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement); | |
331 | else if (sizeof(T) == sizeof(int64)) | |
332 | return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement); | |
c2ba9709 JS |
333 | else |
334 | _GLIBCXX_PARALLEL_ASSERT(false); | |
335 | } | |
336 | ||
/** @brief Hand the processor over to another thread instead of waiting
 *  for the current time slice to run out.
 *
 *  Calls @c sched_yield() everywhere except on native Windows
 *  (@c _WIN32 without @c __CYGWIN__), where @c Sleep(0) is called.
 */
inline void
yield()
{
#if !defined(_WIN32) || defined(__CYGWIN__)
  sched_yield();
#else
  Sleep(0);
#endif
}
348 | } // end namespace | |
349 | ||
cbcd1e45 | 350 | #endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */ |