]>
Commit | Line | Data |
---|---|---|
3038054c BI |
1 | /* os-unix.c -*-C-*- |
2 | * | |
3 | ************************************************************************* | |
4 | * | |
2e01cda6 | 5 | * Copyright (C) 2009-2016, Intel Corporation |
3038054c BI |
6 | * All rights reserved. |
7 | * | |
3038054c BI |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * | |
12 | * * Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * * Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in | |
16 | * the documentation and/or other materials provided with the | |
17 | * distribution. | |
18 | * * Neither the name of Intel Corporation nor the names of its | |
19 | * contributors may be used to endorse or promote products derived | |
20 | * from this software without specific prior written permission. | |
21 | * | |
3038054c BI |
22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
26 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
27 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
28 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | |
29 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
30 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY | |
32 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
33 | * POSSIBILITY OF SUCH DAMAGE. | |
2e01cda6 IV |
34 | * |
35 | * ********************************************************************* | |
36 | * | |
 * PLEASE NOTE: This file is a downstream copy of a file maintained in
38 | * a repository at cilkplus.org. Changes made to this file that are not | |
39 | * submitted through the contribution process detailed at | |
40 | * http://www.cilkplus.org/submit-cilk-contribution will be lost the next | |
41 | * time that a new version is released. Changes only submitted to the | |
42 | * GNU compiler collection or posted to the git repository at | |
43 | * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are | |
44 | * not tracked. | |
45 | * | |
46 | * We welcome your contributions to this open source project. Thank you | |
47 | * for your assistance in helping us improve Cilk Plus. | |
3038054c BI |
48 | **************************************************************************/ |
49 | ||
3038054c BI |
50 | #include "os.h" |
51 | #include "bug.h" | |
52 | #include "cilk_malloc.h" | |
53 | #include <internal/abi.h> | |
54 | ||
55 | #if defined __linux__ | |
56 | # include <sys/sysinfo.h> | |
57 | # include <sys/syscall.h> | |
2e01cda6 | 58 | |
3038054c BI |
59 | #elif defined __APPLE__ |
60 | # include <sys/sysctl.h> | |
61 | // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output | |
2e01cda6 | 62 | |
3038054c BI |
63 | #elif defined __VXWORKS__ |
64 | # include <vxWorks.h> | |
65 | # include <vxCpuLib.h> | |
2e01cda6 IV |
66 | # include <taskLib.h> |
67 | ||
2546da0b RO |
68 | // Solaris |
69 | #elif defined __sun__ && defined __svr4__ | |
70 | # include <sched.h> | |
2e01cda6 IV |
71 | |
72 | // OSes we know about which don't require any additional files | |
73 | #elif defined __CYGWIN__ || \ | |
74 | defined __DragonFly__ || \ | |
75 | defined __FreeBSD__ || \ | |
76 | defined __GNU__ | |
77 | // No additional include files | |
78 | ||
3038054c BI |
79 | #else |
80 | # error "Unsupported OS" | |
81 | #endif | |
82 | ||
83 | #include <stdarg.h> | |
84 | #include <stddef.h> | |
85 | #include <stdio.h> | |
86 | #include <stdlib.h> | |
87 | #include <string.h> | |
88 | #include <unistd.h> | |
89 | #include <pthread.h> | |
90 | #include <sys/types.h> | |
91 | ||
92 | ||
93 | ||
94 | // /* Thread-local storage */ | |
95 | // #ifdef _WIN32 | |
96 | // typedef unsigned cilkos_tls_key_t; | |
97 | // #else | |
98 | // typedef pthread_key_t cilkos_tls_key_t; | |
99 | // #endif | |
100 | // cilkos_tls_key_t cilkos_allocate_tls_key(); | |
101 | // void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr); | |
102 | // void* cilkos_get_tls_pointer(cilkos_tls_key_t key); | |
103 | ||
104 | #if !defined CILK_WORKER_TLS | |
105 | static int cilk_keys_defined; | |
106 | static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key; | |
107 | ||
108 | #if SUPPORT_GET_CURRENT_FIBER > 0 | |
109 | static pthread_key_t fiber_key; | |
110 | #endif | |
111 | ||
112 | static void *serial_worker; | |
113 | ||
114 | ||
115 | // This destructor is called when a pthread dies to deallocate the | |
116 | // pedigree node. | |
117 | static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr) | |
118 | { | |
119 | __cilkrts_pedigree* pedigree_tls | |
120 | = (__cilkrts_pedigree*)pedigree_tls_ptr; | |
121 | if (pedigree_tls) { | |
122 | // Assert that we have either one or two nodes | |
123 | // left in the pedigree chain. | |
124 | // If we have more, then something is going wrong... | |
125 | CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent); | |
126 | __cilkrts_free(pedigree_tls); | |
127 | } | |
128 | } | |
129 | ||
130 | void __cilkrts_init_tls_variables(void) | |
131 | { | |
132 | int status; | |
133 | /* This will be called once in serial execution before any | |
134 | Cilk parallelism so we do not need to worry about races | |
135 | on cilk_keys_defined. */ | |
136 | if (cilk_keys_defined) | |
137 | return; | |
138 | status = pthread_key_create(&worker_key, NULL); | |
139 | CILK_ASSERT (status == 0); | |
140 | status = pthread_key_create(&pedigree_leaf_key, | |
141 | __cilkrts_pedigree_leaf_destructor); | |
142 | CILK_ASSERT (status == 0); | |
143 | status = pthread_key_create(&tbb_interop_key, NULL); | |
144 | CILK_ASSERT (status == 0); | |
145 | ||
146 | #if SUPPORT_GET_CURRENT_FIBER > 0 | |
147 | status = pthread_key_create(&fiber_key, NULL); | |
148 | CILK_ASSERT (status == 0); | |
149 | #endif | |
150 | cilk_keys_defined = 1; | |
151 | return; | |
152 | } | |
153 | ||
154 | COMMON_SYSDEP | |
155 | void* cilkos_get_current_thread_id(void) | |
156 | { | |
157 | return (void*)pthread_self(); | |
158 | } | |
159 | ||
160 | ||
161 | CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker() | |
162 | { | |
163 | if (__builtin_expect(cilk_keys_defined, 1)) | |
164 | return (__cilkrts_worker *)pthread_getspecific(worker_key); | |
165 | else | |
166 | return serial_worker; | |
167 | ||
168 | } | |
169 | ||
170 | CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast() | |
171 | { | |
172 | return (__cilkrts_worker *)pthread_getspecific(worker_key); | |
173 | } | |
174 | ||
175 | COMMON_SYSDEP | |
176 | __cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void) | |
177 | { | |
178 | if (__builtin_expect(cilk_keys_defined, 1)) | |
179 | return (__cilk_tbb_stack_op_thunk *) | |
180 | pthread_getspecific(tbb_interop_key); | |
181 | else | |
182 | return 0; | |
183 | } | |
184 | ||
// Global rank source for per-thread pedigree root nodes.
// This counter should be updated atomically.
static int __cilkrts_global_pedigree_tls_counter = -1;

// Return the calling thread's pedigree leaf node, lazily allocating the
// leaf/root pair when create_new is nonzero and none exists yet.
// Returns 0 when TLS keys are not initialized, or when no leaf exists and
// create_new is false.
COMMON_SYSDEP
__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
{
    __cilkrts_pedigree *pedigree_tls;
    if (__builtin_expect(cilk_keys_defined, 1)) {
        pedigree_tls =
            (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
    }
    else {
        // Serial startup: the pedigree TLS key does not exist yet.
        return 0;
    }

    if (!pedigree_tls && create_new) {
        // This call creates two nodes, X and Y.
        // X == pedigree_tls[0] is the leaf node, which gets copied
        // in and out of a user worker w when w binds and unbinds.
        // Y == pedigree_tls[1] is the root node,
        // which is a constant node that represents the user worker
        // thread w.
        pedigree_tls = (__cilkrts_pedigree*)
            __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));

        // This call sets the TLS pointer to the new node.
        // NOTE: the TLS slot is published before the fields are filled in;
        // this is single-thread-visible state, so no other thread observes
        // the partially initialized pair.
        __cilkrts_set_tls_pedigree_leaf(pedigree_tls);

        pedigree_tls[0].rank = 0;
        pedigree_tls[0].parent = &pedigree_tls[1];

        // Create Y, whose rank begins as the global counter value.
        pedigree_tls[1].rank =
            __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);

        pedigree_tls[1].parent = NULL;
        // Counter started at -1, so the first assigned rank is 0; -1 would
        // indicate the increment was lost.
        CILK_ASSERT(pedigree_tls[1].rank != -1);
    }
    return pedigree_tls;
}
225 | ||
#if SUPPORT_GET_CURRENT_FIBER > 0
// Return the fiber most recently registered for this thread via
// cilkos_set_tls_cilk_fiber, or NULL before the TLS keys exist.
COMMON_SYSDEP
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
{
    if (__builtin_expect(!cilk_keys_defined, 0))
        return NULL;
    return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
}
#endif
236 | ||
237 | COMMON_SYSDEP | |
238 | void __cilkrts_set_tls_worker(__cilkrts_worker *w) | |
239 | { | |
240 | if (__builtin_expect(cilk_keys_defined, 1)) { | |
241 | int status; | |
242 | status = pthread_setspecific(worker_key, w); | |
243 | CILK_ASSERT (status == 0); | |
244 | return; | |
245 | } | |
246 | else | |
247 | { | |
248 | serial_worker = w; | |
249 | } | |
250 | } | |
251 | ||
252 | COMMON_SYSDEP | |
253 | void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t) | |
254 | { | |
255 | if (__builtin_expect(cilk_keys_defined, 1)) { | |
256 | int status; | |
257 | status = pthread_setspecific(tbb_interop_key, t); | |
258 | CILK_ASSERT (status == 0); | |
259 | return; | |
260 | } | |
261 | abort(); | |
262 | } | |
263 | ||
264 | COMMON_SYSDEP | |
265 | void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf) | |
266 | { | |
267 | if (__builtin_expect(cilk_keys_defined, 1)) { | |
268 | int status; | |
269 | status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf); | |
270 | CILK_ASSERT (status == 0); | |
271 | return; | |
272 | } | |
273 | abort(); | |
274 | } | |
275 | ||
#if SUPPORT_GET_CURRENT_FIBER > 0
// Record the fiber currently running on the calling thread.  Aborts if
// the TLS keys have not been created yet.
COMMON_SYSDEP
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
{
    if (__builtin_expect(!cilk_keys_defined, 0))
        abort();

    int status = pthread_setspecific(fiber_key, fiber);
    CILK_ASSERT (status == 0);
}
#endif
289 | ||
#else
// CILK_WORKER_TLS builds: no pthread keys to create here.  Presumably the
// worker lives in compiler-supported thread-local storage declared
// elsewhere — TODO confirm against the CILK_WORKER_TLS definition site.
void __cilkrts_init_tls_variables(void)
{
}
#endif
295 | ||
b1cd42c5 | 296 | #if defined (__linux__) && ! defined(__ANDROID__) |
3038054c BI |
/*
 * Return the kernel thread id rather than the pid.  In the case of MIC
 * offload multiple threads may enter Cilk, each with a different affinity
 * mask, so the per-thread id is the correct handle.
 */
static pid_t linux_gettid(void)
{
    return (pid_t) syscall(SYS_gettid);
}
306 | ||
/*
 * On Linux we look at the thread affinity mask and restrict ourself to one
 * thread for each of the hardware contexts to which we are bound.
 * Therefore if user does
 *      % taskset 0-1 cilkProgram
 *      # restrict execution to hardware contexts zero and one
 * the Cilk program will only use two threads even if it is running on a
 * machine that has 32 hardware contexts.
 * This is the right thing to do, because the threads are restricted to two
 * hardware contexts by the affinity mask set by taskset, and if we were to
 * create extra threads they would simply oversubscribe the hardware resources
 * we can use.
 * This is particularly important on MIC in offload mode, where the affinity
 * mask is set by the offload library to force the offload code away from
 * cores that have offload support threads running on them.
 *
 * Returns the number of CPUs in this thread's affinity mask, falling back
 * to the online-CPU count when the mask cannot be read (or when
 * HAVE_PTHREAD_AFFINITY_NP is not defined).
 */
static int linux_get_affinity_count ()
{
    long system_cores = sysconf(_SC_NPROCESSORS_ONLN);
    int affinity_cores = 0;

#if defined HAVE_PTHREAD_AFFINITY_NP

#if defined (CPU_ALLOC_SIZE) && ! defined(DONT_USE_CPU_ALLOC_SIZE)
    // Statically allocated cpu_set_t's max out at 1024 cores.  If
    // CPU_ALLOC_SIZE is available, use it to support large numbers of cores
    size_t cpusetsize = CPU_ALLOC_SIZE(system_cores);
    cpu_set_t *process_mask = (cpu_set_t *)__cilkrts_malloc(cpusetsize);

    // Get the affinity mask for this thread
    int err = pthread_getaffinity_np(pthread_self(),
                                     cpusetsize,
                                     process_mask);

    // Count the available cores.
    if (0 == err)
        affinity_cores = CPU_COUNT_S(cpusetsize, process_mask);

    __cilkrts_free(process_mask);

#else
    // CPU_ALLOC_SIZE isn't available, or this is the Intel compiler build
    // and we have to support RHEL5.  Use a statically allocated cpu_set_t

    cpu_set_t process_mask;

    // Extract the thread affinity mask
    int err = pthread_getaffinity_np(pthread_self(),
                                     sizeof(process_mask),
                                     &process_mask);

    if (0 == err)
    {
        // We have extracted the mask OK, so now we can count the number of
        // threads in it.  This is linear in the maximum number of CPUs
        // available, We could do a logarithmic version, if we assume the
        // format of the mask, but it's not really worth it. We only call
        // this at thread startup anyway.
        int i;
        for (i = 0; i < CPU_SETSIZE; i++)
        {
            if (CPU_ISSET(i, &process_mask))
            {
                affinity_cores++;
            }
        }
    }
#endif // CPU_ALLOC_SIZE
#endif // ! defined HAVE_PTHREAD_AFFINITY_NP

    // If we've got a count of cores this thread is supposed to use, that's
    // the number or cores we'll use.  Otherwise, default to the number of
    // cores on the system.
    // NOTE(review): system_cores is long and is narrowed to int here;
    // harmless for realistic core counts but worth confirming.
    if (0 == affinity_cores)
        return system_cores;
    else
        return affinity_cores;
}
b1cd42c5 | 385 | #endif // defined (__linux__) && ! defined(__ANDROID__) |
3038054c BI |
386 | |
/*
 * __cilkrts_hardware_cpu_count
 *
 * Returns the number of available CPUs on this hardware. This is architecture-
 * specific.  On Linux the thread's affinity mask is honored; on other OSes
 * the online-processor count (or its platform equivalent) is used directly.
 */

COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
#if defined __ANDROID__ || \
    defined __CYGWIN__ || \
    defined __DragonFly__ || \
    defined __FreeBSD__ || \
    (defined(__sun__) && defined(__svr4__))
    // POSIX: count of processors currently online.
    return (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __MIC__
    /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
    /// on KNC.  Also, ignore the last core.
    int count = (int)sysconf (_SC_NPROCESSORS_ONLN);
    return count/2 - 2;
#elif defined __linux__
    // Respect the thread's CPU affinity mask (e.g. taskset, MIC offload).
    return linux_get_affinity_count();
#elif defined __APPLE__
    int count;
    size_t len = sizeof count;
    // hw.logicalcpu: logical processors available in the current power
    // management mode.
    int status = sysctlbyname("hw.logicalcpu", &count, &len, 0, 0);
    assert(0 == status);

    return count;
#elif defined __VXWORKS__
    // One bit per enabled CPU in the returned mask.
    return __builtin_popcount(vxCpuEnabledGet());
#else
#error "Unsupported architecture"
#endif
}
422 | ||
// Back off aggressively while a worker has no work to steal.  Unlike
// __cilkrts_yield (a plain quantum yield), this sleeps/delays so that idle
// workers stop burning CPU.
COMMON_SYSDEP void __cilkrts_idle(void)
{
    // This is another version of __cilkrts_yield() to be used when
    // silencing workers that are not stealing work.
#if defined(__ANDROID__) || \
    defined(__FreeBSD__) || \
    defined(__VXWORKS__) || \
    (defined(__sun__) && defined(__svr4__))
    sched_yield();
#elif defined(__MIC__)
    // Spin-delay ~1024 cycles rather than calling into the scheduler.
    _mm_delay_32(1024);
#elif defined(__linux__) || \
    defined(__APPLE__)  || \
    defined(__CYGWIN__)

    // Sleep 10ms so an idle worker releases the CPU entirely.
    usleep(10000);
#else
# error "Unsupported architecture"
#endif
}
443 | ||
// Sleep for the smallest schedulable interval: one tick on VxWorks,
// one microsecond (in practice, at least one scheduler quantum) elsewhere.
COMMON_SYSDEP void __cilkrts_sleep(void)
{
#ifdef __VXWORKS__
    taskDelay(1);
#else
    usleep(1);
#endif
}
452 | ||
// Give up the rest of this thread's scheduling quantum without sleeping.
COMMON_SYSDEP void __cilkrts_yield(void)
{
#if defined(__ANDROID__) || \
    defined(__APPLE__) || \
    defined(__CYGWIN__) || \
    defined(__FreeBSD__) || \
    defined(__VXWORKS__) || \
    (defined(__sun__) && defined(__svr4__))
    // Call sched_yield to yield quantum.  I'm not sure why we
    // don't do this on Linux also.
    sched_yield();
#elif defined(__MIC__)
    // On MIC, pthread_yield() really trashes things.  Arch's measurements
    // showed that calling _mm_delay_32() (or doing nothing) was a better
    // option.  Delaying 1024 clock cycles is a reasonable compromise between
    // giving up the processor and latency starting up when work becomes
    // available
    _mm_delay_32(1024);
#elif defined(__linux__)
    // On Linux, call pthread_yield (which in turn will call sched_yield)
    // to yield quantum.
    // NOTE(review): pthread_yield is deprecated in glibc >= 2.34 in favor
    // of the standard sched_yield — consider switching; verify against
    // the supported toolchain baseline.
    pthread_yield();
#else
# error "Unsupported architecture"
#endif
}
479 | ||
480 | COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen, | |
481 | const char* varname) | |
482 | { | |
483 | CILK_ASSERT(value); | |
484 | CILK_ASSERT(varname); | |
485 | ||
486 | const char* envstr = getenv(varname); | |
487 | if (envstr) | |
488 | { | |
2e01cda6 | 489 | size_t len = cilk_strlen(envstr); |
3038054c BI |
490 | if (len > vallen - 1) |
491 | return len + 1; | |
2e01cda6 | 492 | cilk_strcpy_s(value, vallen, envstr); |
3038054c BI |
493 | return len; |
494 | } | |
495 | else | |
496 | { | |
497 | value[0] = '\0'; | |
498 | return 0; | |
499 | } | |
500 | } | |
501 | ||
502 | /* | |
503 | * Unrecoverable error: Print an error message and abort execution. | |
504 | */ | |
505 | COMMON_SYSDEP void cilkos_error(const char *fmt, ...) | |
506 | { | |
507 | va_list l; | |
508 | fflush(NULL); | |
509 | fprintf(stderr, "Cilk error: "); | |
510 | va_start(l, fmt); | |
511 | vfprintf(stderr, fmt, l); | |
512 | va_end(l); | |
513 | fprintf(stderr, "Exiting.\n"); | |
514 | fflush(stderr); | |
515 | ||
516 | abort(); | |
517 | } | |
518 | ||
519 | /* | |
520 | * Print a warning message and return. | |
521 | */ | |
522 | COMMON_SYSDEP void cilkos_warning(const char *fmt, ...) | |
523 | { | |
524 | va_list l; | |
525 | fflush(NULL); | |
526 | fprintf(stderr, "Cilk warning: "); | |
527 | va_start(l, fmt); | |
528 | vfprintf(stderr, fmt, l); | |
529 | va_end(l); | |
530 | fflush(stderr); | |
531 | } | |
532 | ||
2e01cda6 IV |
#ifdef __VXWORKS__
#ifdef _WRS_KERNEL
// VxWorks kernel build: no automatic constructor mechanism here, so the
// application must call cilkStart() explicitly — presumably at image
// startup; confirm against the VxWorks integration docs.
void cilkStart()
{
    __cilkrts_init_tls_variables();
}
#else
// VxWorks user-side build: run at module load via the VxWorks constructor
// macro (priority 100).
_WRS_CONSTRUCTOR(cilkInit, 100)
{
    __cilkrts_init_tls_variables();
}
#endif
#else
// All other platforms: GCC constructor runs when the runtime library is
// loaded, before main, so the TLS keys exist before any Cilk code runs.
static void __attribute__((constructor)) init_once()
{
    /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
    __cilkrts_init_tls_variables();
}
#endif
3038054c BI |
552 | |
553 | ||
554 | #define PAGE 4096 | |
555 | #define CILK_MIN_STACK_SIZE (4*PAGE) | |
556 | // Default size for the stacks that we create in Cilk for Unix. | |
557 | #define CILK_DEFAULT_STACK_SIZE 0x100000 | |
558 | ||
559 | /* | |
560 | * Convert the user's specified stack size into a "reasonable" value | |
561 | * for this OS. | |
562 | */ | |
563 | size_t cilkos_validate_stack_size(size_t specified_stack_size) { | |
564 | // Convert any negative value to the default. | |
565 | if (specified_stack_size == 0) { | |
566 | CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0); | |
567 | return CILK_DEFAULT_STACK_SIZE; | |
568 | } | |
569 | // Round values in between 0 and CILK_MIN_STACK_SIZE up to | |
570 | // CILK_MIN_STACK_SIZE. | |
571 | if (specified_stack_size <= CILK_MIN_STACK_SIZE) { | |
572 | return CILK_MIN_STACK_SIZE; | |
573 | } | |
574 | if ((specified_stack_size % PAGE) > 0) { | |
575 | // Round the user's stack size value up to nearest page boundary. | |
576 | return (PAGE * (1 + specified_stack_size / PAGE)); | |
577 | } | |
578 | return specified_stack_size; | |
579 | } | |
580 | ||
/* Atomically add x to *p and return the resulting value (full barrier,
 * GCC __sync builtin). */
long cilkos_atomic_add(volatile long* p, long x)
{
    long updated = __sync_add_and_fetch(p, x);
    return updated;
}
585 | ||
586 | /* End os-unix.c */ |