/* os-unix.c                  -*-C-*-
 *
 *************************************************************************
 *
 * @copyright
 * Copyright (C) 2009-2013, Intel Corporation
 * All rights reserved.
 *
 * @copyright
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * @copyright
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/

#ifdef __linux__
    // define _GNU_SOURCE before *any* #include.
    // Even <stdint.h> will break later #includes if this macro is not
    // already defined when it is #included.
# define _GNU_SOURCE
#endif

#include "os.h"
#include "bug.h"
#include "cilk_malloc.h"
#include <internal/abi.h>

#if defined __linux__
# include <sys/sysinfo.h>
# include <sys/syscall.h>
#elif defined __APPLE__
# include <sys/sysctl.h>
    // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
#elif defined __DragonFly__
// No additional include files
#elif defined __FreeBSD__
// No additional include files
#elif defined __CYGWIN__
// Cygwin on Windows - no additional include files
#elif defined __VXWORKS__
# include <vxWorks.h>
# include <vxCpuLib.h>
# include <taskLib.h>
// Solaris
#elif defined __sun__ && defined __svr4__
# include <sched.h>
#else
# error "Unsupported OS"
#endif

#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/types.h>

// /* Thread-local storage */
// #ifdef _WIN32
// typedef unsigned cilkos_tls_key_t;
// #else
// typedef pthread_key_t cilkos_tls_key_t;
// #endif
// cilkos_tls_key_t cilkos_allocate_tls_key();
// void cilkos_set_tls_pointer(cilkos_tls_key_t key, void* ptr);
// void* cilkos_get_tls_pointer(cilkos_tls_key_t key);

#if !defined CILK_WORKER_TLS
static int cilk_keys_defined;
static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;

#if SUPPORT_GET_CURRENT_FIBER > 0
static pthread_key_t fiber_key;
#endif

static void *serial_worker;


// This destructor is called when a pthread dies to deallocate the
// pedigree node.
static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
{
    __cilkrts_pedigree* pedigree_tls
        = (__cilkrts_pedigree*)pedigree_tls_ptr;
    if (pedigree_tls) {
        // Assert that we have either one or two nodes
        // left in the pedigree chain.
        // If we have more, then something is going wrong...
        CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
        __cilkrts_free(pedigree_tls);
    }
}

void __cilkrts_init_tls_variables(void)
{
    int status;
    /* This will be called once in serial execution before any
       Cilk parallelism so we do not need to worry about races
       on cilk_keys_defined. */
    if (cilk_keys_defined)
        return;
    status = pthread_key_create(&worker_key, NULL);
    CILK_ASSERT (status == 0);
    status = pthread_key_create(&pedigree_leaf_key,
                                __cilkrts_pedigree_leaf_destructor);
    CILK_ASSERT (status == 0);
    status = pthread_key_create(&tbb_interop_key, NULL);
    CILK_ASSERT (status == 0);

#if SUPPORT_GET_CURRENT_FIBER > 0
    status = pthread_key_create(&fiber_key, NULL);
    CILK_ASSERT (status == 0);
#endif
    cilk_keys_defined = 1;
    return;
}

COMMON_SYSDEP
void* cilkos_get_current_thread_id(void)
{
    return (void*)pthread_self();
}


CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (__cilkrts_worker *)pthread_getspecific(worker_key);
    else
        return serial_worker;
}

CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
{
    return (__cilkrts_worker *)pthread_getspecific(worker_key);
}

COMMON_SYSDEP
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (__cilk_tbb_stack_op_thunk *)
            pthread_getspecific(tbb_interop_key);
    else
        return 0;
}

// This counter should be updated atomically.
static int __cilkrts_global_pedigree_tls_counter = -1;

COMMON_SYSDEP
__cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
{
    __cilkrts_pedigree *pedigree_tls;
    if (__builtin_expect(cilk_keys_defined, 1)) {
        pedigree_tls =
            (struct __cilkrts_pedigree *)pthread_getspecific(pedigree_leaf_key);
    }
    else {
        return 0;
    }

    if (!pedigree_tls && create_new) {
        // This call creates two nodes, X and Y.
        // X == pedigree_tls[0] is the leaf node, which gets copied
        // in and out of a user worker w when w binds and unbinds.
        // Y == pedigree_tls[1] is the root node,
        // which is a constant node that represents the user worker
        // thread w.
        pedigree_tls = (__cilkrts_pedigree*)
            __cilkrts_malloc(2 * sizeof(__cilkrts_pedigree));

        // This call sets the TLS pointer to the new node.
        __cilkrts_set_tls_pedigree_leaf(pedigree_tls);

        pedigree_tls[0].rank = 0;
        pedigree_tls[0].parent = &pedigree_tls[1];

        // Create Y, whose rank begins as the global counter value.
        pedigree_tls[1].rank =
            __sync_add_and_fetch(&__cilkrts_global_pedigree_tls_counter, 1);

        pedigree_tls[1].parent = NULL;
        CILK_ASSERT(pedigree_tls[1].rank != -1);
    }
    return pedigree_tls;
}

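/*
 * Illustrative sketch (hypothetical, compiled out): with the two-node
 * layout created above, walking parent pointers from a freshly created
 * thread-local leaf visits exactly pedigree_tls[0] (the leaf X) and
 * pedigree_tls[1] (the constant root Y for the user thread).
 */
#if 0
static int example_pedigree_chain_length(const __cilkrts_pedigree *leaf)
{
    // Count nodes from the leaf up to the root (parent == NULL).
    int length = 0;
    while (leaf) {
        length++;
        leaf = leaf->parent;
    }
    return length;   // 2 for a chain fresh from the code above
}
#endif
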
#if SUPPORT_GET_CURRENT_FIBER > 0
COMMON_SYSDEP
cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
{
    if (__builtin_expect(cilk_keys_defined, 1))
        return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
    else
        return NULL;
}
#endif

COMMON_SYSDEP
void __cilkrts_set_tls_worker(__cilkrts_worker *w)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(worker_key, w);
        CILK_ASSERT (status == 0);
        return;
    }
    else
    {
        serial_worker = w;
    }
}

COMMON_SYSDEP
void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(tbb_interop_key, t);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}

COMMON_SYSDEP
void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}

#if SUPPORT_GET_CURRENT_FIBER > 0
COMMON_SYSDEP
void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
{
    if (__builtin_expect(cilk_keys_defined, 1)) {
        int status;
        status = pthread_setspecific(fiber_key, fiber);
        CILK_ASSERT (status == 0);
        return;
    }
    abort();
}
#endif

#else
void __cilkrts_init_tls_variables(void)
{
}
#endif

#if defined (__linux__) && ! defined(__ANDROID__)
/*
 * Get the thread id, rather than the pid.  In the case of MIC offload, it's
 * possible that we have multiple threads entering Cilk, and each has a
 * different affinity.
 */
static pid_t linux_gettid(void)
{
    return syscall(SYS_gettid);
}

/*
 * On Linux we look at the thread affinity mask and restrict ourselves to one
 * thread for each of the hardware contexts to which we are bound.
 * Therefore if the user does
 *    % taskset -c 0-1 cilkProgram
 *    # restrict execution to hardware contexts zero and one
 * the Cilk program will only use two threads even if it is running on a
 * machine that has 32 hardware contexts.
 * This is the right thing to do, because the threads are restricted to two
 * hardware contexts by the affinity mask set by taskset, and if we were to
 * create extra threads they would simply oversubscribe the hardware resources
 * we can use.
 * This is particularly important on MIC in offload mode, where the affinity
 * mask is set by the offload library to force the offload code away from
 * cores that have offload support threads running on them.
 */
static int linux_get_affinity_count (int tid)
{
#if !defined HAVE_PTHREAD_AFFINITY_NP
    return 0;
#else

    cpu_set_t process_mask;

    // Extract the thread affinity mask
    int err = sched_getaffinity (tid, sizeof(process_mask), &process_mask);

    if (0 != err)
    {
        return 0;
    }

    // We have extracted the mask OK, so now we can count the number of
    // threads in it.  This is linear in the maximum number of CPUs
    // available; we could do a logarithmic version if we assumed the format
    // of the mask, but it's not really worth it.  We only call this at
    // thread startup anyway.
    int available_procs = 0;
    int i;
    for (i = 0; i < CPU_SETSIZE; i++)
    {
        if (CPU_ISSET(i, &process_mask))
        {
            available_procs++;
        }
    }

    return available_procs;
#endif
}
#endif // defined (__linux__) && ! defined(__ANDROID__)
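
/*
 * Aside (compiled out): on glibc, the counting loop above could use the
 * CPU_COUNT() macro, which returns the number of CPUs in a cpu_set_t
 * directly.  A sketch, assuming a glibc-only build:
 */
#if 0
static int example_affinity_count_glibc(pid_t tid)
{
    cpu_set_t mask;
    if (sched_getaffinity(tid, sizeof(mask), &mask) != 0)
        return 0;                 // fall back, as the code above does
    return CPU_COUNT(&mask);      // population count of the mask
}
#endif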

/*
 * __cilkrts_hardware_cpu_count
 *
 * Returns the number of available CPUs on this hardware.  This is
 * architecture-specific.
 */

COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
#if defined __ANDROID__ || (defined(__sun__) && defined(__svr4__))
    return sysconf (_SC_NPROCESSORS_ONLN);
#elif defined __MIC__
    /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
    /// on KNC.  Also, ignore the last core.
    int P = sysconf (_SC_NPROCESSORS_ONLN);
    return P/2 - 2;
#elif defined __linux__
    int affinity_count = linux_get_affinity_count(linux_gettid());

    return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
#elif defined __APPLE__
    int count = 0;
    int cmd[2] = { CTL_HW, HW_NCPU };
    size_t len = sizeof count;
    int status = sysctl(cmd, 2, &count, &len, 0, 0);
    assert(status >= 0);
    assert((unsigned)count == count);

    return count;
#elif defined __FreeBSD__ || defined __CYGWIN__ || defined __DragonFly__
    // Just get the number of online processors.
    return sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __VXWORKS__
    return __builtin_popcount( vxCpuEnabledGet() );
#else
#error "Unknown architecture"
#endif
}

COMMON_SYSDEP void __cilkrts_sleep(void)
{
#ifdef __VXWORKS__
    taskDelay(1);
#else
    usleep(1);
#endif
}

COMMON_SYSDEP void __cilkrts_yield(void)
{
#if __APPLE__ || __FreeBSD__ || __VXWORKS__
    // On MacOS, call sched_yield to yield quantum.  I'm not sure why we
    // don't do this on Linux also.
    sched_yield();
#elif defined(__DragonFly__)
    // On DragonFly BSD, call sched_yield to yield quantum.
    sched_yield();
#elif defined(__MIC__)
    // On MIC, pthread_yield() really trashes things.  Arch's measurements
    // showed that calling _mm_delay_32() (or doing nothing) was a better
    // option.  Delaying 1024 clock cycles is a reasonable compromise between
    // giving up the processor and latency starting up when work becomes
    // available.
    _mm_delay_32(1024);
#elif defined(__ANDROID__) || (defined(__sun__) && defined(__svr4__))
    // On Android and Solaris, call sched_yield to yield quantum.  I'm not
    // sure why we don't do this on Linux also.
    sched_yield();
#else
    // On Linux, call pthread_yield (which in turn will call sched_yield)
    // to yield quantum.
    pthread_yield();
#endif
}

COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
                                           const char* varname)
{
    CILK_ASSERT(value);
    CILK_ASSERT(varname);

    const char* envstr = getenv(varname);
    if (envstr)
    {
        size_t len = strlen(envstr);
        // Guard against vallen == 0: "vallen - 1" would wrap around,
        // since vallen is unsigned.  If the buffer is too small, return
        // the size required, including the terminating NUL.
        if (vallen == 0 || len > vallen - 1)
            return len + 1;

        strcpy(value, envstr);
        return len;
    }
    else
    {
        if (vallen > 0)
            value[0] = '\0';
        return 0;
    }
}

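/*
 * Usage sketch (hypothetical, compiled out): the contract above lets a
 * caller detect truncation by comparing the return value against its
 * buffer size.  "CILK_NWORKERS" is just an example variable name.
 */
#if 0
static void example_getenv_usage(void)
{
    char buf[64];
    size_t n = cilkos_getenv(buf, sizeof(buf), "CILK_NWORKERS");
    if (n > sizeof(buf) - 1) {
        // Buffer too small; n is the required size, including the NUL.
    } else if (n > 0) {
        // buf holds the value; n is its length.
    } else {
        // Variable unset (or set to the empty string); buf is "".
    }
}
#endif
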
/*
 * Unrecoverable error: Print an error message and abort execution.
 */
COMMON_SYSDEP void cilkos_error(const char *fmt, ...)
{
    va_list l;
    fflush(NULL);
    fprintf(stderr, "Cilk error: ");
    va_start(l, fmt);
    vfprintf(stderr, fmt, l);
    va_end(l);
    fprintf(stderr, "Exiting.\n");
    fflush(stderr);

    abort();
}

/*
 * Print a warning message and return.
 */
COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
{
    va_list l;
    fflush(NULL);
    fprintf(stderr, "Cilk warning: ");
    va_start(l, fmt);
    vfprintf(stderr, fmt, l);
    va_end(l);
    fflush(stderr);
}

static void __attribute__((constructor)) init_once(void)
{
    /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
    __cilkrts_init_tls_variables();
}

#define PAGE 4096
#define CILK_MIN_STACK_SIZE (4*PAGE)
// Default size for the stacks that we create in Cilk for Unix.
#define CILK_DEFAULT_STACK_SIZE 0x100000

/*
 * Convert the user's specified stack size into a "reasonable" value
 * for this OS.
 */
size_t cilkos_validate_stack_size(size_t specified_stack_size) {
    // Convert an unspecified (zero) value to the default.  Note that
    // specified_stack_size is unsigned, so a "negative" value would
    // already appear here as a very large one.
    if (specified_stack_size == 0) {
        CILK_ASSERT((CILK_DEFAULT_STACK_SIZE % PAGE) == 0);
        return CILK_DEFAULT_STACK_SIZE;
    }
    // Round values between 0 and CILK_MIN_STACK_SIZE up to
    // CILK_MIN_STACK_SIZE.
    if (specified_stack_size <= CILK_MIN_STACK_SIZE) {
        return CILK_MIN_STACK_SIZE;
    }
    if ((specified_stack_size % PAGE) > 0) {
        // Round the user's stack size value up to the nearest page boundary.
        return (PAGE * (1 + specified_stack_size / PAGE));
    }
    return specified_stack_size;
}

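/*
 * Worked examples (compiled out), following the rules above with
 * PAGE == 4096, CILK_MIN_STACK_SIZE == 16384 and
 * CILK_DEFAULT_STACK_SIZE == 0x100000:
 */
#if 0
static void example_stack_size_rounding(void)
{
    CILK_ASSERT(cilkos_validate_stack_size(0)      == 0x100000); // default, 1 MB
    CILK_ASSERT(cilkos_validate_stack_size(5000)   == 16384);    // raised to 4*PAGE
    CILK_ASSERT(cilkos_validate_stack_size(100000) == 102400);   // rounded up to 25 pages
    CILK_ASSERT(cilkos_validate_stack_size(131072) == 131072);   // already page-aligned
}
#endif
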
long cilkos_atomic_add(volatile long* p, long x)
{
    return __sync_add_and_fetch(p, x);
}

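/*
 * Usage sketch (hypothetical, compiled out): cilkos_atomic_add returns
 * the post-add value, mirroring __sync_add_and_fetch, so it can both
 * bump and read a shared counter in one step.
 */
#if 0
static volatile long example_counter;

static long example_next_ticket(void)
{
    // Concurrent callers each observe a distinct return value.
    return cilkos_atomic_add(&example_counter, 1);
}
#endif
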
/* End os-unix.c */