/* libgomp/team.c  */
/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

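          /* The master thread either handed this thread new work by storing
             the next fn/data pair before releasing the dock, or left fn
             NULL, in which case the thread is not part of the next team and
             the loop below terminates.  */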
          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

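/* If the current thread is not nested inside another team, try to reuse
   the team cached in the thread pool, provided it was set up for the same
   number of threads.  Returns NULL when no suitable cached team exists.  */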
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);
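      /* This single allocation holds the team itself, the NTHREADS
         implicit tasks (the implicit_task[] trailing array declared in
         libgomp.h), and after those the NTHREADS ordered_release semaphore
         pointers that ordered_release is pointed at further below.  */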

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

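/* Helper installed as the pending fn of each docked pool thread when the
   pool is being torn down: arrive at the dock barrier, release the
   per-thread state and terminate the thread.  */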
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_simple_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */
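/* FN and DATA are the outlined body of the parallel region and its
   argument, NTHREADS is the number of threads in the new team, FLAGS
   carries the parallel construct flags (the low bits encode the proc_bind
   clause), TEAM is the structure normally obtained from gomp_new_team,
   and TASKGROUP is the enclosing taskgroup, if any.  */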

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
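              /* For example (illustrative numbers only): with T = 8 threads
                 and P = 3 places, S = 2 and REST = 2, so each place first
                 receives two threads and the last two threads are then
                 added to the first two places, giving 3, 3 and 2 threads
                 per place.  */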
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
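              /* For example (illustrative numbers only): with T = 4 threads
                 and P = 10 places, s = 2 and rest = 2, so the first two
                 subpartitions get three places each and the remaining two
                 get two places each.  */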
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

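  /* Keep the global gomp_managed_threads count in sync with the number of
     threads about to exist; AFFINITY_COUNT covers threads that are kept
     alive only temporarily while places are being reshuffled.  */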
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     never to be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
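  /* For instance (illustrative numbers only): if the previous team used 8
     pool threads and this one uses 4 with no affinity reshuffling, diff is
     -4 and the dock barrier is resized to 4 as the surplus threads exit.  */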
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  Because #pragma
     omp cancel parallel might leave the awaited count in team->barrier in
     an inconsistent state, we need to use a different counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
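
/* A rough sketch (not part of this file) of how these functions fit
   together: the GOMP_parallel entry point in parallel.c creates the team,
   starts it, runs the region in the master thread and then tears the team
   down, roughly as in

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);             master runs the outlined region itself
     gomp_team_end ();      reached via GOMP_parallel_end

   The exact call sequence lives in parallel.c and may differ in detail.  */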

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but does not detach the thread;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_simple_barrier_wait_last in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      free (pool->threads);
      free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

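/* Create a fresh implicit task and ICV set for a thread that libgomp has
   not set up yet (for instance a thread not created by libgomp that enters
   an OpenMP region for the first time), register the thread destructor
   where pthreads are used, and return the new ICVs.  */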
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}