/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};

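/* Lifecycle of a pool thread, as implemented below: gomp_team_start creates
   it with pthread_create, passing a gomp_thread_start_data; gomp_thread_start
   runs the team's function and then parks ("docks") on pool->threads_dock.
   A later non-nested gomp_team_start releases the dock with a new fn/data
   pair, and shutdown releases it with gomp_free_pool_helper (or
   gomp_pause_pool_helper) so the thread terminates.  */
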
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

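/* If the current thread is not inside a team and its pool's cached last
   team has exactly NTHREADS threads, detach that team from the pool and
   return it for reuse; otherwise return NULL.  */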
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
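
/* Note on the allocation above: a team for N threads is carved out of one
   block of sizeof (struct gomp_team) plus N * (sizeof
   (team->ordered_release[0]) + sizeof (team->implicit_task[0])) bytes.
   The N implicit tasks occupy the tail of the block, and
   team->ordered_release is pointed just past implicit_task[N], giving N
   semaphore-pointer slots without a second allocation.  */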

/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

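/* Shutdown helper handed to docked pool threads via thr->fn: wait on the
   dock barrier one last time so the thread freeing the pool can proceed,
   release the per-thread state and terminate the calling thread.  It never
   returns.  */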
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
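/* Illustrative caller sketch (cf. GOMP_parallel in parallel.c): a
   "#pragma omp parallel" region amounts to roughly

     num_threads = gomp_resolve_num_threads (num_threads, 0);
     gomp_team_start (fn, data, num_threads, flags,
                      gomp_new_team (num_threads), NULL);
     fn (data);
     GOMP_parallel_end ();

   gomp_team_start only launches or releases the other NTHREADS - 1 threads;
   the calling thread becomes team member 0 and returns to run FN itself,
   while GOMP_parallel_end eventually calls gomp_team_end below.  */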
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

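  /* Worked example (illustrative, not from the original sources): with
     omp_proc_bind_close, NTHREADS == 8 and a partition of 3 places, the
     code above computes S == 2 and REST == 2, so the loops below assign
     threads to the places as 3, 3 and 2; the two leftover threads go one
     by one into the first places of the partition, assuming the master
     sits in the partition's first place.  */
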
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

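  /* From here on we account in gomp_managed_threads for the threads that
     still need to be created (and, transiently, for displaced old threads
     kept alive only for affinity reshuffling), then launch them.  */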
  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

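  /* Release the team: nested teams block on the team barrier in
     gomp_thread_start, everything else is undocked from the pool's
     threads_dock barrier.  From this point on the other team members
     start running FN.  */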
 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but does not detach itself;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

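/* Allocate a fresh implicit task for the current thread, initialize it
   from the global ICVs and install it as thr->task, returning a pointer
   to its ICV block.  Registering the thread with gomp_thread_destructor
   ensures gomp_free_thread runs when the thread exits.  */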
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}