/* Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;
  pthread_t handle;
};
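
/* Editorial note on lifetime (my reading of the code below, not upstream
   commentary): these blocks are carved out of a single gomp_alloca'd
   array in gomp_team_start, one per thread being launched; the new
   thread copies out what it needs in gomp_thread_start, and the barrier
   waits at do_release keep the master's stack frame alive until it
   has.  */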


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
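
/* The master-side counterpart of the idle loop above lives in
   gomp_team_start and gomp_free_thread below: a docked thread is handed
   its next job and then released, roughly as

     nthr->fn = fn;    // or gomp_free_pool_helper, to shut the pool down
     nthr->data = data;
     gomp_simple_barrier_wait (&pool->threads_dock);

   A thread released while its FN is still NULL drops out of the loop
   above and exits.  */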
#endif
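
/* Return the team cached by the previous non-nested gomp_team_end if it
   was built for exactly NTHREADS threads, and NULL otherwise (nested
   context, no cached team, or a thread-count mismatch), in which case
   the caller allocates a fresh team.  */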
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
                                 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}
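
/* Usage sketch (illustrative): a compiled parallel region drives the
   functions in this file roughly the way GOMP_parallel in parallel.c
   does:

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, flags, team, NULL);
     fn (data);         // the master runs its own share of the region
     gomp_team_end ();  // reached via GOMP_parallel_end

   See parallel.c for the authoritative sequence.  */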


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called
             gomp_simple_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->num_teams = thr->num_teams;
          nthr->team_num = thr->team_num;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  if (i >= nthreads)
    __builtin_unreachable ();
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will always be at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
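
/* Note the caching in the non-nested path above: rather than freeing the
   finished team at once, gomp_team_end parks it in pool->last_team (after
   freeing any previously parked team), and get_last_team hands it back to
   gomp_new_team when the next team needs the same number of threads.
   This presumably also avoids freeing a barrier that departing threads
   may still be waking from.  */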

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called
             gomp_simple_barrier_wait_last in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
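
/* Unlike the teardown in gomp_free_thread, where workers detach
   themselves, gomp_pause_host joins every worker before returning, so a
   zero return means all pooled threads have fully exited and their
   resources are truly released.  */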
#endif

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}