/* Copyright (C) 2005-2021 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
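      /* Non-nested: this thread joins the thread pool.  Register in the
         pool, dock until the master releases the team, and keep running
         whatever function the master assigns until released with no
         function, which is the signal to exit.  */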
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

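/* If the calling thread is not yet part of a team, try to reuse the team
   cached in its thread pool, provided that team was built for the same
   number of threads.  Returns NULL when a fresh team must be allocated.  */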
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

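  /* Set up the team's cache of embedded work shares: work_shares[0] is
     initialized for immediate use and the remaining ones are chained onto
     the allocation free list.  */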
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

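/* Helper run by each docked pool thread when the pool is being torn down:
   wait for the final dock release, clean up the thread's state and
   terminate the thread.  */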
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
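          /* Point every docked thread at the pool tear-down helper; the
             dock release below makes them run it and exit.  */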
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REM threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
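  /* Release the team: a nested team synchronizes on the team barrier,
     while a non-nested team releases the pool threads from the dock.  */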
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but don't detach itself,
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

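/* Give the current thread, which has no task yet, an initial implicit task
   based on the global ICVs and return a pointer to its ICV block.  Also
   register the thread with the destructor key so its resources are
   reclaimed at thread exit when pthreads are in use.  */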
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}