/* Copyright (C) 2005-2023 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
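      /* Idle loop: each iteration runs the work for one team, then the
         thread re-docks on THREADS_DOCK until the master either hands it
         a new FN or leaves FN as NULL to make it exit.  */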
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif

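/* If the thread pool's cached last team matches the requested thread count
   and the caller is not already inside a team, hand that team back for
   reuse instead of allocating a fresh one.  */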
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
                                 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
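  /* The first inline work share is initialized for immediate use by this
     team; the remaining statically allocated ones are chained below into
     the allocation free list.  */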
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  team->task_detach_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

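/* Helper run by each docked pool thread while the pool is being torn down:
   rendezvous with the master on the dock barrier, drop the thread's pool
   and task pointers, then terminate in a target-specific way.  */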
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_simple_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

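  /* A team of one is served entirely by the master thread; there are no
     workers to release or create.  */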
  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier ones s+1 places);
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread,
                 which can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
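                  /* The existing pool thread at this slot is not bound
                     within the required place partition.  On the first such
                     mismatch, bucket the remaining old threads into
                     AFFINITY_THR by place so a suitably placed thread can
                     be picked for each remaining team slot.  */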
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
          nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->num_teams = thr->num_teams;
          nthr->team_num = thr->team_num;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 vars modified by the switch (bind) above inside
                 of the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

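  /* When binding to places, build the new threads' attributes on a local
     copy so per-thread affinity can be applied without modifying the
     global gomp_thread_attr.  */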
  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  if (i >= nthreads)
    __builtin_unreachable ();
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
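      /* The team was cancelled; finalize every work share still chained on
         the to-free list rather than just the one the master was in.  */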
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach the thread;
   gomp_pause_host will pthread_join these threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_simple_barrier_wait_last in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif

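/* Allocate a fresh implicit task with the global ICVs for a thread that
   does not yet have one, register the thread destructor for it, and
   return a pointer to the new task's ICV block.  */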
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}