/* Copyright (C) 2007-2021 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
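
/* Note that the dependence hash table is keyed purely by address: for
   example, a later depend(in: x) clause hashes to the same slot as an
   earlier depend(out: x) on the same variable, which is how conflicting
   accesses to one address end up chained together in
   gomp_task_handle_depend below.  */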

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t i;
  hash_entry_type ent;

  if (ndepend)
    {
      /* depend[0] is total # */
      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
      /* ndepend - nout is # of in: */
      for (i = 0; i < ndepend; i++)
	{
	  task->depend[i].addr = depend[2 + i];
	  task->depend[i].is_in = i >= nout;
	}
    }
  else
    {
      ndepend = (uintptr_t) depend[1]; /* total # */
      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
      /* For now we treat mutexinoutset like out, which is compliant, but
	 inefficient.  */
      size_t nin = (uintptr_t) depend[4]; /* # of in: */
      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
      size_t normal = nout + nmutexinoutset + nin;
      size_t n = 0;
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      break;
	    case GOMP_DEPEND_IN:
	      continue;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 0;
	}
      for (i = 0; i < normal; i++)
	{
	  task->depend[n].addr = depend[5 + i];
	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
	}
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
	    continue;
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 1;
	}
    }
  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depend clauses of the same task reference the same
	     address, all but the first one are redundant.  As inout/out come
	     first, if any of them is inout/out, it will win, which is the
	     right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (task->depend[i].is_in && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from an earlier
		 depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain; after the first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
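
/* For example (an illustrative walk-through, not taken from the code
   above): if task T1 has depend(out: x) and later sibling tasks T2 and
   T3 each have depend(in: x), the loop above records T2 and T3 in
   T1->dependers and bumps their num_dependees to 1, so T2 and T3 are
   only queued for execution once T1 finishes and
   gomp_task_run_post_handle_dependers runs.  */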

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     if depend[0] is non-zero, then:
	depend[0]: number of depend elements.
	depend[1]: number of depend elements of type "out/inout".
	depend[2..N+1]: address of [1..N]th depend element.
     otherwise, when depend[0] is zero, then:
	depend[1]: number of depend elements.
	depend[2]: number of depend elements of type "out/inout".
	depend[3]: number of depend elements of type "mutexinoutset".
	depend[4]: number of depend elements of type "in".
	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
	omp_depend_t objects.  */
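
/* For instance (an illustrative example of the first, legacy layout
   described above): "#pragma omp task depend(out: x) depend(in: y, z)"
   would be passed roughly as
     depend[0] = (void *) 3   -- three depend elements in total
     depend[1] = (void *) 1   -- one of them is out/inout
     depend[2] = &x           -- out/inout addresses come first
     depend[3] = &y
     depend[4] = &z  */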

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && !task->copy_ctors_done)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_finish_task (task);
	      free (task);
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)
ialias (GOMP_taskgroup_reduction_register)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop
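
/* The two "taskloop.c" inclusions above instantiate the taskloop
   implementation twice: once for signed long iteration variables
   (providing GOMP_taskloop) and once for unsigned long long ones
   (providing GOMP_taskloop_ull), with TYPE/UTYPE selecting the
   arithmetic used.  */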

static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return true;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return true;
	}
    }

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
	 are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0))
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	{
	do_cancel:
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return true;
	}
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    goto do_cancel;
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    goto do_cancel;
	}
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially scheduling the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}

/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|              +--- last_parent_depends_on
	|              |
	V              V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}

/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
			       struct priority_queue *head,
			       struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}

/* Set up CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if (__builtin_expect (gomp_cancel_var, 0)
      && !child_task->copy_ctors_done)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    return true;
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    return true;
	}
    }
  return false;
}

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}

/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If parent is gone already, the hash table is freed and nothing
     will use the hash table anymore, no need to remove anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}

/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* Called when encountering a taskwait directive with depend clause(s).
   Wait as if it were a mergeable included task construct with an empty
   body.  */

void
GOMP_taskwait_depend (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, return early.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  if (thr->task && thr->task->depend_hash)
    gomp_task_maybe_wait_for_dependencies (depend);
}
1603
e4606348
JJ
1604/* An undeferred task is about to run. Wait for all tasks that this
1605 undeferred task depends on.
1606
1607 This is done by first putting all known ready dependencies
1608 (dependencies that have their own dependencies met) at the top of
1609 the scheduling queues. Then we iterate through these imminently
1610 ready tasks (and possibly other high priority tasks), and run them.
1611 If we run out of ready dependencies to execute, we either wait for
28567c40 1612 the remaining dependencies to finish, or wait for them to get
e4606348 1613 scheduled so we can run them.
0494285a 1614
d9a6bd32
JJ
1615 DEPEND is as in GOMP_task. */
1616
1617void
0494285a
JJ
1618gomp_task_maybe_wait_for_dependencies (void **depend)
1619{
1620 struct gomp_thread *thr = gomp_thread ();
1621 struct gomp_task *task = thr->task;
1622 struct gomp_team *team = thr->ts.team;
1623 struct gomp_task_depend_entry elem, *ent = NULL;
1624 struct gomp_taskwait taskwait;
28567c40 1625 size_t orig_ndepend = (uintptr_t) depend[0];
0494285a 1626 size_t nout = (uintptr_t) depend[1];
28567c40
JJ
1627 size_t ndepend = orig_ndepend;
1628 size_t normal = ndepend;
1629 size_t n = 2;
0494285a
JJ
1630 size_t i;
1631 size_t num_awaited = 0;
1632 struct gomp_task *child_task = NULL;
1633 struct gomp_task *to_free = NULL;
1634 int do_wake = 0;
1635
28567c40
JJ
1636 if (ndepend == 0)
1637 {
1638 ndepend = nout;
1639 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1640 normal = nout + (uintptr_t) depend[4];
1641 n = 5;
1642 }
0494285a
JJ
1643 gomp_mutex_lock (&team->task_lock);
1644 for (i = 0; i < ndepend; i++)
1645 {
28567c40
JJ
1646 elem.addr = depend[i + n];
1647 elem.is_in = i >= nout;
1648 if (__builtin_expect (i >= normal, 0))
1649 {
1650 void **d = (void **) elem.addr;
1651 switch ((uintptr_t) d[1])
1652 {
1653 case GOMP_DEPEND_IN:
1654 break;
1655 case GOMP_DEPEND_OUT:
1656 case GOMP_DEPEND_INOUT:
1657 case GOMP_DEPEND_MUTEXINOUTSET:
1658 elem.is_in = 0;
1659 break;
1660 default:
1661 gomp_fatal ("unknown omp_depend_t dependence type %d",
1662 (int) (uintptr_t) d[1]);
1663 }
1664 elem.addr = d[0];
1665 }
0494285a
JJ
1666 ent = htab_find (task->depend_hash, &elem);
1667 for (; ent; ent = ent->next)
28567c40 1668 if (elem.is_in && ent->is_in)
0494285a
JJ
1669 continue;
1670 else
1671 {
1672 struct gomp_task *tsk = ent->task;
1673 if (!tsk->parent_depends_on)
1674 {
1675 tsk->parent_depends_on = true;
1676 ++num_awaited;
93d90219 1677 /* If dependency TSK itself has no dependencies and is
e4606348
JJ
1678 ready to run, move it up front so that we run it as
1679 soon as possible. */
0494285a 1680 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
e4606348 1681 priority_queue_upgrade_task (tsk, task);
0494285a
JJ
1682 }
1683 }
1684 }
1685 if (num_awaited == 0)
1686 {
1687 gomp_mutex_unlock (&team->task_lock);
1688 return;
1689 }
1690
1691 memset (&taskwait, 0, sizeof (taskwait));
1692 taskwait.n_depend = num_awaited;
0494285a
JJ
1693 gomp_sem_init (&taskwait.taskwait_sem, 0);
1694 task->taskwait = &taskwait;
1695
1696 while (1)
1697 {
1698 bool cancelled = false;
1699 if (taskwait.n_depend == 0)
1700 {
1701 task->taskwait = NULL;
a68ab351
JJ
1702 gomp_mutex_unlock (&team->task_lock);
1703 if (to_free)
1704 {
1705 gomp_finish_task (to_free);
1706 free (to_free);
1707 }
0494285a 1708 gomp_sem_destroy (&taskwait.taskwait_sem);
a68ab351
JJ
1709 return;
1710 }
e4606348
JJ
1711
1712 /* Theoretically when we have multiple priorities, we should
1713	    choose between the highest priority item in
1714 task->children_queue and team->task_queue here, so we should
1715 use priority_queue_next_task(). However, since we are
1716 running an undeferred task, perhaps that makes all tasks it
1717 depends on undeferred, thus a priority of INF? This would
1718	    make it unnecessary to take anything but the dependencies
1719	    into account here.
1720
1721 On the other hand, if we want to use priority_queue_next_task(),
1722 care should be taken to only use priority_queue_remove()
1723 below if the task was actually removed from the children
1724 queue. */
1725 bool ignored;
1726 struct gomp_task *next_task
1727 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1728 PQ_IGNORED, NULL, &ignored);
1729
1730 if (next_task->kind == GOMP_TASK_WAITING)
a68ab351 1731 {
e4606348 1732 child_task = next_task;
acf0174b 1733 cancelled
d9a6bd32 1734 = gomp_task_run_pre (child_task, task, team);
acf0174b 1735 if (__builtin_expect (cancelled, 0))
a68ab351 1736 {
acf0174b
JJ
1737 if (to_free)
1738 {
1739 gomp_finish_task (to_free);
1740 free (to_free);
1741 to_free = NULL;
1742 }
1743 goto finish_cancelled;
a68ab351 1744 }
a68ab351
JJ
1745 }
1746 else
e4606348
JJ
1747 /* All tasks we are waiting for are either running in other
1748 threads, or they are tasks that have not had their
1749 dependencies met (so they're not even in the queue). Wait
1750 for them. */
0494285a 1751 taskwait.in_depend_wait = true;
a68ab351 1752 gomp_mutex_unlock (&team->task_lock);
acf0174b
JJ
1753 if (do_wake)
1754 {
1755 gomp_team_barrier_wake (&team->barrier, do_wake);
1756 do_wake = 0;
1757 }
a68ab351
JJ
1758 if (to_free)
1759 {
1760 gomp_finish_task (to_free);
1761 free (to_free);
1762 to_free = NULL;
1763 }
1764 if (child_task)
1765 {
1766 thr->task = child_task;
e4606348
JJ
1767 if (__builtin_expect (child_task->fn == NULL, 0))
1768 {
1769 if (gomp_target_task_fn (child_task->fn_data))
1770 {
1771 thr->task = task;
1772 gomp_mutex_lock (&team->task_lock);
1773 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1774 struct gomp_target_task *ttask
1775 = (struct gomp_target_task *) child_task->fn_data;
1776 /* If GOMP_PLUGIN_target_task_completion has run already
1777 in between gomp_target_task_fn and the mutex lock,
1778 perform the requeuing here. */
1779 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1780 gomp_target_task_completion (team, child_task);
1781 else
1782 ttask->state = GOMP_TARGET_TASK_RUNNING;
1783 child_task = NULL;
1784 continue;
1785 }
1786 }
1787 else
1788 child_task->fn (child_task->fn_data);
a68ab351
JJ
1789 thr->task = task;
1790 }
1791 else
0494285a 1792 gomp_sem_wait (&taskwait.taskwait_sem);
a68ab351
JJ
1793 gomp_mutex_lock (&team->task_lock);
1794 if (child_task)
1795 {
acf0174b
JJ
1796 finish_cancelled:;
1797 size_t new_tasks
1798 = gomp_task_run_post_handle_depend (child_task, team);
0494285a
JJ
1799 if (child_task->parent_depends_on)
1800 --taskwait.n_depend;
d9a6bd32 1801
e4606348
JJ
1802 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1803 child_task, MEMMODEL_RELAXED);
1804 child_task->pnode[PQ_CHILDREN].next = NULL;
1805 child_task->pnode[PQ_CHILDREN].prev = NULL;
d9a6bd32 1806
e4606348 1807 gomp_clear_parent (&child_task->children_queue);
acf0174b 1808 gomp_task_run_post_remove_taskgroup (child_task);
a68ab351
JJ
1809 to_free = child_task;
1810 child_task = NULL;
1811 team->task_count--;
acf0174b
JJ
1812 if (new_tasks > 1)
1813 {
1814 do_wake = team->nthreads - team->task_running_count
1815 - !task->in_tied_task;
1816 if (do_wake > new_tasks)
1817 do_wake = new_tasks;
1818 }
a68ab351
JJ
1819 }
1820 }
1821}
20906c66
JJ
1822
1823/* Called when encountering a taskyield directive. */
1824
1825void
1826GOMP_taskyield (void)
1827{
1828 /* Nothing at the moment. */
1829}
1830
28567c40
JJ
1831static inline struct gomp_taskgroup *
1832gomp_taskgroup_init (struct gomp_taskgroup *prev)
1833{
1834 struct gomp_taskgroup *taskgroup
1835 = gomp_malloc (sizeof (struct gomp_taskgroup));
1836 taskgroup->prev = prev;
1837 priority_queue_init (&taskgroup->taskgroup_queue);
1838 taskgroup->reductions = prev ? prev->reductions : NULL;
1839 taskgroup->in_taskgroup_wait = false;
1840 taskgroup->cancelled = false;
1841 taskgroup->workshare = false;
1842 taskgroup->num_children = 0;
1843 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1844 return taskgroup;
1845}
1846
acf0174b
JJ
1847void
1848GOMP_taskgroup_start (void)
1849{
1850 struct gomp_thread *thr = gomp_thread ();
1851 struct gomp_team *team = thr->ts.team;
1852 struct gomp_task *task = thr->task;
acf0174b
JJ
1853
1854 /* If team is NULL, all tasks are executed as
d9a6bd32 1855	     GOMP_TASK_UNDEFERRED tasks and thus all child tasks of
acf0174b
JJ
1856 taskgroup and their descendant tasks will be finished
1857 by the time GOMP_taskgroup_end is called. */
1858 if (team == NULL)
1859 return;
28567c40 1860 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
acf0174b
JJ
1861}
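/* Illustrative sketch (hypothetical user code) of how the entry point above
   pairs with GOMP_taskgroup_end below; the compiler is expected to bracket
   the body of a taskgroup region with the two calls:

     #pragma omp taskgroup
     {
       #pragma omp task
         work_a ();
       #pragma omp task
         work_b ();
     }   /* GOMP_taskgroup_end waits here for work_a, work_b and descendants.  */
*/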
1862
1863void
1864GOMP_taskgroup_end (void)
1865{
1866 struct gomp_thread *thr = gomp_thread ();
1867 struct gomp_team *team = thr->ts.team;
1868 struct gomp_task *task = thr->task;
1869 struct gomp_taskgroup *taskgroup;
1870 struct gomp_task *child_task = NULL;
1871 struct gomp_task *to_free = NULL;
1872 int do_wake = 0;
1873
1874 if (team == NULL)
1875 return;
1876 taskgroup = task->taskgroup;
e4606348
JJ
1877 if (__builtin_expect (taskgroup == NULL, 0)
1878 && thr->ts.level == 0)
1879 {
1880 /* This can happen if GOMP_taskgroup_start is called when
1881 thr->ts.team == NULL, but inside of the taskgroup there
1882 is #pragma omp target nowait that creates an implicit
1883 team with a single thread. In this case, we want to wait
1884 for all outstanding tasks in this team. */
1885 gomp_team_barrier_wait (&team->barrier);
1886 return;
1887 }
acf0174b
JJ
1888
1889 /* The acquire barrier on load of taskgroup->num_children here
1890 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1891 It is not necessary that we synchronize with other non-0 writes at
1892 this point, but we must ensure that all writes to memory by a
1893 child thread task work function are seen before we exit from
1894 GOMP_taskgroup_end. */
1895 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1896 goto finish;
1897
e4606348 1898 bool unused;
acf0174b
JJ
1899 gomp_mutex_lock (&team->task_lock);
1900 while (1)
1901 {
1902 bool cancelled = false;
e4606348
JJ
1903 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1904 MEMMODEL_RELAXED))
acf0174b
JJ
1905 {
1906 if (taskgroup->num_children)
acf0174b 1907 {
e4606348
JJ
1908 if (priority_queue_empty_p (&task->children_queue,
1909 MEMMODEL_RELAXED))
3696163c 1910 goto do_wait;
e4606348
JJ
1911 child_task
1912 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1913 PQ_TEAM, &team->task_queue,
1914 &unused);
1915 }
1916 else
3696163c
JJ
1917 {
1918 gomp_mutex_unlock (&team->task_lock);
1919 if (to_free)
1920 {
1921 gomp_finish_task (to_free);
1922 free (to_free);
1923 }
1924 goto finish;
acf0174b 1925 }
acf0174b 1926 }
3696163c 1927 else
e4606348
JJ
1928 child_task
1929 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1930 PQ_TEAM, &team->task_queue, &unused);
3696163c 1931 if (child_task->kind == GOMP_TASK_WAITING)
acf0174b 1932 {
acf0174b 1933 cancelled
d9a6bd32 1934 = gomp_task_run_pre (child_task, child_task->parent, team);
acf0174b
JJ
1935 if (__builtin_expect (cancelled, 0))
1936 {
1937 if (to_free)
1938 {
1939 gomp_finish_task (to_free);
1940 free (to_free);
1941 to_free = NULL;
1942 }
1943 goto finish_cancelled;
1944 }
1945 }
1946 else
1947 {
3696163c 1948 child_task = NULL;
acf0174b 1949 do_wait:
e4606348
JJ
1950 /* All tasks we are waiting for are either running in other
1951 threads, or they are tasks that have not had their
1952 dependencies met (so they're not even in the queue). Wait
1953 for them. */
acf0174b
JJ
1954 taskgroup->in_taskgroup_wait = true;
1955 }
1956 gomp_mutex_unlock (&team->task_lock);
1957 if (do_wake)
1958 {
1959 gomp_team_barrier_wake (&team->barrier, do_wake);
1960 do_wake = 0;
1961 }
1962 if (to_free)
1963 {
1964 gomp_finish_task (to_free);
1965 free (to_free);
1966 to_free = NULL;
1967 }
1968 if (child_task)
1969 {
1970 thr->task = child_task;
e4606348
JJ
1971 if (__builtin_expect (child_task->fn == NULL, 0))
1972 {
1973 if (gomp_target_task_fn (child_task->fn_data))
1974 {
1975 thr->task = task;
1976 gomp_mutex_lock (&team->task_lock);
1977 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1978 struct gomp_target_task *ttask
1979 = (struct gomp_target_task *) child_task->fn_data;
1980 /* If GOMP_PLUGIN_target_task_completion has run already
1981 in between gomp_target_task_fn and the mutex lock,
1982 perform the requeuing here. */
1983 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1984 gomp_target_task_completion (team, child_task);
1985 else
1986 ttask->state = GOMP_TARGET_TASK_RUNNING;
1987 child_task = NULL;
1988 continue;
1989 }
1990 }
1991 else
1992 child_task->fn (child_task->fn_data);
acf0174b
JJ
1993 thr->task = task;
1994 }
1995 else
1996 gomp_sem_wait (&taskgroup->taskgroup_sem);
1997 gomp_mutex_lock (&team->task_lock);
1998 if (child_task)
1999 {
2000 finish_cancelled:;
2001 size_t new_tasks
2002 = gomp_task_run_post_handle_depend (child_task, team);
acf0174b 2003 gomp_task_run_post_remove_parent (child_task);
e4606348 2004 gomp_clear_parent (&child_task->children_queue);
3696163c 2005 gomp_task_run_post_remove_taskgroup (child_task);
acf0174b
JJ
2006 to_free = child_task;
2007 child_task = NULL;
2008 team->task_count--;
2009 if (new_tasks > 1)
2010 {
2011 do_wake = team->nthreads - team->task_running_count
2012 - !task->in_tied_task;
2013 if (do_wake > new_tasks)
2014 do_wake = new_tasks;
2015 }
2016 }
2017 }
2018
2019 finish:
2020 task->taskgroup = taskgroup->prev;
2021 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2022 free (taskgroup);
2023}
2024
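/* Register a chain of task reduction descriptors DATA (layout documented
   before GOMP_taskgroup_reduction_register below): unless ORIG already
   provides memory allocated by another thread, allocate and zero an array of
   NTHREADS * size bytes (one chunk per thread) for each descriptor, link the
   DATA chain in front of OLD, and build a hash table over all entries
   (including those reachable from OLD) for the remap lookups.  */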
28567c40
JJ
2025static inline __attribute__((always_inline)) void
2026gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2027 unsigned nthreads)
2028{
2029 size_t total_cnt = 0;
2030 uintptr_t *d = data;
2031 struct htab *old_htab = NULL, *new_htab;
2032 do
2033 {
2034 if (__builtin_expect (orig != NULL, 0))
2035 {
2036 /* For worksharing task reductions, memory has been allocated
2037 already by some other thread that encountered the construct
2038 earlier. */
2039 d[2] = orig[2];
2040 d[6] = orig[6];
2041 orig = (uintptr_t *) orig[4];
2042 }
2043 else
2044 {
2045 size_t sz = d[1] * nthreads;
2046 /* Should use omp_alloc if d[3] is not -1. */
2047 void *ptr = gomp_aligned_alloc (d[2], sz);
2048 memset (ptr, '\0', sz);
2049 d[2] = (uintptr_t) ptr;
2050 d[6] = d[2] + sz;
2051 }
2052 d[5] = 0;
2053 total_cnt += d[0];
2054 if (d[4] == 0)
2055 {
2056 d[4] = (uintptr_t) old;
2057 break;
2058 }
2059 else
2060 d = (uintptr_t *) d[4];
2061 }
2062 while (1);
2063 if (old && old[5])
2064 {
2065 old_htab = (struct htab *) old[5];
2066 total_cnt += htab_elements (old_htab);
2067 }
2068 new_htab = htab_create (total_cnt);
2069 if (old_htab)
2070 {
2071 /* Copy old hash table, like in htab_expand. */
2072 hash_entry_type *p, *olimit;
2073 new_htab->n_elements = htab_elements (old_htab);
2074 olimit = old_htab->entries + old_htab->size;
2075 p = old_htab->entries;
2076 do
2077 {
2078 hash_entry_type x = *p;
2079 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2080 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2081 p++;
2082 }
2083 while (p < olimit);
2084 }
2085 d = data;
2086 do
2087 {
2088 size_t j;
2089 for (j = 0; j < d[0]; ++j)
2090 {
2091 uintptr_t *p = d + 7 + j * 3;
2092 p[2] = (uintptr_t) d;
2093 /* Ugly hack, hash_entry_type is defined for the task dependencies,
2094 which hash on the first element which is a pointer. We need
2095 to hash also on the first sizeof (uintptr_t) bytes which contain
2096 a pointer. Hide the cast from the compiler. */
2097 hash_entry_type n;
2098 __asm ("" : "=g" (n) : "0" (p));
2099 *htab_find_slot (&new_htab, n, INSERT) = n;
2100 }
2101 if (d[4] == (uintptr_t) old)
2102 break;
2103 else
2104 d = (uintptr_t *) d[4];
2105 }
2106 while (1);
2107 d[5] = (uintptr_t) new_htab;
2108}
2109
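/* Set up an artificial team of a single thread so that orphaned constructs
   that need a team and an implicit task (e.g. the task reduction code below)
   have one to hang their state off: allocate a one-thread team, point the
   current thread at it, and replace any previously running non-team task
   with the team's implicit task.  */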
2110static void
2111gomp_create_artificial_team (void)
2112{
2113 struct gomp_thread *thr = gomp_thread ();
2114 struct gomp_task_icv *icv;
2115 struct gomp_team *team = gomp_new_team (1);
2116 struct gomp_task *task = thr->task;
2117 icv = task ? &task->icv : &gomp_global_icv;
2118 team->prev_ts = thr->ts;
2119 thr->ts.team = team;
2120 thr->ts.team_id = 0;
2121 thr->ts.work_share = &team->work_shares[0];
2122 thr->ts.last_work_share = NULL;
2123#ifdef HAVE_SYNC_BUILTINS
2124 thr->ts.single_count = 0;
2125#endif
2126 thr->ts.static_trip = 0;
2127 thr->task = &team->implicit_task[0];
2128 gomp_init_task (thr->task, NULL, icv);
2129 if (task)
2130 {
2131 thr->task = task;
2132 gomp_end_task ();
2133 free (task);
2134 thr->task = &team->implicit_task[0];
2135 }
2136#ifdef LIBGOMP_USE_PTHREADS
2137 else
2138 pthread_setspecific (gomp_thread_destructor, thr);
2139#endif
2140}
2141
2142/* The format of data is:
2143 data[0] cnt
2144 data[1] size
2145 data[2] alignment (on output array pointer)
2146 data[3] allocator (-1 if malloc allocator)
2147 data[4] next pointer
2148 data[5] used internally (htab pointer)
2149 data[6] used internally (end of array)
2150 cnt times
2151 ent[0] address
2152 ent[1] offset
2153 ent[2] used internally (pointer to data[0])
2154 The entries are sorted by increasing offset, so that a binary
 2155   search can be performed.  Normally data[8] is 0; the exception is
 2156   worksharing construct task reductions in cancellable parallel,
2157 where at offset 0 there should be space for a pointer and an integer
2158 which are used internally. */
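/* Concretely, entry j of a descriptor D above is the triple starting at
   D + 7 + 3 * j, which is how both gomp_reduction_register and the binary
   search in GOMP_task_reduction_remap index it; data[8] mentioned above is
   therefore the offset field (ent[1]) of the first, lowest-offset entry.  */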
2159
2160void
2161GOMP_taskgroup_reduction_register (uintptr_t *data)
2162{
2163 struct gomp_thread *thr = gomp_thread ();
2164 struct gomp_team *team = thr->ts.team;
2165 struct gomp_task *task;
2166 unsigned nthreads;
2167 if (__builtin_expect (team == NULL, 0))
2168 {
2169 /* The task reduction code needs a team and task, so for
2170 orphaned taskgroups just create the implicit team. */
2171 gomp_create_artificial_team ();
2172 ialias_call (GOMP_taskgroup_start) ();
2173 team = thr->ts.team;
2174 }
2175 nthreads = team->nthreads;
2176 task = thr->task;
2177 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2178 task->taskgroup->reductions = data;
2179}
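/* Illustrative sketch (hypothetical user code): a construct like

     #pragma omp taskgroup task_reduction(+: sum)
     {
       #pragma omp task in_reduction(+: sum)
         sum += f ();
     }

   is expected to pass a descriptor array in the format documented above to
   GOMP_taskgroup_reduction_register on entry to the region and to
   GOMP_taskgroup_reduction_unregister (below) when the taskgroup ends.  */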
2180
2181void
2182GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2183{
2184 uintptr_t *d = data;
2185 htab_free ((struct htab *) data[5]);
2186 do
2187 {
2188 gomp_aligned_free ((void *) d[2]);
2189 d = (uintptr_t *) d[4];
2190 }
2191 while (d && !d[5]);
2192}
2193ialias (GOMP_taskgroup_reduction_unregister)
2194
2195/* For i = 0 to cnt-1, remap ptrs[i], which is either the address of the
2196   original list item or the address of a previously remapped original list
2197   item, to the address of the private copy and store that to ptrs[i].
2198 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2199 the original list item. */
2200
2201void
2202GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2203{
2204 struct gomp_thread *thr = gomp_thread ();
2205 struct gomp_task *task = thr->task;
2206 unsigned id = thr->ts.team_id;
2207 uintptr_t *data = task->taskgroup->reductions;
2208 uintptr_t *d;
2209 struct htab *reduction_htab = (struct htab *) data[5];
2210 size_t i;
2211 for (i = 0; i < cnt; ++i)
2212 {
2213 hash_entry_type ent, n;
2214 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2215 n = htab_find (reduction_htab, ent);
2216 if (n)
2217 {
2218 uintptr_t *p;
2219 __asm ("" : "=g" (p) : "0" (n));
2220 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2221 p[1] is the offset within the allocated chunk for each
2222 thread, p[2] is the array registered with
2223 GOMP_taskgroup_reduction_register, d[2] is the base of the
2224 allocated memory and d[1] is the size of the allocated chunk
2225 for one thread. */
2226 d = (uintptr_t *) p[2];
2227 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2228 if (__builtin_expect (i < cntorig, 0))
2229 ptrs[cnt + i] = (void *) p[0];
2230 continue;
2231 }
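      /* Not in the hash table: PTRS[i] was already remapped once and now
	 points somewhere inside one of the registered per-thread arrays.
	 Find the descriptor whose array contains it, recover the offset
	 within a single thread's chunk with the modulo below, and rebase
	 the pointer into this thread's chunk.  */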
2232 d = data;
2233 while (d != NULL)
2234 {
2235 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2236 break;
2237 d = (uintptr_t *) d[4];
2238 }
2239 if (d == NULL)
2240 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2241 "task modifier for %p", ptrs[i]);
2242 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2243 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2244 if (__builtin_expect (i < cntorig, 0))
2245 {
2246 size_t lo = 0, hi = d[0] - 1;
2247 while (lo <= hi)
2248 {
2249 size_t m = (lo + hi) / 2;
2250 if (d[7 + 3 * m + 1] < off)
2251 lo = m + 1;
2252 else if (d[7 + 3 * m + 1] == off)
2253 {
2254 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2255 break;
2256 }
2257 else
2258 hi = m - 1;
2259 }
2260 if (lo > hi)
2261 gomp_fatal ("couldn't find matching task_reduction or reduction "
2262 "with task modifier for %p", ptrs[i]);
2263 }
2264 }
2265}
2266
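/* Presumably called when setting up a parallel region whose reduction
   clauses use the task modifier: create a fresh taskgroup with no parent,
   register the reduction descriptors in DATA for NTHREADS threads and
   attach them to that taskgroup.  */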
2267struct gomp_taskgroup *
2268gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2269{
2270 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2271 gomp_reduction_register (data, NULL, NULL, nthreads);
2272 taskgroup->reductions = data;
2273 return taskgroup;
2274}
2275
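/* Register task reduction descriptors DATA for a worksharing construct:
   like GOMP_taskgroup_reduction_register, except that ORIG, when non-NULL,
   points at the descriptors registered by whichever thread reached the
   construct first, so their already-allocated per-thread arrays are reused
   instead of allocating new ones.  */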
2276void
2277gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2278{
2279 struct gomp_thread *thr = gomp_thread ();
2280 struct gomp_team *team = thr->ts.team;
2281 struct gomp_task *task = thr->task;
2282 unsigned nthreads = team->nthreads;
2283 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2284 task->taskgroup->reductions = data;
2285}
2286
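/* Start the implicit taskgroup that carries the task reductions of a
   worksharing construct: create the artificial team first if the thread is
   not part of one, then push a new taskgroup marked as a workshare
   taskgroup onto the current task.  */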
2287void
2288gomp_workshare_taskgroup_start (void)
2289{
2290 struct gomp_thread *thr = gomp_thread ();
2291 struct gomp_team *team = thr->ts.team;
2292 struct gomp_task *task;
2293
2294 if (team == NULL)
2295 {
2296 gomp_create_artificial_team ();
2297 team = thr->ts.team;
2298 }
2299 task = thr->task;
2300 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2301 task->taskgroup->workshare = true;
2302}
2303
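/* Tear down the worksharing task reductions set up above: end the implicit
   taskgroup, let the team master free the per-thread reduction arrays via
   GOMP_taskgroup_reduction_unregister while other threads free only the
   hash table built for their copy of the descriptors, and, unless the
   construct was CANCELLED, wait on the team barrier.  */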
2304void
2305GOMP_workshare_task_reduction_unregister (bool cancelled)
2306{
2307 struct gomp_thread *thr = gomp_thread ();
2308 struct gomp_task *task = thr->task;
2309 struct gomp_team *team = thr->ts.team;
2310 uintptr_t *data = task->taskgroup->reductions;
2311 ialias_call (GOMP_taskgroup_end) ();
2312 if (thr->ts.team_id == 0)
2313 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2314 else
2315 htab_free ((struct htab *) data[5]);
2316
2317 if (!cancelled)
2318 gomp_team_barrier_wait (&team->barrier);
2319}
2320
20906c66
JJ
2321int
2322omp_in_final (void)
2323{
2324 struct gomp_thread *thr = gomp_thread ();
2325 return thr->task && thr->task->final_task;
2326}
2327
2328ialias (omp_in_final)