libgomp/task.c
1 /* Copyright (C) 2007-2016 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35
36 static inline void *
37 htab_alloc (size_t size)
38 {
39 return gomp_malloc (size);
40 }
41
42 static inline void
43 htab_free (void *ptr)
44 {
45 free (ptr);
46 }
47
48 #include "hashtab.h"
49
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53 return hash_pointer (element->addr);
54 }
55
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59 return x->addr == y->addr;
60 }
61
62 /* Create a new task data structure. */
63
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 struct gomp_task_icv *prev_icv)
67 {
68 /* It would seem that using memset here would be a win, but it turns
69 out that partially filling gomp_task allows us to keep the
70 overhead of task creation low. In the nqueens-1.c test, for a
71 sufficiently large N, we drop the overhead from 5-6% to 1%.
72
73 Note, the nqueens-1.c test in serial mode is a good test to
74 benchmark the overhead of creating tasks as there are millions of
75 tiny tasks created that all run undeferred. */
76 task->parent = parent_task;
77 task->icv = *prev_icv;
78 task->kind = GOMP_TASK_IMPLICIT;
79 task->taskwait = NULL;
80 task->in_tied_task = false;
81 task->final_task = false;
82 task->copy_ctors_done = false;
83 task->parent_depends_on = false;
84 priority_queue_init (&task->children_queue);
85 task->taskgroup = NULL;
86 task->dependers = NULL;
87 task->depend_hash = NULL;
88 task->depend_count = 0;
89 }
90
91 /* Clean up a task, after completing it. */
92
93 void
94 gomp_end_task (void)
95 {
96 struct gomp_thread *thr = gomp_thread ();
97 struct gomp_task *task = thr->task;
98
99 gomp_finish_task (task);
100 thr->task = task->parent;
101 }
102
103 /* Clear the parent field of every task in LIST. */
104
105 static inline void
106 gomp_clear_parent_in_list (struct priority_list *list)
107 {
108 struct priority_node *p = list->tasks;
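  /* The list is a circular doubly-linked list, so walk forward until we
     wrap around to the first node again.  */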
109 if (p)
110 do
111 {
112 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113 p = p->next;
114 }
115 while (p != list->tasks);
116 }
117
118 /* Splay tree version of gomp_clear_parent_in_list.
119
120 Clear the parent field of every task in NODE within SP, and free
121 the node when done. */
122
123 static void
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125 {
126 if (!node)
127 return;
128 prio_splay_tree_node left = node->left, right = node->right;
129 gomp_clear_parent_in_list (&node->key.l);
130 #if _LIBGOMP_CHECKING_
131 memset (node, 0xaf, sizeof (*node));
132 #endif
133 /* No need to remove the node from the tree. We're nuking
134 everything, so just free the nodes and our caller can clear the
135 entire splay tree. */
136 free (node);
137 gomp_clear_parent_in_tree (sp, left);
138 gomp_clear_parent_in_tree (sp, right);
139 }
140
141 /* Clear the parent field of every task in Q and remove every task
142 from Q. */
143
144 static inline void
145 gomp_clear_parent (struct priority_queue *q)
146 {
147 if (priority_queue_multi_p (q))
148 {
149 gomp_clear_parent_in_tree (&q->t, q->t.root);
150 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 No need to remove anything. We can just nuke everything. */
152 q->t.root = NULL;
153 }
154 else
155 gomp_clear_parent_in_list (&q->l);
156 }
157
158 /* Helper function for GOMP_task and gomp_create_target_task.
159
160 For a TASK with in/out dependencies, fill in the various dependency
161 queues. PARENT is the parent of said task. DEPEND is as in
162 GOMP_task. */
163
164 static void
165 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166 void **depend)
167 {
168 size_t ndepend = (uintptr_t) depend[0];
169 size_t nout = (uintptr_t) depend[1];
170 size_t i;
171 hash_entry_type ent;
172
173 task->depend_count = ndepend;
174 task->num_dependees = 0;
175 if (parent->depend_hash == NULL)
176 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
177 for (i = 0; i < ndepend; i++)
178 {
179 task->depend[i].addr = depend[2 + i];
180 task->depend[i].next = NULL;
181 task->depend[i].prev = NULL;
182 task->depend[i].task = task;
183 task->depend[i].is_in = i >= nout;
184 task->depend[i].redundant = false;
185 task->depend[i].redundant_out = false;
186
187 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
188 &task->depend[i], INSERT);
189 hash_entry_type out = NULL, last = NULL;
190 if (*slot)
191 {
192 /* If this task has several depend clauses on the same address,
193 all but the first one are redundant.  As inout/out come first,
194 if any of them is inout/out, it wins, which is the right semantics. */
195 if ((*slot)->task == task)
196 {
197 task->depend[i].redundant = true;
198 continue;
199 }
200 for (ent = *slot; ent; ent = ent->next)
201 {
202 if (ent->redundant_out)
203 break;
204
205 last = ent;
206
207 /* depend(in:...) doesn't depend on earlier depend(in:...). */
208 if (i >= nout && ent->is_in)
209 continue;
210
211 if (!ent->is_in)
212 out = ent;
213
214 struct gomp_task *tsk = ent->task;
215 if (tsk->dependers == NULL)
216 {
217 tsk->dependers
218 = gomp_malloc (sizeof (struct gomp_dependers_vec)
219 + 6 * sizeof (struct gomp_task *));
220 tsk->dependers->n_elem = 1;
221 tsk->dependers->allocated = 6;
222 tsk->dependers->elem[0] = task;
223 task->num_dependees++;
224 continue;
225 }
226 /* We already have some other dependency on tsk from an earlier
227 depend clause. */
228 else if (tsk->dependers->n_elem
229 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
230 == task))
231 continue;
232 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
233 {
234 tsk->dependers->allocated
235 = tsk->dependers->allocated * 2 + 2;
236 tsk->dependers
237 = gomp_realloc (tsk->dependers,
238 sizeof (struct gomp_dependers_vec)
239 + (tsk->dependers->allocated
240 * sizeof (struct gomp_task *)));
241 }
242 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
243 task->num_dependees++;
244 }
245 task->depend[i].next = *slot;
246 (*slot)->prev = &task->depend[i];
247 }
248 *slot = &task->depend[i];
249
250 /* There is no need to store more than one depend({,in}out:) task per
251 address in the hash table chain for the purpose of creation of
252 deferred tasks, because each out depends on all earlier outs, thus it
253 is enough to record just the last depend({,in}out:). For depend(in:),
254 we need to keep all of the previous ones not terminated yet, because
255 a later depend({,in}out:) might need to depend on all of them. So, if
256 the new task's clause is depend({,in}out:), we know there is at most
257 one other depend({,in}out:) clause in the list (out). For
258 non-deferred tasks we want to see all outs, so they are moved to the
259 end of the chain; after the first redundant_out entry, all
260 following entries should be redundant_out. */
261 if (!task->depend[i].is_in && out)
262 {
263 if (out != last)
264 {
265 out->next->prev = out->prev;
266 out->prev->next = out->next;
267 out->next = last->next;
268 out->prev = last;
269 last->next = out;
270 if (out->next)
271 out->next->prev = out;
272 }
273 out->redundant_out = true;
274 }
275 }
276 }
277
278 /* Called when encountering an explicit task directive. If IF_CLAUSE is
279 false, then we must not delay in executing the task. If UNTIED is true,
280 then the task may be executed by any member of the team.
281
282 DEPEND is an array containing:
283 depend[0]: number of depend elements.
284 depend[1]: number of depend elements of type "out".
285 depend[2..N+1]: address of [1..N]th depend element. */
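/* For illustration only (the variable names here are hypothetical), a
   construct such as

       #pragma omp task depend(out: x) depend(in: y, z)

   might be lowered by the compiler into a call whose DEPEND array looks
   roughly like

       void *depend[] = { (void *) 3,    three depend elements in total
                          (void *) 1,    one of them is out/inout
                          &x, &y, &z };  out/inout addresses come first

   i.e. out/inout entries always precede plain "in" entries.  */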
286
287 void
288 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
289 long arg_size, long arg_align, bool if_clause, unsigned flags,
290 void **depend, int priority)
291 {
292 struct gomp_thread *thr = gomp_thread ();
293 struct gomp_team *team = thr->ts.team;
294
295 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
296 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
297 tied to one thread all the time. This means UNTIED tasks must be
298 tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
299 might be running on a different thread than FN. */
300 if (cpyfn)
301 if_clause = false;
302 flags &= ~GOMP_TASK_FLAG_UNTIED;
303 #endif
304
305 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
306 if (team
307 && (gomp_team_barrier_cancelled (&team->barrier)
308 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
309 return;
310
311 if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
312 priority = 0;
313 else if (priority > gomp_max_task_priority_var)
314 priority = gomp_max_task_priority_var;
315
316 if (!if_clause || team == NULL
317 || (thr->task && thr->task->final_task)
318 || team->task_count > 64 * team->nthreads)
319 {
320 struct gomp_task task;
321
322 /* If there are depend clauses and earlier deferred sibling tasks
323 with depend clauses, check whether this task depends on any of them.
324 If it does, we need to wait for them.  There is no need to handle
325 depend clauses for non-deferred tasks other than this, because
326 the parent task is suspended until the child task finishes and thus
327 it can't start further child tasks. */
328 if ((flags & GOMP_TASK_FLAG_DEPEND)
329 && thr->task && thr->task->depend_hash)
330 gomp_task_maybe_wait_for_dependencies (depend);
331
332 gomp_init_task (&task, thr->task, gomp_icv (false));
333 task.kind = GOMP_TASK_UNDEFERRED;
334 task.final_task = (thr->task && thr->task->final_task)
335 || (flags & GOMP_TASK_FLAG_FINAL);
336 task.priority = priority;
337 if (thr->task)
338 {
339 task.in_tied_task = thr->task->in_tied_task;
340 task.taskgroup = thr->task->taskgroup;
341 }
342 thr->task = &task;
343 if (__builtin_expect (cpyfn != NULL, 0))
344 {
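	  /* ARG below is BUF rounded up to the next ARG_ALIGN boundary:
	     adding ARG_ALIGN - 1 and then masking off the low bits yields
	     the smallest suitably aligned address within BUF (ARG_ALIGN is
	     assumed to be a power of two).  */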
345 char buf[arg_size + arg_align - 1];
346 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
347 & ~(uintptr_t) (arg_align - 1));
348 cpyfn (arg, data);
349 fn (arg);
350 }
351 else
352 fn (data);
353 /* Access to "children" is normally done inside a task_lock
354 mutex region, but the only way this particular task.children
355 can be set is if this thread's task work function (fn)
356 creates children. So since the setter is *this* thread, we
357 need no barriers here when testing for non-NULL. We can have
358 task.children set by the current thread then changed by a
359 child thread, but seeing a stale non-NULL value is not a
360 problem. Once past the task_lock acquisition, this thread
361 will see the real value of task.children. */
362 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
363 {
364 gomp_mutex_lock (&team->task_lock);
365 gomp_clear_parent (&task.children_queue);
366 gomp_mutex_unlock (&team->task_lock);
367 }
368 gomp_end_task ();
369 }
370 else
371 {
372 struct gomp_task *task;
373 struct gomp_task *parent = thr->task;
374 struct gomp_taskgroup *taskgroup = parent->taskgroup;
375 char *arg;
376 bool do_wake;
377 size_t depend_size = 0;
378
379 if (flags & GOMP_TASK_FLAG_DEPEND)
380 depend_size = ((uintptr_t) depend[0]
381 * sizeof (struct gomp_task_depend_entry));
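	  /* Everything lives in a single allocation: the gomp_task itself,
	     then DEPEND_SIZE bytes of gomp_task_depend_entry, then the copy
	     of the task arguments, with ARG pointing at the argument area
	     rounded up to ARG_ALIGN.  */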
382 task = gomp_malloc (sizeof (*task) + depend_size
383 + arg_size + arg_align - 1);
384 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
385 & ~(uintptr_t) (arg_align - 1));
386 gomp_init_task (task, parent, gomp_icv (false));
387 task->priority = priority;
388 task->kind = GOMP_TASK_UNDEFERRED;
389 task->in_tied_task = parent->in_tied_task;
390 task->taskgroup = taskgroup;
391 thr->task = task;
392 if (cpyfn)
393 {
394 cpyfn (arg, data);
395 task->copy_ctors_done = true;
396 }
397 else
398 memcpy (arg, data, arg_size);
399 thr->task = parent;
400 task->kind = GOMP_TASK_WAITING;
401 task->fn = fn;
402 task->fn_data = arg;
403 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
404 gomp_mutex_lock (&team->task_lock);
405 /* If parallel or taskgroup has been cancelled, don't start new
406 tasks. */
407 if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
408 || (taskgroup && taskgroup->cancelled))
409 && !task->copy_ctors_done, 0))
410 {
411 gomp_mutex_unlock (&team->task_lock);
412 gomp_finish_task (task);
413 free (task);
414 return;
415 }
416 if (taskgroup)
417 taskgroup->num_children++;
418 if (depend_size)
419 {
420 gomp_task_handle_depend (task, parent, depend);
421 if (task->num_dependees)
422 {
423 /* Tasks that depend on other tasks are not put into the
424 various waiting queues, so we are done for now. Said
425 tasks are instead put into the queues via
426 gomp_task_run_post_handle_dependers() after their
427 dependencies have been satisfied. After which, they
428 can be picked up by the various scheduling
429 points. */
430 gomp_mutex_unlock (&team->task_lock);
431 return;
432 }
433 }
434
435 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
436 task, priority,
437 PRIORITY_INSERT_BEGIN,
438 /*adjust_parent_depends_on=*/false,
439 task->parent_depends_on);
440 if (taskgroup)
441 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
442 task, priority,
443 PRIORITY_INSERT_BEGIN,
444 /*adjust_parent_depends_on=*/false,
445 task->parent_depends_on);
446
447 priority_queue_insert (PQ_TEAM, &team->task_queue,
448 task, priority,
449 PRIORITY_INSERT_END,
450 /*adjust_parent_depends_on=*/false,
451 task->parent_depends_on);
452
453 ++team->task_count;
454 ++team->task_queued_count;
455 gomp_team_barrier_set_task_pending (&team->barrier);
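      /* Only bother waking another worker if there appears to be an idle
	 thread available to pick up the newly queued task.  */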
456 do_wake = team->task_running_count + !parent->in_tied_task
457 < team->nthreads;
458 gomp_mutex_unlock (&team->task_lock);
459 if (do_wake)
460 gomp_team_barrier_wake (&team->barrier, 1);
461 }
462 }
463
464 ialias (GOMP_taskgroup_start)
465 ialias (GOMP_taskgroup_end)
466
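/* The taskloop implementation is instantiated twice by textually including
   taskloop.c: once with TYPE == long to provide GOMP_taskloop, and once
   with TYPE == unsigned long long to provide GOMP_taskloop_ull.  */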
467 #define TYPE long
468 #define UTYPE unsigned long
469 #define TYPE_is_long 1
470 #include "taskloop.c"
471 #undef TYPE
472 #undef UTYPE
473 #undef TYPE_is_long
474
475 #define TYPE unsigned long long
476 #define UTYPE TYPE
477 #define GOMP_taskloop GOMP_taskloop_ull
478 #include "taskloop.c"
479 #undef TYPE
480 #undef UTYPE
481 #undef GOMP_taskloop
482
483 static void inline
484 priority_queue_move_task_first (enum priority_queue_type type,
485 struct priority_queue *head,
486 struct gomp_task *task)
487 {
488 #if _LIBGOMP_CHECKING_
489 if (!priority_queue_task_in_queue_p (type, head, task))
490 gomp_fatal ("Attempt to move first missing task %p", task);
491 #endif
492 struct priority_list *list;
493 if (priority_queue_multi_p (head))
494 {
495 list = priority_queue_lookup_priority (head, task->priority);
496 #if _LIBGOMP_CHECKING_
497 if (!list)
498 gomp_fatal ("Unable to find priority %d", task->priority);
499 #endif
500 }
501 else
502 list = &head->l;
503 priority_list_remove (list, task_to_priority_node (type, task), 0);
504 priority_list_insert (type, list, task, task->priority,
505 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
506 task->parent_depends_on);
507 }
508
509 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
510 with team->task_lock held, or is executed in the thread that called
511 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
512 run before it acquires team->task_lock. */
513
514 static void
515 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
516 {
517 struct gomp_task *parent = task->parent;
518 if (parent)
519 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
520 task);
521
522 struct gomp_taskgroup *taskgroup = task->taskgroup;
523 if (taskgroup)
524 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
525 task);
526
527 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
528 PRIORITY_INSERT_BEGIN, false,
529 task->parent_depends_on);
530 task->kind = GOMP_TASK_WAITING;
531 if (parent && parent->taskwait)
532 {
533 if (parent->taskwait->in_taskwait)
534 {
535 /* One more task has had its dependencies met.
536 Inform any waiters. */
537 parent->taskwait->in_taskwait = false;
538 gomp_sem_post (&parent->taskwait->taskwait_sem);
539 }
540 else if (parent->taskwait->in_depend_wait)
541 {
542 /* One more task has had its dependencies met.
543 Inform any waiters. */
544 parent->taskwait->in_depend_wait = false;
545 gomp_sem_post (&parent->taskwait->taskwait_sem);
546 }
547 }
548 if (taskgroup && taskgroup->in_taskgroup_wait)
549 {
550 /* One more task has had its dependencies met.
551 Inform any waiters. */
552 taskgroup->in_taskgroup_wait = false;
553 gomp_sem_post (&taskgroup->taskgroup_sem);
554 }
555
556 ++team->task_queued_count;
557 gomp_team_barrier_set_task_pending (&team->barrier);
558 /* I'm afraid this can't be done after releasing team->task_lock,
559 as gomp_target_task_completion is run from an unrelated thread and
560 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
561 the team could be gone already. */
562 if (team->nthreads > team->task_running_count)
563 gomp_team_barrier_wake (&team->barrier, 1);
564 }
565
566 /* Signal that a target task TTASK has completed the asynchronously
567 running phase and should be requeued as a task to handle the
568 variable unmapping. */
569
570 void
571 GOMP_PLUGIN_target_task_completion (void *data)
572 {
573 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
574 struct gomp_task *task = ttask->task;
575 struct gomp_team *team = ttask->team;
576
577 gomp_mutex_lock (&team->task_lock);
578 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
579 {
580 ttask->state = GOMP_TARGET_TASK_FINISHED;
581 gomp_mutex_unlock (&team->task_lock);
582 return;
583 }
584 ttask->state = GOMP_TARGET_TASK_FINISHED;
585 free (ttask->firstprivate_copies);
586 gomp_target_task_completion (team, task);
587 gomp_mutex_unlock (&team->task_lock);
588 }
589
590 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
591
592 /* Called for nowait target tasks. */
593
594 bool
595 gomp_create_target_task (struct gomp_device_descr *devicep,
596 void (*fn) (void *), size_t mapnum, void **hostaddrs,
597 size_t *sizes, unsigned short *kinds,
598 unsigned int flags, void **depend, void **args,
599 enum gomp_target_task_state state)
600 {
601 struct gomp_thread *thr = gomp_thread ();
602 struct gomp_team *team = thr->ts.team;
603
604 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
605 if (team
606 && (gomp_team_barrier_cancelled (&team->barrier)
607 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
608 return true;
609
610 struct gomp_target_task *ttask;
611 struct gomp_task *task;
612 struct gomp_task *parent = thr->task;
613 struct gomp_taskgroup *taskgroup = parent->taskgroup;
614 bool do_wake;
615 size_t depend_size = 0;
616 uintptr_t depend_cnt = 0;
617 size_t tgt_align = 0, tgt_size = 0;
618
619 if (depend != NULL)
620 {
621 depend_cnt = (uintptr_t) depend[0];
622 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
623 }
624 if (fn)
625 {
626 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
627 firstprivate on the target task. */
628 size_t i;
629 for (i = 0; i < mapnum; i++)
630 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
631 {
632 size_t align = (size_t) 1 << (kinds[i] >> 8);
633 if (tgt_align < align)
634 tgt_align = align;
635 tgt_size = (tgt_size + align - 1) & ~(align - 1);
636 tgt_size += sizes[i];
637 }
638 if (tgt_align)
639 tgt_size += tgt_align - 1;
640 else
641 tgt_size = 0;
642 }
643
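  /* Allocate everything in one block: the gomp_task, its depend entries,
     the gomp_target_task, copies of the hostaddrs/sizes/kinds arrays, and
     finally a suitably aligned area that will hold the firstprivate copies
     computed above.  */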
644 task = gomp_malloc (sizeof (*task) + depend_size
645 + sizeof (*ttask)
646 + mapnum * (sizeof (void *) + sizeof (size_t)
647 + sizeof (unsigned short))
648 + tgt_size);
649 gomp_init_task (task, parent, gomp_icv (false));
650 task->priority = 0;
651 task->kind = GOMP_TASK_WAITING;
652 task->in_tied_task = parent->in_tied_task;
653 task->taskgroup = taskgroup;
654 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
655 ttask->devicep = devicep;
656 ttask->fn = fn;
657 ttask->mapnum = mapnum;
658 ttask->args = args;
659 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
660 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
661 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
662 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
663 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
664 if (tgt_align)
665 {
666 char *tgt = (char *) &ttask->kinds[mapnum];
667 size_t i;
668 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
669 if (al)
670 tgt += tgt_align - al;
671 tgt_size = 0;
672 for (i = 0; i < mapnum; i++)
673 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
674 {
675 size_t align = (size_t) 1 << (kinds[i] >> 8);
676 tgt_size = (tgt_size + align - 1) & ~(align - 1);
677 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
678 ttask->hostaddrs[i] = tgt + tgt_size;
679 tgt_size = tgt_size + sizes[i];
680 }
681 }
682 ttask->flags = flags;
683 ttask->state = state;
684 ttask->task = task;
685 ttask->team = team;
686 task->fn = NULL;
687 task->fn_data = ttask;
688 task->final_task = 0;
689 gomp_mutex_lock (&team->task_lock);
690 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
691 if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
692 || (taskgroup && taskgroup->cancelled), 0))
693 {
694 gomp_mutex_unlock (&team->task_lock);
695 gomp_finish_task (task);
696 free (task);
697 return true;
698 }
699 if (depend_size)
700 {
701 gomp_task_handle_depend (task, parent, depend);
702 if (task->num_dependees)
703 {
704 if (taskgroup)
705 taskgroup->num_children++;
706 gomp_mutex_unlock (&team->task_lock);
707 return true;
708 }
709 }
710 if (state == GOMP_TARGET_TASK_DATA)
711 {
712 gomp_task_run_post_handle_depend_hash (task);
713 gomp_mutex_unlock (&team->task_lock);
714 gomp_finish_task (task);
715 free (task);
716 return false;
717 }
718 if (taskgroup)
719 taskgroup->num_children++;
720 /* For async offloading, if we don't need to wait for dependencies,
721 run the gomp_target_task_fn right away, essentially schedule the
722 mapping part of the task in the current thread. */
723 if (devicep != NULL
724 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
725 {
726 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
727 PRIORITY_INSERT_END,
728 /*adjust_parent_depends_on=*/false,
729 task->parent_depends_on);
730 if (taskgroup)
731 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
732 task, 0, PRIORITY_INSERT_END,
733 /*adjust_parent_depends_on=*/false,
734 task->parent_depends_on);
735 task->pnode[PQ_TEAM].next = NULL;
736 task->pnode[PQ_TEAM].prev = NULL;
737 task->kind = GOMP_TASK_TIED;
738 ++team->task_count;
739 gomp_mutex_unlock (&team->task_lock);
740
741 thr->task = task;
742 gomp_target_task_fn (task->fn_data);
743 thr->task = parent;
744
745 gomp_mutex_lock (&team->task_lock);
746 task->kind = GOMP_TASK_ASYNC_RUNNING;
747 /* If GOMP_PLUGIN_target_task_completion has run already
748 in between gomp_target_task_fn and the mutex lock,
749 perform the requeuing here. */
750 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
751 gomp_target_task_completion (team, task);
752 else
753 ttask->state = GOMP_TARGET_TASK_RUNNING;
754 gomp_mutex_unlock (&team->task_lock);
755 return true;
756 }
757 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
758 PRIORITY_INSERT_BEGIN,
759 /*adjust_parent_depends_on=*/false,
760 task->parent_depends_on);
761 if (taskgroup)
762 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
763 PRIORITY_INSERT_BEGIN,
764 /*adjust_parent_depends_on=*/false,
765 task->parent_depends_on);
766 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
767 PRIORITY_INSERT_END,
768 /*adjust_parent_depends_on=*/false,
769 task->parent_depends_on);
770 ++team->task_count;
771 ++team->task_queued_count;
772 gomp_team_barrier_set_task_pending (&team->barrier);
773 do_wake = team->task_running_count + !parent->in_tied_task
774 < team->nthreads;
775 gomp_mutex_unlock (&team->task_lock);
776 if (do_wake)
777 gomp_team_barrier_wake (&team->barrier, 1);
778 return true;
779 }
780
781 /* Given a parent_depends_on task in LIST, move it to the front of its
782 priority so it is run as soon as possible.
783
784 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
785
786 We rearrange the queue such that all parent_depends_on tasks are
787 first, and last_parent_depends_on points to the last such task we
788 rearranged. For example, given the following tasks in a queue
789 where PD[123] are the parent_depends_on tasks:
790
791 task->children
792 |
793 V
794 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
795
796 We rearrange such that:
797
798 task->children
799 | +--- last_parent_depends_on
800 | |
801 V V
802 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
803
804 static void inline
805 priority_list_upgrade_task (struct priority_list *list,
806 struct priority_node *node)
807 {
808 struct priority_node *last_parent_depends_on
809 = list->last_parent_depends_on;
810 if (last_parent_depends_on)
811 {
812 node->prev->next = node->next;
813 node->next->prev = node->prev;
814 node->prev = last_parent_depends_on;
815 node->next = last_parent_depends_on->next;
816 node->prev->next = node;
817 node->next->prev = node;
818 }
819 else if (node != list->tasks)
820 {
821 node->prev->next = node->next;
822 node->next->prev = node->prev;
823 node->prev = list->tasks->prev;
824 node->next = list->tasks;
825 list->tasks = node;
826 node->prev->next = node;
827 node->next->prev = node;
828 }
829 list->last_parent_depends_on = node;
830 }
831
832 /* Given a parent_depends_on TASK in its parent's children_queue, move
833 it to the front of its priority so it is run as soon as possible.
834
835 PARENT is passed as an optimization.
836
837 (This function could be defined in priority_queue.c, but we want it
838 inlined, and putting it in priority_queue.h is not an option, given
839 that gomp_task has not been properly defined at that point). */
840
841 static void inline
842 priority_queue_upgrade_task (struct gomp_task *task,
843 struct gomp_task *parent)
844 {
845 struct priority_queue *head = &parent->children_queue;
846 struct priority_node *node = &task->pnode[PQ_CHILDREN];
847 #if _LIBGOMP_CHECKING_
848 if (!task->parent_depends_on)
849 gomp_fatal ("priority_queue_upgrade_task: task must be a "
850 "parent_depends_on task");
851 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
852 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
853 #endif
854 if (priority_queue_multi_p (head))
855 {
856 struct priority_list *list
857 = priority_queue_lookup_priority (head, task->priority);
858 priority_list_upgrade_task (list, node);
859 }
860 else
861 priority_list_upgrade_task (&head->l, node);
862 }
863
864 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
865 the way in LIST so that other tasks can be considered for
866 execution. LIST contains tasks of type TYPE.
867
868 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
869 if applicable. */
870
871 static void inline
872 priority_list_downgrade_task (enum priority_queue_type type,
873 struct priority_list *list,
874 struct gomp_task *child_task)
875 {
876 struct priority_node *node = task_to_priority_node (type, child_task);
877 if (list->tasks == node)
878 list->tasks = node->next;
879 else if (node->next != list->tasks)
880 {
881 /* The task in NODE is about to become TIED and TIED tasks
882 cannot come before WAITING tasks. If we're about to
883 leave the queue in such an indeterminate state, rewire
884 things appropriately. However, a TIED task at the end is
885 perfectly fine. */
886 struct gomp_task *next_task = priority_node_to_task (type, node->next);
887 if (next_task->kind == GOMP_TASK_WAITING)
888 {
889 /* Remove from list. */
890 node->prev->next = node->next;
891 node->next->prev = node->prev;
892 /* Rewire at the end. */
893 node->next = list->tasks;
894 node->prev = list->tasks->prev;
895 list->tasks->prev->next = node;
896 list->tasks->prev = node;
897 }
898 }
899
900 /* If the current task is the last_parent_depends_on for its
901 priority, adjust last_parent_depends_on appropriately. */
902 if (__builtin_expect (child_task->parent_depends_on, 0)
903 && list->last_parent_depends_on == node)
904 {
905 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
906 if (node->prev != node
907 && prev_child->kind == GOMP_TASK_WAITING
908 && prev_child->parent_depends_on)
909 list->last_parent_depends_on = node->prev;
910 else
911 {
912 /* There are no more parent_depends_on entries waiting
913 to run, clear the list. */
914 list->last_parent_depends_on = NULL;
915 }
916 }
917 }
918
919 /* Given a TASK in HEAD that is about to be executed, move it out of
920 the way so that other tasks can be considered for execution. HEAD
921 contains tasks of type TYPE.
922
923 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
924 if applicable.
925
926 (This function could be defined in priority_queue.c, but we want it
927 inlined, and putting it in priority_queue.h is not an option, given
928 that gomp_task has not been properly defined at that point). */
929
930 static void inline
931 priority_queue_downgrade_task (enum priority_queue_type type,
932 struct priority_queue *head,
933 struct gomp_task *task)
934 {
935 #if _LIBGOMP_CHECKING_
936 if (!priority_queue_task_in_queue_p (type, head, task))
937 gomp_fatal ("Attempt to downgrade missing task %p", task);
938 #endif
939 if (priority_queue_multi_p (head))
940 {
941 struct priority_list *list
942 = priority_queue_lookup_priority (head, task->priority);
943 priority_list_downgrade_task (type, list, task);
944 }
945 else
946 priority_list_downgrade_task (type, &head->l, task);
947 }
948
949 /* Setup CHILD_TASK to execute. This is done by setting the task to
950 TIED, and updating all relevant queues so that CHILD_TASK is no
951 longer chosen for scheduling. Also, remove CHILD_TASK from the
952 overall team task queue entirely.
953
954 Return TRUE if task or its containing taskgroup has been
955 cancelled. */
956
957 static inline bool
958 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
959 struct gomp_team *team)
960 {
961 #if _LIBGOMP_CHECKING_
962 if (child_task->parent)
963 priority_queue_verify (PQ_CHILDREN,
964 &child_task->parent->children_queue, true);
965 if (child_task->taskgroup)
966 priority_queue_verify (PQ_TASKGROUP,
967 &child_task->taskgroup->taskgroup_queue, false);
968 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
969 #endif
970
971 /* Task is about to go tied, move it out of the way. */
972 if (parent)
973 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
974 child_task);
975
976 /* Task is about to go tied, move it out of the way. */
977 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
978 if (taskgroup)
979 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
980 child_task);
981
982 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
983 MEMMODEL_RELAXED);
984 child_task->pnode[PQ_TEAM].next = NULL;
985 child_task->pnode[PQ_TEAM].prev = NULL;
986 child_task->kind = GOMP_TASK_TIED;
987
988 if (--team->task_queued_count == 0)
989 gomp_team_barrier_clear_task_pending (&team->barrier);
990 if ((gomp_team_barrier_cancelled (&team->barrier)
991 || (taskgroup && taskgroup->cancelled))
992 && !child_task->copy_ctors_done)
993 return true;
994 return false;
995 }
996
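/* Remove each non-redundant depend entry of CHILD_TASK from its parent's
   dependency hash table: unlink it from the per-address chain, and if it
   was the last remaining entry for its address, clear the hash slot.  */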
997 static void
998 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
999 {
1000 struct gomp_task *parent = child_task->parent;
1001 size_t i;
1002
1003 for (i = 0; i < child_task->depend_count; i++)
1004 if (!child_task->depend[i].redundant)
1005 {
1006 if (child_task->depend[i].next)
1007 child_task->depend[i].next->prev = child_task->depend[i].prev;
1008 if (child_task->depend[i].prev)
1009 child_task->depend[i].prev->next = child_task->depend[i].next;
1010 else
1011 {
1012 hash_entry_type *slot
1013 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1014 NO_INSERT);
1015 if (*slot != &child_task->depend[i])
1016 abort ();
1017 if (child_task->depend[i].next)
1018 *slot = child_task->depend[i].next;
1019 else
1020 htab_clear_slot (parent->depend_hash, slot);
1021 }
1022 }
1023 }
1024
1025 /* After a CHILD_TASK has been run, adjust the dependency queue for
1026 each task that depends on CHILD_TASK, to record the fact that there
1027 is one less dependency to worry about. If a task that depended on
1028 CHILD_TASK now has no dependencies, place it in the various queues
1029 so it gets scheduled to run.
1030
1031 TEAM is the team to which CHILD_TASK belongs. */
1032
1033 static size_t
1034 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1035 struct gomp_team *team)
1036 {
1037 struct gomp_task *parent = child_task->parent;
1038 size_t i, count = child_task->dependers->n_elem, ret = 0;
1039 for (i = 0; i < count; i++)
1040 {
1041 struct gomp_task *task = child_task->dependers->elem[i];
1042
1043 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1044 TASK's remaining dependencies. Once TASK has no other
1045 dependencies, put it into the various queues so it will get
1046 scheduled for execution. */
1047 if (--task->num_dependees != 0)
1048 continue;
1049
1050 struct gomp_taskgroup *taskgroup = task->taskgroup;
1051 if (parent)
1052 {
1053 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1054 task, task->priority,
1055 PRIORITY_INSERT_BEGIN,
1056 /*adjust_parent_depends_on=*/true,
1057 task->parent_depends_on);
1058 if (parent->taskwait)
1059 {
1060 if (parent->taskwait->in_taskwait)
1061 {
1062 /* One more task has had its dependencies met.
1063 Inform any waiters. */
1064 parent->taskwait->in_taskwait = false;
1065 gomp_sem_post (&parent->taskwait->taskwait_sem);
1066 }
1067 else if (parent->taskwait->in_depend_wait)
1068 {
1069 /* One more task has had its dependencies met.
1070 Inform any waiters. */
1071 parent->taskwait->in_depend_wait = false;
1072 gomp_sem_post (&parent->taskwait->taskwait_sem);
1073 }
1074 }
1075 }
1076 if (taskgroup)
1077 {
1078 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1079 task, task->priority,
1080 PRIORITY_INSERT_BEGIN,
1081 /*adjust_parent_depends_on=*/false,
1082 task->parent_depends_on);
1083 if (taskgroup->in_taskgroup_wait)
1084 {
1085 /* One more task has had its dependencies met.
1086 Inform any waiters. */
1087 taskgroup->in_taskgroup_wait = false;
1088 gomp_sem_post (&taskgroup->taskgroup_sem);
1089 }
1090 }
1091 priority_queue_insert (PQ_TEAM, &team->task_queue,
1092 task, task->priority,
1093 PRIORITY_INSERT_END,
1094 /*adjust_parent_depends_on=*/false,
1095 task->parent_depends_on);
1096 ++team->task_count;
1097 ++team->task_queued_count;
1098 ++ret;
1099 }
1100 free (child_task->dependers);
1101 child_task->dependers = NULL;
1102 if (ret > 1)
1103 gomp_team_barrier_set_task_pending (&team->barrier);
1104 return ret;
1105 }
1106
1107 static inline size_t
1108 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1109 struct gomp_team *team)
1110 {
1111 if (child_task->depend_count == 0)
1112 return 0;
1113
1114 /* If the parent is gone already, the hash table has been freed and
1115 nothing will use it anymore, so there is no need to remove anything from it. */
1116 if (child_task->parent != NULL)
1117 gomp_task_run_post_handle_depend_hash (child_task);
1118
1119 if (child_task->dependers == NULL)
1120 return 0;
1121
1122 return gomp_task_run_post_handle_dependers (child_task, team);
1123 }
1124
1125 /* Remove CHILD_TASK from its parent. */
1126
1127 static inline void
1128 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1129 {
1130 struct gomp_task *parent = child_task->parent;
1131 if (parent == NULL)
1132 return;
1133
1134 /* If this was the last task the parent was depending on,
1135 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1136 clean up and return. */
1137 if (__builtin_expect (child_task->parent_depends_on, 0)
1138 && --parent->taskwait->n_depend == 0
1139 && parent->taskwait->in_depend_wait)
1140 {
1141 parent->taskwait->in_depend_wait = false;
1142 gomp_sem_post (&parent->taskwait->taskwait_sem);
1143 }
1144
1145 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1146 child_task, MEMMODEL_RELEASE)
1147 && parent->taskwait && parent->taskwait->in_taskwait)
1148 {
1149 parent->taskwait->in_taskwait = false;
1150 gomp_sem_post (&parent->taskwait->taskwait_sem);
1151 }
1152 child_task->pnode[PQ_CHILDREN].next = NULL;
1153 child_task->pnode[PQ_CHILDREN].prev = NULL;
1154 }
1155
1156 /* Remove CHILD_TASK from its taskgroup. */
1157
1158 static inline void
1159 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1160 {
1161 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1162 if (taskgroup == NULL)
1163 return;
1164 bool empty = priority_queue_remove (PQ_TASKGROUP,
1165 &taskgroup->taskgroup_queue,
1166 child_task, MEMMODEL_RELAXED);
1167 child_task->pnode[PQ_TASKGROUP].next = NULL;
1168 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1169 if (taskgroup->num_children > 1)
1170 --taskgroup->num_children;
1171 else
1172 {
1173 /* We access taskgroup->num_children in GOMP_taskgroup_end
1174 outside of the task lock mutex region, so
1175 we need a release barrier here to ensure memory
1176 written by child_task->fn above is flushed
1177 before the zero is written. */
1178 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1179 }
1180 if (empty && taskgroup->in_taskgroup_wait)
1181 {
1182 taskgroup->in_taskgroup_wait = false;
1183 gomp_sem_post (&taskgroup->taskgroup_sem);
1184 }
1185 }
1186
1187 void
1188 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1189 {
1190 struct gomp_thread *thr = gomp_thread ();
1191 struct gomp_team *team = thr->ts.team;
1192 struct gomp_task *task = thr->task;
1193 struct gomp_task *child_task = NULL;
1194 struct gomp_task *to_free = NULL;
1195 int do_wake = 0;
1196
1197 gomp_mutex_lock (&team->task_lock);
1198 if (gomp_barrier_last_thread (state))
1199 {
1200 if (team->task_count == 0)
1201 {
1202 gomp_team_barrier_done (&team->barrier, state);
1203 gomp_mutex_unlock (&team->task_lock);
1204 gomp_team_barrier_wake (&team->barrier, 0);
1205 return;
1206 }
1207 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1208 }
1209
1210 while (1)
1211 {
1212 bool cancelled = false;
1213 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1214 {
1215 bool ignored;
1216 child_task
1217 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1218 PQ_IGNORED, NULL,
1219 &ignored);
1220 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1221 team);
1222 if (__builtin_expect (cancelled, 0))
1223 {
1224 if (to_free)
1225 {
1226 gomp_finish_task (to_free);
1227 free (to_free);
1228 to_free = NULL;
1229 }
1230 goto finish_cancelled;
1231 }
1232 team->task_running_count++;
1233 child_task->in_tied_task = true;
1234 }
1235 gomp_mutex_unlock (&team->task_lock);
1236 if (do_wake)
1237 {
1238 gomp_team_barrier_wake (&team->barrier, do_wake);
1239 do_wake = 0;
1240 }
1241 if (to_free)
1242 {
1243 gomp_finish_task (to_free);
1244 free (to_free);
1245 to_free = NULL;
1246 }
1247 if (child_task)
1248 {
1249 thr->task = child_task;
1250 if (__builtin_expect (child_task->fn == NULL, 0))
1251 {
1252 if (gomp_target_task_fn (child_task->fn_data))
1253 {
1254 thr->task = task;
1255 gomp_mutex_lock (&team->task_lock);
1256 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1257 team->task_running_count--;
1258 struct gomp_target_task *ttask
1259 = (struct gomp_target_task *) child_task->fn_data;
1260 /* If GOMP_PLUGIN_target_task_completion has run already
1261 in between gomp_target_task_fn and the mutex lock,
1262 perform the requeuing here. */
1263 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1264 gomp_target_task_completion (team, child_task);
1265 else
1266 ttask->state = GOMP_TARGET_TASK_RUNNING;
1267 child_task = NULL;
1268 continue;
1269 }
1270 }
1271 else
1272 child_task->fn (child_task->fn_data);
1273 thr->task = task;
1274 }
1275 else
1276 return;
1277 gomp_mutex_lock (&team->task_lock);
1278 if (child_task)
1279 {
1280 finish_cancelled:;
1281 size_t new_tasks
1282 = gomp_task_run_post_handle_depend (child_task, team);
1283 gomp_task_run_post_remove_parent (child_task);
1284 gomp_clear_parent (&child_task->children_queue);
1285 gomp_task_run_post_remove_taskgroup (child_task);
1286 to_free = child_task;
1287 child_task = NULL;
1288 if (!cancelled)
1289 team->task_running_count--;
1290 if (new_tasks > 1)
1291 {
1292 do_wake = team->nthreads - team->task_running_count;
1293 if (do_wake > new_tasks)
1294 do_wake = new_tasks;
1295 }
1296 if (--team->task_count == 0
1297 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1298 {
1299 gomp_team_barrier_done (&team->barrier, state);
1300 gomp_mutex_unlock (&team->task_lock);
1301 gomp_team_barrier_wake (&team->barrier, 0);
1302 gomp_mutex_lock (&team->task_lock);
1303 }
1304 }
1305 }
1306 }
1307
1308 /* Called when encountering a taskwait directive.
1309
1310 Wait for all children of the current task. */
1311
1312 void
1313 GOMP_taskwait (void)
1314 {
1315 struct gomp_thread *thr = gomp_thread ();
1316 struct gomp_team *team = thr->ts.team;
1317 struct gomp_task *task = thr->task;
1318 struct gomp_task *child_task = NULL;
1319 struct gomp_task *to_free = NULL;
1320 struct gomp_taskwait taskwait;
1321 int do_wake = 0;
1322
1323 /* The acquire barrier on load of task->children here synchronizes
1324 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1325 not necessary that we synchronize with other non-NULL writes at
1326 this point, but we must ensure that all writes to memory by a
1327 child thread task work function are seen before we exit from
1328 GOMP_taskwait. */
1329 if (task == NULL
1330 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1331 return;
1332
1333 memset (&taskwait, 0, sizeof (taskwait));
1334 bool child_q = false;
1335 gomp_mutex_lock (&team->task_lock);
1336 while (1)
1337 {
1338 bool cancelled = false;
1339 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1340 {
1341 bool destroy_taskwait = task->taskwait != NULL;
1342 task->taskwait = NULL;
1343 gomp_mutex_unlock (&team->task_lock);
1344 if (to_free)
1345 {
1346 gomp_finish_task (to_free);
1347 free (to_free);
1348 }
1349 if (destroy_taskwait)
1350 gomp_sem_destroy (&taskwait.taskwait_sem);
1351 return;
1352 }
1353 struct gomp_task *next_task
1354 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1355 PQ_TEAM, &team->task_queue, &child_q);
1356 if (next_task->kind == GOMP_TASK_WAITING)
1357 {
1358 child_task = next_task;
1359 cancelled
1360 = gomp_task_run_pre (child_task, task, team);
1361 if (__builtin_expect (cancelled, 0))
1362 {
1363 if (to_free)
1364 {
1365 gomp_finish_task (to_free);
1366 free (to_free);
1367 to_free = NULL;
1368 }
1369 goto finish_cancelled;
1370 }
1371 }
1372 else
1373 {
1374 /* All tasks we are waiting for are either running in other
1375 threads, or they are tasks that have not had their
1376 dependencies met (so they're not even in the queue). Wait
1377 for them. */
1378 if (task->taskwait == NULL)
1379 {
1380 taskwait.in_depend_wait = false;
1381 gomp_sem_init (&taskwait.taskwait_sem, 0);
1382 task->taskwait = &taskwait;
1383 }
1384 taskwait.in_taskwait = true;
1385 }
1386 gomp_mutex_unlock (&team->task_lock);
1387 if (do_wake)
1388 {
1389 gomp_team_barrier_wake (&team->barrier, do_wake);
1390 do_wake = 0;
1391 }
1392 if (to_free)
1393 {
1394 gomp_finish_task (to_free);
1395 free (to_free);
1396 to_free = NULL;
1397 }
1398 if (child_task)
1399 {
1400 thr->task = child_task;
1401 if (__builtin_expect (child_task->fn == NULL, 0))
1402 {
1403 if (gomp_target_task_fn (child_task->fn_data))
1404 {
1405 thr->task = task;
1406 gomp_mutex_lock (&team->task_lock);
1407 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1408 struct gomp_target_task *ttask
1409 = (struct gomp_target_task *) child_task->fn_data;
1410 /* If GOMP_PLUGIN_target_task_completion has run already
1411 in between gomp_target_task_fn and the mutex lock,
1412 perform the requeuing here. */
1413 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1414 gomp_target_task_completion (team, child_task);
1415 else
1416 ttask->state = GOMP_TARGET_TASK_RUNNING;
1417 child_task = NULL;
1418 continue;
1419 }
1420 }
1421 else
1422 child_task->fn (child_task->fn_data);
1423 thr->task = task;
1424 }
1425 else
1426 gomp_sem_wait (&taskwait.taskwait_sem);
1427 gomp_mutex_lock (&team->task_lock);
1428 if (child_task)
1429 {
1430 finish_cancelled:;
1431 size_t new_tasks
1432 = gomp_task_run_post_handle_depend (child_task, team);
1433
1434 if (child_q)
1435 {
1436 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1437 child_task, MEMMODEL_RELAXED);
1438 child_task->pnode[PQ_CHILDREN].next = NULL;
1439 child_task->pnode[PQ_CHILDREN].prev = NULL;
1440 }
1441
1442 gomp_clear_parent (&child_task->children_queue);
1443
1444 gomp_task_run_post_remove_taskgroup (child_task);
1445
1446 to_free = child_task;
1447 child_task = NULL;
1448 team->task_count--;
1449 if (new_tasks > 1)
1450 {
1451 do_wake = team->nthreads - team->task_running_count
1452 - !task->in_tied_task;
1453 if (do_wake > new_tasks)
1454 do_wake = new_tasks;
1455 }
1456 }
1457 }
1458 }
1459
1460 /* An undeferred task is about to run. Wait for all tasks that this
1461 undeferred task depends on.
1462
1463 This is done by first putting all known ready dependencies
1464 (dependencies that have their own dependencies met) at the top of
1465 the scheduling queues. Then we iterate through these imminently
1466 ready tasks (and possibly other high priority tasks), and run them.
1467 If we run out of ready dependencies to execute, we either wait for
1468 the remaining dependencies to finish, or wait for them to get
1469 scheduled so we can run them.
1470
1471 DEPEND is as in GOMP_task. */
1472
1473 void
1474 gomp_task_maybe_wait_for_dependencies (void **depend)
1475 {
1476 struct gomp_thread *thr = gomp_thread ();
1477 struct gomp_task *task = thr->task;
1478 struct gomp_team *team = thr->ts.team;
1479 struct gomp_task_depend_entry elem, *ent = NULL;
1480 struct gomp_taskwait taskwait;
1481 size_t ndepend = (uintptr_t) depend[0];
1482 size_t nout = (uintptr_t) depend[1];
1483 size_t i;
1484 size_t num_awaited = 0;
1485 struct gomp_task *child_task = NULL;
1486 struct gomp_task *to_free = NULL;
1487 int do_wake = 0;
1488
1489 gomp_mutex_lock (&team->task_lock);
1490 for (i = 0; i < ndepend; i++)
1491 {
1492 elem.addr = depend[i + 2];
1493 ent = htab_find (task->depend_hash, &elem);
1494 for (; ent; ent = ent->next)
1495 if (i >= nout && ent->is_in)
1496 continue;
1497 else
1498 {
1499 struct gomp_task *tsk = ent->task;
1500 if (!tsk->parent_depends_on)
1501 {
1502 tsk->parent_depends_on = true;
1503 ++num_awaited;
1504 /* If dependency TSK itself has no dependencies and is
1505 ready to run, move it up front so that we run it as
1506 soon as possible. */
1507 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1508 priority_queue_upgrade_task (tsk, task);
1509 }
1510 }
1511 }
1512 if (num_awaited == 0)
1513 {
1514 gomp_mutex_unlock (&team->task_lock);
1515 return;
1516 }
1517
1518 memset (&taskwait, 0, sizeof (taskwait));
1519 taskwait.n_depend = num_awaited;
1520 gomp_sem_init (&taskwait.taskwait_sem, 0);
1521 task->taskwait = &taskwait;
1522
1523 while (1)
1524 {
1525 bool cancelled = false;
1526 if (taskwait.n_depend == 0)
1527 {
1528 task->taskwait = NULL;
1529 gomp_mutex_unlock (&team->task_lock);
1530 if (to_free)
1531 {
1532 gomp_finish_task (to_free);
1533 free (to_free);
1534 }
1535 gomp_sem_destroy (&taskwait.taskwait_sem);
1536 return;
1537 }
1538
1539 /* Theoretically when we have multiple priorities, we should
1540 choose between the highest priority item in
1541 task->children_queue and team->task_queue here, so we should
1542 use priority_queue_next_task(). However, since we are
1543 running an undeferred task, perhaps that makes all tasks it
1544 depends on undeferred, thus a priority of INF? This would
1545 make it unnecessary to take anything into account here,
1546 except the dependencies.
1547
1548 On the other hand, if we want to use priority_queue_next_task(),
1549 care should be taken to only use priority_queue_remove()
1550 below if the task was actually removed from the children
1551 queue. */
1552 bool ignored;
1553 struct gomp_task *next_task
1554 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1555 PQ_IGNORED, NULL, &ignored);
1556
1557 if (next_task->kind == GOMP_TASK_WAITING)
1558 {
1559 child_task = next_task;
1560 cancelled
1561 = gomp_task_run_pre (child_task, task, team);
1562 if (__builtin_expect (cancelled, 0))
1563 {
1564 if (to_free)
1565 {
1566 gomp_finish_task (to_free);
1567 free (to_free);
1568 to_free = NULL;
1569 }
1570 goto finish_cancelled;
1571 }
1572 }
1573 else
1574 /* All tasks we are waiting for are either running in other
1575 threads, or they are tasks that have not had their
1576 dependencies met (so they're not even in the queue). Wait
1577 for them. */
1578 taskwait.in_depend_wait = true;
1579 gomp_mutex_unlock (&team->task_lock);
1580 if (do_wake)
1581 {
1582 gomp_team_barrier_wake (&team->barrier, do_wake);
1583 do_wake = 0;
1584 }
1585 if (to_free)
1586 {
1587 gomp_finish_task (to_free);
1588 free (to_free);
1589 to_free = NULL;
1590 }
1591 if (child_task)
1592 {
1593 thr->task = child_task;
1594 if (__builtin_expect (child_task->fn == NULL, 0))
1595 {
1596 if (gomp_target_task_fn (child_task->fn_data))
1597 {
1598 thr->task = task;
1599 gomp_mutex_lock (&team->task_lock);
1600 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1601 struct gomp_target_task *ttask
1602 = (struct gomp_target_task *) child_task->fn_data;
1603 /* If GOMP_PLUGIN_target_task_completion has run already
1604 in between gomp_target_task_fn and the mutex lock,
1605 perform the requeuing here. */
1606 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1607 gomp_target_task_completion (team, child_task);
1608 else
1609 ttask->state = GOMP_TARGET_TASK_RUNNING;
1610 child_task = NULL;
1611 continue;
1612 }
1613 }
1614 else
1615 child_task->fn (child_task->fn_data);
1616 thr->task = task;
1617 }
1618 else
1619 gomp_sem_wait (&taskwait.taskwait_sem);
1620 gomp_mutex_lock (&team->task_lock);
1621 if (child_task)
1622 {
1623 finish_cancelled:;
1624 size_t new_tasks
1625 = gomp_task_run_post_handle_depend (child_task, team);
1626 if (child_task->parent_depends_on)
1627 --taskwait.n_depend;
1628
1629 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1630 child_task, MEMMODEL_RELAXED);
1631 child_task->pnode[PQ_CHILDREN].next = NULL;
1632 child_task->pnode[PQ_CHILDREN].prev = NULL;
1633
1634 gomp_clear_parent (&child_task->children_queue);
1635 gomp_task_run_post_remove_taskgroup (child_task);
1636 to_free = child_task;
1637 child_task = NULL;
1638 team->task_count--;
1639 if (new_tasks > 1)
1640 {
1641 do_wake = team->nthreads - team->task_running_count
1642 - !task->in_tied_task;
1643 if (do_wake > new_tasks)
1644 do_wake = new_tasks;
1645 }
1646 }
1647 }
1648 }
1649
1650 /* Called when encountering a taskyield directive. */
1651
1652 void
1653 GOMP_taskyield (void)
1654 {
1655 /* Nothing at the moment. */
1656 }
1657
1658 void
1659 GOMP_taskgroup_start (void)
1660 {
1661 struct gomp_thread *thr = gomp_thread ();
1662 struct gomp_team *team = thr->ts.team;
1663 struct gomp_task *task = thr->task;
1664 struct gomp_taskgroup *taskgroup;
1665
1666 /* If team is NULL, all tasks are executed as
1667 GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1668 taskgroup and their descendant tasks will be finished
1669 by the time GOMP_taskgroup_end is called. */
1670 if (team == NULL)
1671 return;
1672 taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1673 taskgroup->prev = task->taskgroup;
1674 priority_queue_init (&taskgroup->taskgroup_queue);
1675 taskgroup->in_taskgroup_wait = false;
1676 taskgroup->cancelled = false;
1677 taskgroup->num_children = 0;
1678 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1679 task->taskgroup = taskgroup;
1680 }
1681
1682 void
1683 GOMP_taskgroup_end (void)
1684 {
1685 struct gomp_thread *thr = gomp_thread ();
1686 struct gomp_team *team = thr->ts.team;
1687 struct gomp_task *task = thr->task;
1688 struct gomp_taskgroup *taskgroup;
1689 struct gomp_task *child_task = NULL;
1690 struct gomp_task *to_free = NULL;
1691 int do_wake = 0;
1692
1693 if (team == NULL)
1694 return;
1695 taskgroup = task->taskgroup;
1696 if (__builtin_expect (taskgroup == NULL, 0)
1697 && thr->ts.level == 0)
1698 {
1699 /* This can happen if GOMP_taskgroup_start is called when
1700 thr->ts.team == NULL, but inside the taskgroup there
1701 is a #pragma omp target nowait that creates an implicit
1702 team with a single thread. In this case, we want to wait
1703 for all outstanding tasks in this team. */
1704 gomp_team_barrier_wait (&team->barrier);
1705 return;
1706 }
1707
1708 /* The acquire barrier on load of taskgroup->num_children here
1709 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1710 It is not necessary that we synchronize with other non-0 writes at
1711 this point, but we must ensure that all writes to memory by a
1712 child thread task work function are seen before we exit from
1713 GOMP_taskgroup_end. */
1714 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1715 goto finish;
1716
1717 bool unused;
1718 gomp_mutex_lock (&team->task_lock);
1719 while (1)
1720 {
1721 bool cancelled = false;
1722 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1723 MEMMODEL_RELAXED))
1724 {
1725 if (taskgroup->num_children)
1726 {
1727 if (priority_queue_empty_p (&task->children_queue,
1728 MEMMODEL_RELAXED))
1729 goto do_wait;
1730 child_task
1731 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1732 PQ_TEAM, &team->task_queue,
1733 &unused);
1734 }
1735 else
1736 {
1737 gomp_mutex_unlock (&team->task_lock);
1738 if (to_free)
1739 {
1740 gomp_finish_task (to_free);
1741 free (to_free);
1742 }
1743 goto finish;
1744 }
1745 }
1746 else
1747 child_task
1748 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1749 PQ_TEAM, &team->task_queue, &unused);
1750 if (child_task->kind == GOMP_TASK_WAITING)
1751 {
1752 cancelled
1753 = gomp_task_run_pre (child_task, child_task->parent, team);
1754 if (__builtin_expect (cancelled, 0))
1755 {
1756 if (to_free)
1757 {
1758 gomp_finish_task (to_free);
1759 free (to_free);
1760 to_free = NULL;
1761 }
1762 goto finish_cancelled;
1763 }
1764 }
1765 else
1766 {
1767 child_task = NULL;
1768 do_wait:
1769 /* All tasks we are waiting for are either running in other
1770 threads, or they are tasks that have not had their
1771 dependencies met (so they're not even in the queue). Wait
1772 for them. */
1773 taskgroup->in_taskgroup_wait = true;
1774 }
1775 gomp_mutex_unlock (&team->task_lock);
1776 if (do_wake)
1777 {
1778 gomp_team_barrier_wake (&team->barrier, do_wake);
1779 do_wake = 0;
1780 }
1781 if (to_free)
1782 {
1783 gomp_finish_task (to_free);
1784 free (to_free);
1785 to_free = NULL;
1786 }
1787 if (child_task)
1788 {
1789 thr->task = child_task;
1790 if (__builtin_expect (child_task->fn == NULL, 0))
1791 {
1792 if (gomp_target_task_fn (child_task->fn_data))
1793 {
1794 thr->task = task;
1795 gomp_mutex_lock (&team->task_lock);
1796 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1797 struct gomp_target_task *ttask
1798 = (struct gomp_target_task *) child_task->fn_data;
1799 /* If GOMP_PLUGIN_target_task_completion has run already
1800 in between gomp_target_task_fn and the mutex lock,
1801 perform the requeuing here. */
1802 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1803 gomp_target_task_completion (team, child_task);
1804 else
1805 ttask->state = GOMP_TARGET_TASK_RUNNING;
1806 child_task = NULL;
1807 continue;
1808 }
1809 }
1810 else
1811 child_task->fn (child_task->fn_data);
1812 thr->task = task;
1813 }
1814 else
1815 gomp_sem_wait (&taskgroup->taskgroup_sem);
1816 gomp_mutex_lock (&team->task_lock);
1817 if (child_task)
1818 {
1819 finish_cancelled:;
1820 size_t new_tasks
1821 = gomp_task_run_post_handle_depend (child_task, team);
1822 gomp_task_run_post_remove_parent (child_task);
1823 gomp_clear_parent (&child_task->children_queue);
1824 gomp_task_run_post_remove_taskgroup (child_task);
1825 to_free = child_task;
1826 child_task = NULL;
1827 team->task_count--;
1828 if (new_tasks > 1)
1829 {
1830 do_wake = team->nthreads - team->task_running_count
1831 - !task->in_tied_task;
1832 if (do_wake > new_tasks)
1833 do_wake = new_tasks;
1834 }
1835 }
1836 }
1837
1838 finish:
1839 task->taskgroup = taskgroup->prev;
1840 gomp_sem_destroy (&taskgroup->taskgroup_sem);
1841 free (taskgroup);
1842 }
1843
1844 int
1845 omp_in_final (void)
1846 {
1847 struct gomp_thread *thr = gomp_thread ();
1848 return thr->task && thr->task->final_task;
1849 }
1850
1851 ialias (omp_in_final)