libgomp/task.c
1 /* Copyright (C) 2007-2015 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35
36 static inline void *
37 htab_alloc (size_t size)
38 {
39 return gomp_malloc (size);
40 }
41
42 static inline void
43 htab_free (void *ptr)
44 {
45 free (ptr);
46 }
47
48 #include "hashtab.h"
49
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53 return hash_pointer (element->addr);
54 }
55
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59 return x->addr == y->addr;
60 }
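
/* Each parent task's depend_hash maps a dependence address (the ->addr
   keyed on above) to a doubly linked chain of gomp_task_depend_entry
   records; gomp_task_handle_depend below pushes new entries at the head
   of that chain.  */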
61
62 /* Create a new task data structure. */
63
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 struct gomp_task_icv *prev_icv)
67 {
68 /* It would seem that using memset here would be a win, but it turns
69 out that partially filling gomp_task allows us to keep the
70 overhead of task creation low. In the nqueens-1.c test, for a
71 sufficiently large N, we drop the overhead from 5-6% to 1%.
72
73 Note, the nqueens-1.c test in serial mode is a good test to
74 benchmark the overhead of creating tasks as there are millions of
75 tiny tasks created that all run undeferred. */
76 task->parent = parent_task;
77 task->icv = *prev_icv;
78 task->kind = GOMP_TASK_IMPLICIT;
79 task->taskwait = NULL;
80 task->in_tied_task = false;
81 task->final_task = false;
82 task->copy_ctors_done = false;
83 task->parent_depends_on = false;
84 priority_queue_init (&task->children_queue);
85 task->taskgroup = NULL;
86 task->dependers = NULL;
87 task->depend_hash = NULL;
88 task->depend_count = 0;
89 }
90
91 /* Clean up a task, after completing it. */
92
93 void
94 gomp_end_task (void)
95 {
96 struct gomp_thread *thr = gomp_thread ();
97 struct gomp_task *task = thr->task;
98
99 gomp_finish_task (task);
100 thr->task = task->parent;
101 }
102
103 /* Clear the parent field of every task in LIST. */
104
105 static inline void
106 gomp_clear_parent_in_list (struct priority_list *list)
107 {
108 struct priority_node *p = list->tasks;
109 if (p)
110 do
111 {
112 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113 p = p->next;
114 }
115 while (p != list->tasks);
116 }
117
118 /* Splay tree version of gomp_clear_parent_in_list.
119
120 Clear the parent field of every task in NODE within SP, and free
121 the node when done. */
122
123 static void
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125 {
126 if (!node)
127 return;
128 prio_splay_tree_node left = node->left, right = node->right;
129 gomp_clear_parent_in_list (&node->key.l);
130 #if _LIBGOMP_CHECKING_
131 memset (node, 0xaf, sizeof (*node));
132 #endif
133 /* No need to remove the node from the tree. We're nuking
134 everything, so just free the nodes and our caller can clear the
135 entire splay tree. */
136 free (node);
137 gomp_clear_parent_in_tree (sp, left);
138 gomp_clear_parent_in_tree (sp, right);
139 }
140
141 /* Clear the parent field of every task in Q and remove every task
142 from Q. */
143
144 static inline void
145 gomp_clear_parent (struct priority_queue *q)
146 {
147 if (priority_queue_multi_p (q))
148 {
149 gomp_clear_parent_in_tree (&q->t, q->t.root);
150 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 No need to remove anything. We can just nuke everything. */
152 q->t.root = NULL;
153 }
154 else
155 gomp_clear_parent_in_list (&q->l);
156 }
157
158 /* Helper function for GOMP_task and gomp_create_target_task.
159
160 For a TASK with in/out dependencies, fill in the various dependency
161 queues. PARENT is the parent of said task. DEPEND is as in
162 GOMP_task. */
163
164 static void
165 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166 void **depend)
167 {
168 size_t ndepend = (uintptr_t) depend[0];
169 size_t nout = (uintptr_t) depend[1];
170 size_t i;
171 hash_entry_type ent;
172
173 task->depend_count = ndepend;
174 task->num_dependees = 0;
175 if (parent->depend_hash == NULL)
176 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
177 for (i = 0; i < ndepend; i++)
178 {
179 task->depend[i].addr = depend[2 + i];
180 task->depend[i].next = NULL;
181 task->depend[i].prev = NULL;
182 task->depend[i].task = task;
183 task->depend[i].is_in = i >= nout;
184 task->depend[i].redundant = false;
185 task->depend[i].redundant_out = false;
186
187 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
188 &task->depend[i], INSERT);
189 hash_entry_type out = NULL, last = NULL;
190 if (*slot)
191 {
192 /* If multiple depend clauses of this task reference the same address,
193 all but the first one are redundant.  As inout/out come first, if any
194 of them is inout/out, it will win, which is the right semantics. */
195 if ((*slot)->task == task)
196 {
197 task->depend[i].redundant = true;
198 continue;
199 }
200 for (ent = *slot; ent; ent = ent->next)
201 {
202 if (ent->redundant_out)
203 break;
204
205 last = ent;
206
207 /* depend(in:...) doesn't depend on earlier depend(in:...). */
208 if (i >= nout && ent->is_in)
209 continue;
210
211 if (!ent->is_in)
212 out = ent;
213
214 struct gomp_task *tsk = ent->task;
215 if (tsk->dependers == NULL)
216 {
217 tsk->dependers
218 = gomp_malloc (sizeof (struct gomp_dependers_vec)
219 + 6 * sizeof (struct gomp_task *));
220 tsk->dependers->n_elem = 1;
221 tsk->dependers->allocated = 6;
222 tsk->dependers->elem[0] = task;
223 task->num_dependees++;
224 continue;
225 }
226 /* We already have some other dependency on tsk from an earlier
227 depend clause. */
228 else if (tsk->dependers->n_elem
229 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
230 == task))
231 continue;
232 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
233 {
234 tsk->dependers->allocated
235 = tsk->dependers->allocated * 2 + 2;
236 tsk->dependers
237 = gomp_realloc (tsk->dependers,
238 sizeof (struct gomp_dependers_vec)
239 + (tsk->dependers->allocated
240 * sizeof (struct gomp_task *)));
241 }
242 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
243 task->num_dependees++;
244 }
245 task->depend[i].next = *slot;
246 (*slot)->prev = &task->depend[i];
247 }
248 *slot = &task->depend[i];
249
250 /* There is no need to store more than one depend({,in}out:) task per
251 address in the hash table chain for the purpose of creation of
252 deferred tasks, because each out depends on all earlier outs, thus it
253 is enough to record just the last depend({,in}out:). For depend(in:),
254 we need to keep all of the previous ones not terminated yet, because
255 a later depend({,in}out:) might need to depend on all of them. So, if
256 the new task's clause is depend({,in}out:), we know there is at most
257 one other depend({,in}out:) clause in the list (out). For
258 non-deferred tasks we want to see all outs, so they are moved to the
259 end of the chain; after the first redundant_out entry, all following
260 entries should be redundant_out. */
261 if (!task->depend[i].is_in && out)
262 {
263 if (out != last)
264 {
265 out->next->prev = out->prev;
266 out->prev->next = out->next;
267 out->next = last->next;
268 out->prev = last;
269 last->next = out;
270 if (out->next)
271 out->next->prev = out;
272 }
273 out->redundant_out = true;
274 }
275 }
276 }
277
278 /* Called when encountering an explicit task directive. If IF_CLAUSE is
279 false, then we must not delay in executing the task. If UNTIED is true,
280 then the task may be executed by any member of the team.
281
282 DEPEND is an array containing:
283 depend[0]: number of depend elements.
284 depend[1]: number of depend elements of type "out".
285 depend[2..N+1]: address of [1..N]th depend element. */
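/* For illustration only (not part of the original source): a directive
   such as "#pragma omp task depend(out: x) depend(in: y, z)" would be
   lowered by the compiler to roughly

     depend[0] = (void *) 3;   // three depend elements in total
     depend[1] = (void *) 1;   // one of them is out/inout
     depend[2] = &x;           // out/inout addresses come first
     depend[3] = &y;
     depend[4] = &z;

   before being passed to GOMP_task below.  */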
286
287 void
288 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
289 long arg_size, long arg_align, bool if_clause, unsigned flags,
290 void **depend, int priority)
291 {
292 struct gomp_thread *thr = gomp_thread ();
293 struct gomp_team *team = thr->ts.team;
294
295 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
296 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
297 tied to one thread all the time. This means UNTIED tasks must be
298 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
299 might be running on a different thread than FN. */
300 if (cpyfn)
301 if_clause = false;
302 flags &= ~GOMP_TASK_FLAG_UNTIED;
303 #endif
304
305 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
306 if (team
307 && (gomp_team_barrier_cancelled (&team->barrier)
308 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
309 return;
310
311 if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
312 priority = 0;
313 else if (priority > gomp_max_task_priority_var)
314 priority = gomp_max_task_priority_var;
315
316 if (!if_clause || team == NULL
317 || (thr->task && thr->task->final_task)
318 || team->task_count > 64 * team->nthreads)
319 {
320 struct gomp_task task;
321
322 /* If there are depend clauses and earlier deferred sibling tasks
323 with depend clauses, check whether there are any dependencies.  If
324 there are, we need to wait for them. There is no need to handle
325 depend clauses for non-deferred tasks other than this, because
326 the parent task is suspended until the child task finishes and thus
327 it can't start further child tasks. */
328 if ((flags & GOMP_TASK_FLAG_DEPEND)
329 && thr->task && thr->task->depend_hash)
330 gomp_task_maybe_wait_for_dependencies (depend);
331
332 gomp_init_task (&task, thr->task, gomp_icv (false));
333 task.kind = GOMP_TASK_UNDEFERRED;
334 task.final_task = (thr->task && thr->task->final_task)
335 || (flags & GOMP_TASK_FLAG_FINAL);
336 task.priority = priority;
337 if (thr->task)
338 {
339 task.in_tied_task = thr->task->in_tied_task;
340 task.taskgroup = thr->task->taskgroup;
341 }
342 thr->task = &task;
343 if (__builtin_expect (cpyfn != NULL, 0))
344 {
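/* Over-allocate by ARG_ALIGN - 1 bytes and round the start of the
   buffer up so that ARG is suitably aligned for the argument block
   copied in by CPYFN.  */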
345 char buf[arg_size + arg_align - 1];
346 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
347 & ~(uintptr_t) (arg_align - 1));
348 cpyfn (arg, data);
349 fn (arg);
350 }
351 else
352 fn (data);
353 /* Access to "children" is normally done inside a task_lock
354 mutex region, but the only way this particular task.children
355 can be set is if this thread's task work function (fn)
356 creates children. So since the setter is *this* thread, we
357 need no barriers here when testing for non-NULL. We can have
358 task.children set by the current thread then changed by a
359 child thread, but seeing a stale non-NULL value is not a
360 problem. Once past the task_lock acquisition, this thread
361 will see the real value of task.children. */
362 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
363 {
364 gomp_mutex_lock (&team->task_lock);
365 gomp_clear_parent (&task.children_queue);
366 gomp_mutex_unlock (&team->task_lock);
367 }
368 gomp_end_task ();
369 }
370 else
371 {
372 struct gomp_task *task;
373 struct gomp_task *parent = thr->task;
374 struct gomp_taskgroup *taskgroup = parent->taskgroup;
375 char *arg;
376 bool do_wake;
377 size_t depend_size = 0;
378
379 if (flags & GOMP_TASK_FLAG_DEPEND)
380 depend_size = ((uintptr_t) depend[0]
381 * sizeof (struct gomp_task_depend_entry));
382 task = gomp_malloc (sizeof (*task) + depend_size
383 + arg_size + arg_align - 1);
384 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
385 & ~(uintptr_t) (arg_align - 1));
386 gomp_init_task (task, parent, gomp_icv (false));
387 task->priority = priority;
388 task->kind = GOMP_TASK_UNDEFERRED;
389 task->in_tied_task = parent->in_tied_task;
390 task->taskgroup = taskgroup;
391 thr->task = task;
392 if (cpyfn)
393 {
394 cpyfn (arg, data);
395 task->copy_ctors_done = true;
396 }
397 else
398 memcpy (arg, data, arg_size);
399 thr->task = parent;
400 task->kind = GOMP_TASK_WAITING;
401 task->fn = fn;
402 task->fn_data = arg;
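/* GOMP_TASK_FLAG_FINAL is bit 1 of FLAGS (see gomp-constants.h), so the
   shift below turns the flag into a plain 0/1 value for final_task.  */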
403 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
404 gomp_mutex_lock (&team->task_lock);
405 /* If parallel or taskgroup has been cancelled, don't start new
406 tasks. */
407 if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
408 || (taskgroup && taskgroup->cancelled))
409 && !task->copy_ctors_done, 0))
410 {
411 gomp_mutex_unlock (&team->task_lock);
412 gomp_finish_task (task);
413 free (task);
414 return;
415 }
416 if (taskgroup)
417 taskgroup->num_children++;
418 if (depend_size)
419 {
420 gomp_task_handle_depend (task, parent, depend);
421 if (task->num_dependees)
422 {
423 /* Tasks that depend on other tasks are not put into the
424 various waiting queues, so we are done for now. Said
425 tasks are instead put into the queues via
426 gomp_task_run_post_handle_dependers() after their
427 dependencies have been satisfied. After which, they
428 can be picked up by the various scheduling
429 points. */
430 gomp_mutex_unlock (&team->task_lock);
431 return;
432 }
433 }
434
435 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
436 task, priority,
437 PRIORITY_INSERT_BEGIN,
438 /*adjust_parent_depends_on=*/false,
439 task->parent_depends_on);
440 if (taskgroup)
441 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
442 task, priority,
443 PRIORITY_INSERT_BEGIN,
444 /*adjust_parent_depends_on=*/false,
445 task->parent_depends_on);
446
447 priority_queue_insert (PQ_TEAM, &team->task_queue,
448 task, priority,
449 PRIORITY_INSERT_END,
450 /*adjust_parent_depends_on=*/false,
451 task->parent_depends_on);
452
453 ++team->task_count;
454 ++team->task_queued_count;
455 gomp_team_barrier_set_task_pending (&team->barrier);
456 do_wake = team->task_running_count + !parent->in_tied_task
457 < team->nthreads;
458 gomp_mutex_unlock (&team->task_lock);
459 if (do_wake)
460 gomp_team_barrier_wake (&team->barrier, 1);
461 }
462 }
463
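/* ialias (defined in libgomp.h) creates library-internal aliases of these
   entry points so that calls from within libgomp bind locally.  */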
464 ialias (GOMP_taskgroup_start)
465 ialias (GOMP_taskgroup_end)
466
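/* The taskloop entry points are generated by textually including
   taskloop.c twice: once with TYPE == long to produce GOMP_taskloop,
   and once with TYPE == unsigned long long (with GOMP_taskloop renamed
   to GOMP_taskloop_ull) to produce the unsigned variant.  */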
467 #define TYPE long
468 #define UTYPE unsigned long
469 #define TYPE_is_long 1
470 #include "taskloop.c"
471 #undef TYPE
472 #undef UTYPE
473 #undef TYPE_is_long
474
475 #define TYPE unsigned long long
476 #define UTYPE TYPE
477 #define GOMP_taskloop GOMP_taskloop_ull
478 #include "taskloop.c"
479 #undef TYPE
480 #undef UTYPE
481 #undef GOMP_taskloop
482
483 static void inline
484 priority_queue_move_task_first (enum priority_queue_type type,
485 struct priority_queue *head,
486 struct gomp_task *task)
487 {
488 #if _LIBGOMP_CHECKING_
489 if (!priority_queue_task_in_queue_p (type, head, task))
490 gomp_fatal ("Attempt to move first missing task %p", task);
491 #endif
492 struct priority_list *list;
493 if (priority_queue_multi_p (head))
494 {
495 list = priority_queue_lookup_priority (head, task->priority);
496 #if _LIBGOMP_CHECKING_
497 if (!list)
498 gomp_fatal ("Unable to find priority %d", task->priority);
499 #endif
500 }
501 else
502 list = &head->l;
503 priority_list_remove (list, task_to_priority_node (type, task), 0);
504 priority_list_insert (type, list, task, task->priority,
505 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
506 task->parent_depends_on);
507 }
508
509 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
510 with team->task_lock held, or is executed in the thread that called
511 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
512 run before it acquires team->task_lock. */
513
514 static void
515 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
516 {
517 struct gomp_task *parent = task->parent;
518 if (parent)
519 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
520 task);
521
522 struct gomp_taskgroup *taskgroup = task->taskgroup;
523 if (taskgroup)
524 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
525 task);
526
527 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
528 PRIORITY_INSERT_BEGIN, false,
529 task->parent_depends_on);
530 task->kind = GOMP_TASK_WAITING;
531 if (parent && parent->taskwait)
532 {
533 if (parent->taskwait->in_taskwait)
534 {
535 /* One more task has had its dependencies met.
536 Inform any waiters. */
537 parent->taskwait->in_taskwait = false;
538 gomp_sem_post (&parent->taskwait->taskwait_sem);
539 }
540 else if (parent->taskwait->in_depend_wait)
541 {
542 /* One more task has had its dependencies met.
543 Inform any waiters. */
544 parent->taskwait->in_depend_wait = false;
545 gomp_sem_post (&parent->taskwait->taskwait_sem);
546 }
547 }
548 if (taskgroup && taskgroup->in_taskgroup_wait)
549 {
550 /* One more task has had its dependencies met.
551 Inform any waiters. */
552 taskgroup->in_taskgroup_wait = false;
553 gomp_sem_post (&taskgroup->taskgroup_sem);
554 }
555
556 ++team->task_queued_count;
557 gomp_team_barrier_set_task_pending (&team->barrier);
558 /* I'm afraid this can't be done after releasing team->task_lock,
559 as gomp_target_task_completion is run from an unrelated thread and
560 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
561 the team could be gone already. */
562 if (team->nthreads > team->task_running_count)
563 gomp_team_barrier_wake (&team->barrier, 1);
564 }
565
566 /* Signal that a target task TTASK has completed the asynchronously
567 running phase and should be requeued as a task to handle the
568 variable unmapping. */
569
570 void
571 GOMP_PLUGIN_target_task_completion (void *data)
572 {
573 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
574 struct gomp_task *task = ttask->task;
575 struct gomp_team *team = ttask->team;
576
577 gomp_mutex_lock (&team->task_lock);
578 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
579 {
580 ttask->state = GOMP_TARGET_TASK_FINISHED;
581 gomp_mutex_unlock (&team->task_lock);
return;
582 }
583 ttask->state = GOMP_TARGET_TASK_FINISHED;
584 gomp_target_task_completion (team, task);
585 gomp_mutex_unlock (&team->task_lock);
586 }
587
588 /* Called for nowait target tasks. */
589
590 bool
591 gomp_create_target_task (struct gomp_device_descr *devicep,
592 void (*fn) (void *), size_t mapnum, void **hostaddrs,
593 size_t *sizes, unsigned short *kinds,
594 unsigned int flags, void **depend,
595 enum gomp_target_task_state state)
596 {
597 struct gomp_thread *thr = gomp_thread ();
598 struct gomp_team *team = thr->ts.team;
599
600 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
601 if (team
602 && (gomp_team_barrier_cancelled (&team->barrier)
603 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
604 return true;
605
606 struct gomp_target_task *ttask;
607 struct gomp_task *task;
608 struct gomp_task *parent = thr->task;
609 struct gomp_taskgroup *taskgroup = parent->taskgroup;
610 bool do_wake;
611 size_t depend_size = 0;
612 uintptr_t depend_cnt = 0;
613 size_t tgt_align = 0, tgt_size = 0;
614
615 if (depend != NULL)
616 {
617 depend_cnt = (uintptr_t) depend[0];
618 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
619 }
620 if (fn)
621 {
622 /* GOMP_MAP_FIRSTPRIVATE entries need to be copied first, as they are
623 firstprivate on the target task. */
624 size_t i;
625 for (i = 0; i < mapnum; i++)
626 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
627 {
628 size_t align = (size_t) 1 << (kinds[i] >> 8);
629 if (tgt_align < align)
630 tgt_align = align;
631 tgt_size = (tgt_size + align - 1) & ~(align - 1);
632 tgt_size += sizes[i];
633 }
634 if (tgt_align)
635 tgt_size += tgt_align - 1;
636 else
637 tgt_size = 0;
638 }
639
640 task = gomp_malloc (sizeof (*task) + depend_size
641 + sizeof (*ttask)
642 + mapnum * (sizeof (void *) + sizeof (size_t)
643 + sizeof (unsigned short))
644 + tgt_size);
645 gomp_init_task (task, parent, gomp_icv (false));
646 task->priority = 0;
647 task->kind = GOMP_TASK_WAITING;
648 task->in_tied_task = parent->in_tied_task;
649 task->taskgroup = taskgroup;
650 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
651 ttask->devicep = devicep;
652 ttask->fn = fn;
653 ttask->mapnum = mapnum;
654 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
655 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
656 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
657 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
658 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
659 if (tgt_align)
660 {
661 char *tgt = (char *) &ttask->kinds[mapnum];
662 size_t i;
663 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
664 if (al)
665 tgt += tgt_align - al;
666 tgt_size = 0;
667 for (i = 0; i < mapnum; i++)
668 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
669 {
670 size_t align = (size_t) 1 << (kinds[i] >> 8);
671 tgt_size = (tgt_size + align - 1) & ~(align - 1);
672 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
673 ttask->hostaddrs[i] = tgt + tgt_size;
674 tgt_size = tgt_size + sizes[i];
675 }
676 }
677 ttask->flags = flags;
678 ttask->state = state;
679 ttask->task = task;
680 ttask->team = team;
681 task->fn = NULL;
682 task->fn_data = ttask;
683 task->final_task = 0;
684 gomp_mutex_lock (&team->task_lock);
685 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
686 if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
687 || (taskgroup && taskgroup->cancelled), 0))
688 {
689 gomp_mutex_unlock (&team->task_lock);
690 gomp_finish_task (task);
691 free (task);
692 return true;
693 }
694 if (depend_size)
695 {
696 gomp_task_handle_depend (task, parent, depend);
697 if (task->num_dependees)
698 {
699 if (taskgroup)
700 taskgroup->num_children++;
701 gomp_mutex_unlock (&team->task_lock);
702 return true;
703 }
704 }
705 if (state == GOMP_TARGET_TASK_DATA)
706 {
707 gomp_mutex_unlock (&team->task_lock);
708 gomp_finish_task (task);
709 free (task);
710 return false;
711 }
712 if (taskgroup)
713 taskgroup->num_children++;
714 /* For async offloading, if we don't need to wait for dependencies,
715 run the gomp_target_task_fn right away, essentially scheduling the
716 mapping part of the task in the current thread. */
717 if (devicep != NULL
718 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
719 {
720 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
721 PRIORITY_INSERT_END,
722 /*adjust_parent_depends_on=*/false,
723 task->parent_depends_on);
724 if (taskgroup)
725 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
726 task, 0, PRIORITY_INSERT_END,
727 /*adjust_parent_depends_on=*/false,
728 task->parent_depends_on);
729 task->pnode[PQ_TEAM].next = NULL;
730 task->pnode[PQ_TEAM].prev = NULL;
731 task->kind = GOMP_TASK_TIED;
732 ++team->task_count;
733 gomp_mutex_unlock (&team->task_lock);
734
735 thr->task = task;
736 gomp_target_task_fn (task->fn_data);
737 thr->task = parent;
738
739 gomp_mutex_lock (&team->task_lock);
740 task->kind = GOMP_TASK_ASYNC_RUNNING;
741 /* If GOMP_PLUGIN_target_task_completion has run already
742 in between gomp_target_task_fn and the mutex lock,
743 perform the requeuing here. */
744 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
745 gomp_target_task_completion (team, task);
746 else
747 ttask->state = GOMP_TARGET_TASK_RUNNING;
748 gomp_mutex_unlock (&team->task_lock);
749 return true;
750 }
751 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
752 PRIORITY_INSERT_BEGIN,
753 /*adjust_parent_depends_on=*/false,
754 task->parent_depends_on);
755 if (taskgroup)
756 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
757 PRIORITY_INSERT_BEGIN,
758 /*adjust_parent_depends_on=*/false,
759 task->parent_depends_on);
760 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
761 PRIORITY_INSERT_END,
762 /*adjust_parent_depends_on=*/false,
763 task->parent_depends_on);
764 ++team->task_count;
765 ++team->task_queued_count;
766 gomp_team_barrier_set_task_pending (&team->barrier);
767 do_wake = team->task_running_count + !parent->in_tied_task
768 < team->nthreads;
769 gomp_mutex_unlock (&team->task_lock);
770 if (do_wake)
771 gomp_team_barrier_wake (&team->barrier, 1);
772 return true;
773 }
774
775 /* Given a parent_depends_on task in LIST, move it to the front of its
776 priority so it is run as soon as possible.
777
778 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
779
780 We rearrange the queue such that all parent_depends_on tasks are
781 first, and last_parent_depends_on points to the last such task we
782 rearranged. For example, given the following tasks in a queue
783 where PD[123] are the parent_depends_on tasks:
784
785 task->children
786 |
787 V
788 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
789
790 We rearrange such that:
791
792 task->children
793 | +--- last_parent_depends_on
794 | |
795 V V
796 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
797
798 static void inline
799 priority_list_upgrade_task (struct priority_list *list,
800 struct priority_node *node)
801 {
802 struct priority_node *last_parent_depends_on
803 = list->last_parent_depends_on;
804 if (last_parent_depends_on)
805 {
806 node->prev->next = node->next;
807 node->next->prev = node->prev;
808 node->prev = last_parent_depends_on;
809 node->next = last_parent_depends_on->next;
810 node->prev->next = node;
811 node->next->prev = node;
812 }
813 else if (node != list->tasks)
814 {
815 node->prev->next = node->next;
816 node->next->prev = node->prev;
817 node->prev = list->tasks->prev;
818 node->next = list->tasks;
819 list->tasks = node;
820 node->prev->next = node;
821 node->next->prev = node;
822 }
823 list->last_parent_depends_on = node;
824 }
825
826 /* Given a parent_depends_on TASK in its parent's children_queue, move
827 it to the front of its priority so it is run as soon as possible.
828
829 PARENT is passed as an optimization.
830
831 (This function could be defined in priority_queue.c, but we want it
832 inlined, and putting it in priority_queue.h is not an option, given
833 that gomp_task has not been properly defined at that point). */
834
835 static void inline
836 priority_queue_upgrade_task (struct gomp_task *task,
837 struct gomp_task *parent)
838 {
839 struct priority_queue *head = &parent->children_queue;
840 struct priority_node *node = &task->pnode[PQ_CHILDREN];
841 #if _LIBGOMP_CHECKING_
842 if (!task->parent_depends_on)
843 gomp_fatal ("priority_queue_upgrade_task: task must be a "
844 "parent_depends_on task");
845 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
846 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
847 #endif
848 if (priority_queue_multi_p (head))
849 {
850 struct priority_list *list
851 = priority_queue_lookup_priority (head, task->priority);
852 priority_list_upgrade_task (list, node);
853 }
854 else
855 priority_list_upgrade_task (&head->l, node);
856 }
857
858 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
859 the way in LIST so that other tasks can be considered for
860 execution. LIST contains tasks of type TYPE.
861
862 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
863 if applicable. */
864
865 static void inline
866 priority_list_downgrade_task (enum priority_queue_type type,
867 struct priority_list *list,
868 struct gomp_task *child_task)
869 {
870 struct priority_node *node = task_to_priority_node (type, child_task);
871 if (list->tasks == node)
872 list->tasks = node->next;
873 else if (node->next != list->tasks)
874 {
875 /* The task in NODE is about to become TIED and TIED tasks
876 cannot come before WAITING tasks. If we're about to
877 leave the queue in such an indeterminate state, rewire
878 things appropriately. However, a TIED task at the end is
879 perfectly fine. */
880 struct gomp_task *next_task = priority_node_to_task (type, node->next);
881 if (next_task->kind == GOMP_TASK_WAITING)
882 {
883 /* Remove from list. */
884 node->prev->next = node->next;
885 node->next->prev = node->prev;
886 /* Rewire at the end. */
887 node->next = list->tasks;
888 node->prev = list->tasks->prev;
889 list->tasks->prev->next = node;
890 list->tasks->prev = node;
891 }
892 }
893
894 /* If the current task is the last_parent_depends_on for its
895 priority, adjust last_parent_depends_on appropriately. */
896 if (__builtin_expect (child_task->parent_depends_on, 0)
897 && list->last_parent_depends_on == node)
898 {
899 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
900 if (node->prev != node
901 && prev_child->kind == GOMP_TASK_WAITING
902 && prev_child->parent_depends_on)
903 list->last_parent_depends_on = node->prev;
904 else
905 {
906 /* There are no more parent_depends_on entries waiting
907 to run, clear the list. */
908 list->last_parent_depends_on = NULL;
909 }
910 }
911 }
912
913 /* Given a TASK in HEAD that is about to be executed, move it out of
914 the way so that other tasks can be considered for execution. HEAD
915 contains tasks of type TYPE.
916
917 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
918 if applicable.
919
920 (This function could be defined in priority_queue.c, but we want it
921 inlined, and putting it in priority_queue.h is not an option, given
922 that gomp_task has not been properly defined at that point). */
923
924 static void inline
925 priority_queue_downgrade_task (enum priority_queue_type type,
926 struct priority_queue *head,
927 struct gomp_task *task)
928 {
929 #if _LIBGOMP_CHECKING_
930 if (!priority_queue_task_in_queue_p (type, head, task))
931 gomp_fatal ("Attempt to downgrade missing task %p", task);
932 #endif
933 if (priority_queue_multi_p (head))
934 {
935 struct priority_list *list
936 = priority_queue_lookup_priority (head, task->priority);
937 priority_list_downgrade_task (type, list, task);
938 }
939 else
940 priority_list_downgrade_task (type, &head->l, task);
941 }
942
943 /* Set up CHILD_TASK to execute. This is done by setting the task to
944 TIED, and updating all relevant queues so that CHILD_TASK is no
945 longer chosen for scheduling. Also, remove CHILD_TASK from the
946 overall team task queue entirely.
947
948 Return TRUE if task or its containing taskgroup has been
949 cancelled. */
950
951 static inline bool
952 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
953 struct gomp_team *team)
954 {
955 #if _LIBGOMP_CHECKING_
956 if (child_task->parent)
957 priority_queue_verify (PQ_CHILDREN,
958 &child_task->parent->children_queue, true);
959 if (child_task->taskgroup)
960 priority_queue_verify (PQ_TASKGROUP,
961 &child_task->taskgroup->taskgroup_queue, false);
962 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
963 #endif
964
965 /* Task is about to go tied, move it out of the way. */
966 if (parent)
967 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
968 child_task);
969
970 /* Task is about to go tied, move it out of the way. */
971 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
972 if (taskgroup)
973 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
974 child_task);
975
976 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
977 MEMMODEL_RELAXED);
978 child_task->pnode[PQ_TEAM].next = NULL;
979 child_task->pnode[PQ_TEAM].prev = NULL;
980 child_task->kind = GOMP_TASK_TIED;
981
982 if (--team->task_queued_count == 0)
983 gomp_team_barrier_clear_task_pending (&team->barrier);
984 if ((gomp_team_barrier_cancelled (&team->barrier)
985 || (taskgroup && taskgroup->cancelled))
986 && !child_task->copy_ctors_done)
987 return true;
988 return false;
989 }
990
991 static void
992 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
993 {
994 struct gomp_task *parent = child_task->parent;
995 size_t i;
996
997 for (i = 0; i < child_task->depend_count; i++)
998 if (!child_task->depend[i].redundant)
999 {
1000 if (child_task->depend[i].next)
1001 child_task->depend[i].next->prev = child_task->depend[i].prev;
1002 if (child_task->depend[i].prev)
1003 child_task->depend[i].prev->next = child_task->depend[i].next;
1004 else
1005 {
1006 hash_entry_type *slot
1007 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1008 NO_INSERT);
1009 if (*slot != &child_task->depend[i])
1010 abort ();
1011 if (child_task->depend[i].next)
1012 *slot = child_task->depend[i].next;
1013 else
1014 htab_clear_slot (parent->depend_hash, slot);
1015 }
1016 }
1017 }
1018
1019 /* After a CHILD_TASK has been run, adjust the dependency queue for
1020 each task that depends on CHILD_TASK, to record the fact that there
1021 is one less dependency to worry about. If a task that depended on
1022 CHILD_TASK now has no dependencies, place it in the various queues
1023 so it gets scheduled to run.
1024
1025 TEAM is the team to which CHILD_TASK belongs. */
1026
1027 static size_t
1028 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1029 struct gomp_team *team)
1030 {
1031 struct gomp_task *parent = child_task->parent;
1032 size_t i, count = child_task->dependers->n_elem, ret = 0;
1033 for (i = 0; i < count; i++)
1034 {
1035 struct gomp_task *task = child_task->dependers->elem[i];
1036
1037 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1038 TASK's remaining dependencies. Once TASK has no other
1039 dependencies, put it into the various queues so it will get
1040 scheduled for execution. */
1041 if (--task->num_dependees != 0)
1042 continue;
1043
1044 struct gomp_taskgroup *taskgroup = task->taskgroup;
1045 if (parent)
1046 {
1047 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1048 task, task->priority,
1049 PRIORITY_INSERT_BEGIN,
1050 /*adjust_parent_depends_on=*/true,
1051 task->parent_depends_on);
1052 if (parent->taskwait)
1053 {
1054 if (parent->taskwait->in_taskwait)
1055 {
1056 /* One more task has had its dependencies met.
1057 Inform any waiters. */
1058 parent->taskwait->in_taskwait = false;
1059 gomp_sem_post (&parent->taskwait->taskwait_sem);
1060 }
1061 else if (parent->taskwait->in_depend_wait)
1062 {
1063 /* One more task has had its dependencies met.
1064 Inform any waiters. */
1065 parent->taskwait->in_depend_wait = false;
1066 gomp_sem_post (&parent->taskwait->taskwait_sem);
1067 }
1068 }
1069 }
1070 if (taskgroup)
1071 {
1072 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1073 task, task->priority,
1074 PRIORITY_INSERT_BEGIN,
1075 /*adjust_parent_depends_on=*/false,
1076 task->parent_depends_on);
1077 if (taskgroup->in_taskgroup_wait)
1078 {
1079 /* One more task has had its dependencies met.
1080 Inform any waiters. */
1081 taskgroup->in_taskgroup_wait = false;
1082 gomp_sem_post (&taskgroup->taskgroup_sem);
1083 }
1084 }
1085 priority_queue_insert (PQ_TEAM, &team->task_queue,
1086 task, task->priority,
1087 PRIORITY_INSERT_END,
1088 /*adjust_parent_depends_on=*/false,
1089 task->parent_depends_on);
1090 ++team->task_count;
1091 ++team->task_queued_count;
1092 ++ret;
1093 }
1094 free (child_task->dependers);
1095 child_task->dependers = NULL;
1096 if (ret > 1)
1097 gomp_team_barrier_set_task_pending (&team->barrier);
1098 return ret;
1099 }
1100
1101 static inline size_t
1102 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1103 struct gomp_team *team)
1104 {
1105 if (child_task->depend_count == 0)
1106 return 0;
1107
1108 /* If parent is gone already, the hash table is freed and nothing
1109 will use the hash table anymore, so there is no need to remove anything from it. */
1110 if (child_task->parent != NULL)
1111 gomp_task_run_post_handle_depend_hash (child_task);
1112
1113 if (child_task->dependers == NULL)
1114 return 0;
1115
1116 return gomp_task_run_post_handle_dependers (child_task, team);
1117 }
1118
1119 /* Remove CHILD_TASK from its parent. */
1120
1121 static inline void
1122 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1123 {
1124 struct gomp_task *parent = child_task->parent;
1125 if (parent == NULL)
1126 return;
1127
1128 /* If this was the last task the parent was depending on,
1129 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1130 clean up and return. */
1131 if (__builtin_expect (child_task->parent_depends_on, 0)
1132 && --parent->taskwait->n_depend == 0
1133 && parent->taskwait->in_depend_wait)
1134 {
1135 parent->taskwait->in_depend_wait = false;
1136 gomp_sem_post (&parent->taskwait->taskwait_sem);
1137 }
1138
1139 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1140 child_task, MEMMODEL_RELEASE)
1141 && parent->taskwait && parent->taskwait->in_taskwait)
1142 {
1143 parent->taskwait->in_taskwait = false;
1144 gomp_sem_post (&parent->taskwait->taskwait_sem);
1145 }
1146 child_task->pnode[PQ_CHILDREN].next = NULL;
1147 child_task->pnode[PQ_CHILDREN].prev = NULL;
1148 }
1149
1150 /* Remove CHILD_TASK from its taskgroup. */
1151
1152 static inline void
1153 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1154 {
1155 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1156 if (taskgroup == NULL)
1157 return;
1158 bool empty = priority_queue_remove (PQ_TASKGROUP,
1159 &taskgroup->taskgroup_queue,
1160 child_task, MEMMODEL_RELAXED);
1161 child_task->pnode[PQ_TASKGROUP].next = NULL;
1162 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1163 if (taskgroup->num_children > 1)
1164 --taskgroup->num_children;
1165 else
1166 {
1167 /* We access taskgroup->num_children in GOMP_taskgroup_end
1168 outside of the task lock mutex region, so
1169 need a release barrier here to ensure memory
1170 written by child_task->fn above is flushed
1171 before the NULL is written. */
1172 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1173 }
1174 if (empty && taskgroup->in_taskgroup_wait)
1175 {
1176 taskgroup->in_taskgroup_wait = false;
1177 gomp_sem_post (&taskgroup->taskgroup_sem);
1178 }
1179 }
1180
1181 void
1182 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1183 {
1184 struct gomp_thread *thr = gomp_thread ();
1185 struct gomp_team *team = thr->ts.team;
1186 struct gomp_task *task = thr->task;
1187 struct gomp_task *child_task = NULL;
1188 struct gomp_task *to_free = NULL;
1189 int do_wake = 0;
1190
1191 gomp_mutex_lock (&team->task_lock);
1192 if (gomp_barrier_last_thread (state))
1193 {
1194 if (team->task_count == 0)
1195 {
1196 gomp_team_barrier_done (&team->barrier, state);
1197 gomp_mutex_unlock (&team->task_lock);
1198 gomp_team_barrier_wake (&team->barrier, 0);
1199 return;
1200 }
1201 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1202 }
1203
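/* Main scheduling loop: under task_lock pick a waiting task from the
   team queue and mark it TIED, drop the lock while running it, then
   reacquire the lock for the post-run bookkeeping.  The task finished
   on the previous iteration (TO_FREE) is freed while the lock is not
   held.  */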
1204 while (1)
1205 {
1206 bool cancelled = false;
1207 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1208 {
1209 bool ignored;
1210 child_task
1211 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1212 PQ_IGNORED, NULL,
1213 &ignored);
1214 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1215 team);
1216 if (__builtin_expect (cancelled, 0))
1217 {
1218 if (to_free)
1219 {
1220 gomp_finish_task (to_free);
1221 free (to_free);
1222 to_free = NULL;
1223 }
1224 goto finish_cancelled;
1225 }
1226 team->task_running_count++;
1227 child_task->in_tied_task = true;
1228 }
1229 gomp_mutex_unlock (&team->task_lock);
1230 if (do_wake)
1231 {
1232 gomp_team_barrier_wake (&team->barrier, do_wake);
1233 do_wake = 0;
1234 }
1235 if (to_free)
1236 {
1237 gomp_finish_task (to_free);
1238 free (to_free);
1239 to_free = NULL;
1240 }
1241 if (child_task)
1242 {
1243 thr->task = child_task;
1244 if (__builtin_expect (child_task->fn == NULL, 0))
1245 {
1246 if (gomp_target_task_fn (child_task->fn_data))
1247 {
1248 thr->task = task;
1249 gomp_mutex_lock (&team->task_lock);
1250 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1251 team->task_running_count--;
1252 struct gomp_target_task *ttask
1253 = (struct gomp_target_task *) child_task->fn_data;
1254 /* If GOMP_PLUGIN_target_task_completion has run already
1255 in between gomp_target_task_fn and the mutex lock,
1256 perform the requeuing here. */
1257 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1258 gomp_target_task_completion (team, child_task);
1259 else
1260 ttask->state = GOMP_TARGET_TASK_RUNNING;
1261 child_task = NULL;
1262 continue;
1263 }
1264 }
1265 else
1266 child_task->fn (child_task->fn_data);
1267 thr->task = task;
1268 }
1269 else
1270 return;
1271 gomp_mutex_lock (&team->task_lock);
1272 if (child_task)
1273 {
1274 finish_cancelled:;
1275 size_t new_tasks
1276 = gomp_task_run_post_handle_depend (child_task, team);
1277 gomp_task_run_post_remove_parent (child_task);
1278 gomp_clear_parent (&child_task->children_queue);
1279 gomp_task_run_post_remove_taskgroup (child_task);
1280 to_free = child_task;
1281 child_task = NULL;
1282 if (!cancelled)
1283 team->task_running_count--;
1284 if (new_tasks > 1)
1285 {
1286 do_wake = team->nthreads - team->task_running_count;
1287 if (do_wake > new_tasks)
1288 do_wake = new_tasks;
1289 }
1290 if (--team->task_count == 0
1291 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1292 {
1293 gomp_team_barrier_done (&team->barrier, state);
1294 gomp_mutex_unlock (&team->task_lock);
1295 gomp_team_barrier_wake (&team->barrier, 0);
1296 gomp_mutex_lock (&team->task_lock);
1297 }
1298 }
1299 }
1300 }
1301
1302 /* Called when encountering a taskwait directive.
1303
1304 Wait for all children of the current task. */
1305
1306 void
1307 GOMP_taskwait (void)
1308 {
1309 struct gomp_thread *thr = gomp_thread ();
1310 struct gomp_team *team = thr->ts.team;
1311 struct gomp_task *task = thr->task;
1312 struct gomp_task *child_task = NULL;
1313 struct gomp_task *to_free = NULL;
1314 struct gomp_taskwait taskwait;
1315 int do_wake = 0;
1316
1317 /* The acquire barrier on load of task->children here synchronizes
1318 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1319 not necessary that we synchronize with other non-NULL writes at
1320 this point, but we must ensure that all writes to memory by a
1321 child thread task work function are seen before we exit from
1322 GOMP_taskwait. */
1323 if (task == NULL
1324 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1325 return;
1326
1327 memset (&taskwait, 0, sizeof (taskwait));
1328 bool child_q = false;
1329 gomp_mutex_lock (&team->task_lock);
1330 while (1)
1331 {
1332 bool cancelled = false;
1333 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1334 {
1335 bool destroy_taskwait = task->taskwait != NULL;
1336 task->taskwait = NULL;
1337 gomp_mutex_unlock (&team->task_lock);
1338 if (to_free)
1339 {
1340 gomp_finish_task (to_free);
1341 free (to_free);
1342 }
1343 if (destroy_taskwait)
1344 gomp_sem_destroy (&taskwait.taskwait_sem);
1345 return;
1346 }
1347 struct gomp_task *next_task
1348 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1349 PQ_TEAM, &team->task_queue, &child_q);
1350 if (next_task->kind == GOMP_TASK_WAITING)
1351 {
1352 child_task = next_task;
1353 cancelled
1354 = gomp_task_run_pre (child_task, task, team);
1355 if (__builtin_expect (cancelled, 0))
1356 {
1357 if (to_free)
1358 {
1359 gomp_finish_task (to_free);
1360 free (to_free);
1361 to_free = NULL;
1362 }
1363 goto finish_cancelled;
1364 }
1365 }
1366 else
1367 {
1368 /* All tasks we are waiting for are either running in other
1369 threads, or they are tasks that have not had their
1370 dependencies met (so they're not even in the queue). Wait
1371 for them. */
1372 if (task->taskwait == NULL)
1373 {
1374 taskwait.in_depend_wait = false;
1375 gomp_sem_init (&taskwait.taskwait_sem, 0);
1376 task->taskwait = &taskwait;
1377 }
1378 taskwait.in_taskwait = true;
1379 }
1380 gomp_mutex_unlock (&team->task_lock);
1381 if (do_wake)
1382 {
1383 gomp_team_barrier_wake (&team->barrier, do_wake);
1384 do_wake = 0;
1385 }
1386 if (to_free)
1387 {
1388 gomp_finish_task (to_free);
1389 free (to_free);
1390 to_free = NULL;
1391 }
1392 if (child_task)
1393 {
1394 thr->task = child_task;
1395 if (__builtin_expect (child_task->fn == NULL, 0))
1396 {
1397 if (gomp_target_task_fn (child_task->fn_data))
1398 {
1399 thr->task = task;
1400 gomp_mutex_lock (&team->task_lock);
1401 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1402 struct gomp_target_task *ttask
1403 = (struct gomp_target_task *) child_task->fn_data;
1404 /* If GOMP_PLUGIN_target_task_completion has run already
1405 in between gomp_target_task_fn and the mutex lock,
1406 perform the requeuing here. */
1407 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1408 gomp_target_task_completion (team, child_task);
1409 else
1410 ttask->state = GOMP_TARGET_TASK_RUNNING;
1411 child_task = NULL;
1412 continue;
1413 }
1414 }
1415 else
1416 child_task->fn (child_task->fn_data);
1417 thr->task = task;
1418 }
1419 else
1420 gomp_sem_wait (&taskwait.taskwait_sem);
1421 gomp_mutex_lock (&team->task_lock);
1422 if (child_task)
1423 {
1424 finish_cancelled:;
1425 size_t new_tasks
1426 = gomp_task_run_post_handle_depend (child_task, team);
1427
1428 if (child_q)
1429 {
1430 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1431 child_task, MEMMODEL_RELAXED);
1432 child_task->pnode[PQ_CHILDREN].next = NULL;
1433 child_task->pnode[PQ_CHILDREN].prev = NULL;
1434 }
1435
1436 gomp_clear_parent (&child_task->children_queue);
1437
1438 gomp_task_run_post_remove_taskgroup (child_task);
1439
1440 to_free = child_task;
1441 child_task = NULL;
1442 team->task_count--;
1443 if (new_tasks > 1)
1444 {
1445 do_wake = team->nthreads - team->task_running_count
1446 - !task->in_tied_task;
1447 if (do_wake > new_tasks)
1448 do_wake = new_tasks;
1449 }
1450 }
1451 }
1452 }
1453
1454 /* An undeferred task is about to run. Wait for all tasks that this
1455 undeferred task depends on.
1456
1457 This is done by first putting all known ready dependencies
1458 (dependencies that have their own dependencies met) at the top of
1459 the scheduling queues. Then we iterate through these imminently
1460 ready tasks (and possibly other high priority tasks), and run them.
1461 If we run out of ready dependencies to execute, we either wait for
1462 the remaining dependencies to finish, or wait for them to get
1463 scheduled so we can run them.
1464
1465 DEPEND is as in GOMP_task. */
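/* For example (illustration only): a task created with
   "#pragma omp task if(0) depend(inout: x)" runs undeferred, so
   GOMP_task calls this function first to block until every earlier
   deferred sibling task with a dependence on "x" has completed.  */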
1466
1467 void
1468 gomp_task_maybe_wait_for_dependencies (void **depend)
1469 {
1470 struct gomp_thread *thr = gomp_thread ();
1471 struct gomp_task *task = thr->task;
1472 struct gomp_team *team = thr->ts.team;
1473 struct gomp_task_depend_entry elem, *ent = NULL;
1474 struct gomp_taskwait taskwait;
1475 size_t ndepend = (uintptr_t) depend[0];
1476 size_t nout = (uintptr_t) depend[1];
1477 size_t i;
1478 size_t num_awaited = 0;
1479 struct gomp_task *child_task = NULL;
1480 struct gomp_task *to_free = NULL;
1481 int do_wake = 0;
1482
1483 gomp_mutex_lock (&team->task_lock);
1484 for (i = 0; i < ndepend; i++)
1485 {
1486 elem.addr = depend[i + 2];
1487 ent = htab_find (task->depend_hash, &elem);
1488 for (; ent; ent = ent->next)
1489 if (i >= nout && ent->is_in)
1490 continue;
1491 else
1492 {
1493 struct gomp_task *tsk = ent->task;
1494 if (!tsk->parent_depends_on)
1495 {
1496 tsk->parent_depends_on = true;
1497 ++num_awaited;
1498 /* If dependency TSK itself has no dependencies and is
1499 ready to run, move it up front so that we run it as
1500 soon as possible. */
1501 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1502 priority_queue_upgrade_task (tsk, task);
1503 }
1504 }
1505 }
1506 if (num_awaited == 0)
1507 {
1508 gomp_mutex_unlock (&team->task_lock);
1509 return;
1510 }
1511
1512 memset (&taskwait, 0, sizeof (taskwait));
1513 taskwait.n_depend = num_awaited;
1514 gomp_sem_init (&taskwait.taskwait_sem, 0);
1515 task->taskwait = &taskwait;
1516
1517 while (1)
1518 {
1519 bool cancelled = false;
1520 if (taskwait.n_depend == 0)
1521 {
1522 task->taskwait = NULL;
1523 gomp_mutex_unlock (&team->task_lock);
1524 if (to_free)
1525 {
1526 gomp_finish_task (to_free);
1527 free (to_free);
1528 }
1529 gomp_sem_destroy (&taskwait.taskwait_sem);
1530 return;
1531 }
1532
1533 /* Theoretically when we have multiple priorities, we should
1534 choose between the highest priority item in
1535 task->children_queue and team->task_queue here, so we should
1536 use priority_queue_next_task(). However, since we are
1537 running an undeferred task, perhaps that makes all tasks it
1538 depends on undeferred, thus a priority of INF? This would
1539 make it unnecessary to take anything into account here
1540 other than the dependencies.
1541
1542 On the other hand, if we want to use priority_queue_next_task(),
1543 care should be taken to only use priority_queue_remove()
1544 below if the task was actually removed from the children
1545 queue. */
1546 bool ignored;
1547 struct gomp_task *next_task
1548 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1549 PQ_IGNORED, NULL, &ignored);
1550
1551 if (next_task->kind == GOMP_TASK_WAITING)
1552 {
1553 child_task = next_task;
1554 cancelled
1555 = gomp_task_run_pre (child_task, task, team);
1556 if (__builtin_expect (cancelled, 0))
1557 {
1558 if (to_free)
1559 {
1560 gomp_finish_task (to_free);
1561 free (to_free);
1562 to_free = NULL;
1563 }
1564 goto finish_cancelled;
1565 }
1566 }
1567 else
1568 /* All tasks we are waiting for are either running in other
1569 threads, or they are tasks that have not had their
1570 dependencies met (so they're not even in the queue). Wait
1571 for them. */
1572 taskwait.in_depend_wait = true;
1573 gomp_mutex_unlock (&team->task_lock);
1574 if (do_wake)
1575 {
1576 gomp_team_barrier_wake (&team->barrier, do_wake);
1577 do_wake = 0;
1578 }
1579 if (to_free)
1580 {
1581 gomp_finish_task (to_free);
1582 free (to_free);
1583 to_free = NULL;
1584 }
1585 if (child_task)
1586 {
1587 thr->task = child_task;
1588 if (__builtin_expect (child_task->fn == NULL, 0))
1589 {
1590 if (gomp_target_task_fn (child_task->fn_data))
1591 {
1592 thr->task = task;
1593 gomp_mutex_lock (&team->task_lock);
1594 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1595 struct gomp_target_task *ttask
1596 = (struct gomp_target_task *) child_task->fn_data;
1597 /* If GOMP_PLUGIN_target_task_completion has run already
1598 in between gomp_target_task_fn and the mutex lock,
1599 perform the requeuing here. */
1600 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1601 gomp_target_task_completion (team, child_task);
1602 else
1603 ttask->state = GOMP_TARGET_TASK_RUNNING;
1604 child_task = NULL;
1605 continue;
1606 }
1607 }
1608 else
1609 child_task->fn (child_task->fn_data);
1610 thr->task = task;
1611 }
1612 else
1613 gomp_sem_wait (&taskwait.taskwait_sem);
1614 gomp_mutex_lock (&team->task_lock);
1615 if (child_task)
1616 {
1617 finish_cancelled:;
1618 size_t new_tasks
1619 = gomp_task_run_post_handle_depend (child_task, team);
1620 if (child_task->parent_depends_on)
1621 --taskwait.n_depend;
1622
1623 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1624 child_task, MEMMODEL_RELAXED);
1625 child_task->pnode[PQ_CHILDREN].next = NULL;
1626 child_task->pnode[PQ_CHILDREN].prev = NULL;
1627
1628 gomp_clear_parent (&child_task->children_queue);
1629 gomp_task_run_post_remove_taskgroup (child_task);
1630 to_free = child_task;
1631 child_task = NULL;
1632 team->task_count--;
1633 if (new_tasks > 1)
1634 {
1635 do_wake = team->nthreads - team->task_running_count
1636 - !task->in_tied_task;
1637 if (do_wake > new_tasks)
1638 do_wake = new_tasks;
1639 }
1640 }
1641 }
1642 }
1643
1644 /* Called when encountering a taskyield directive. */
1645
1646 void
1647 GOMP_taskyield (void)
1648 {
1649 /* Nothing at the moment. */
1650 }
1651
1652 void
1653 GOMP_taskgroup_start (void)
1654 {
1655 struct gomp_thread *thr = gomp_thread ();
1656 struct gomp_team *team = thr->ts.team;
1657 struct gomp_task *task = thr->task;
1658 struct gomp_taskgroup *taskgroup;
1659
1660 /* If team is NULL, all tasks are executed as
1661 GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1662 taskgroup and their descendant tasks will be finished
1663 by the time GOMP_taskgroup_end is called. */
1664 if (team == NULL)
1665 return;
1666 taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1667 taskgroup->prev = task->taskgroup;
1668 priority_queue_init (&taskgroup->taskgroup_queue);
1669 taskgroup->in_taskgroup_wait = false;
1670 taskgroup->cancelled = false;
1671 taskgroup->num_children = 0;
1672 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1673 task->taskgroup = taskgroup;
1674 }
1675
1676 void
1677 GOMP_taskgroup_end (void)
1678 {
1679 struct gomp_thread *thr = gomp_thread ();
1680 struct gomp_team *team = thr->ts.team;
1681 struct gomp_task *task = thr->task;
1682 struct gomp_taskgroup *taskgroup;
1683 struct gomp_task *child_task = NULL;
1684 struct gomp_task *to_free = NULL;
1685 int do_wake = 0;
1686
1687 if (team == NULL)
1688 return;
1689 taskgroup = task->taskgroup;
1690 if (__builtin_expect (taskgroup == NULL, 0)
1691 && thr->ts.level == 0)
1692 {
1693 /* This can happen if GOMP_taskgroup_start is called when
1694 thr->ts.team == NULL, but inside of the taskgroup there
1695 is #pragma omp target nowait that creates an implicit
1696 team with a single thread. In this case, we want to wait
1697 for all outstanding tasks in this team. */
1698 gomp_team_barrier_wait (&team->barrier);
1699 return;
1700 }
1701
1702 /* The acquire barrier on load of taskgroup->num_children here
1703 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1704 It is not necessary that we synchronize with other non-0 writes at
1705 this point, but we must ensure that all writes to memory by a
1706 child thread task work function are seen before we exit from
1707 GOMP_taskgroup_end. */
1708 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1709 goto finish;
1710
1711 bool unused;
1712 gomp_mutex_lock (&team->task_lock);
1713 while (1)
1714 {
1715 bool cancelled = false;
1716 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1717 MEMMODEL_RELAXED))
1718 {
1719 if (taskgroup->num_children)
1720 {
1721 if (priority_queue_empty_p (&task->children_queue,
1722 MEMMODEL_RELAXED))
1723 goto do_wait;
1724 child_task
1725 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1726 PQ_TEAM, &team->task_queue,
1727 &unused);
1728 }
1729 else
1730 {
1731 gomp_mutex_unlock (&team->task_lock);
1732 if (to_free)
1733 {
1734 gomp_finish_task (to_free);
1735 free (to_free);
1736 }
1737 goto finish;
1738 }
1739 }
1740 else
1741 child_task
1742 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1743 PQ_TEAM, &team->task_queue, &unused);
1744 if (child_task->kind == GOMP_TASK_WAITING)
1745 {
1746 cancelled
1747 = gomp_task_run_pre (child_task, child_task->parent, team);
1748 if (__builtin_expect (cancelled, 0))
1749 {
1750 if (to_free)
1751 {
1752 gomp_finish_task (to_free);
1753 free (to_free);
1754 to_free = NULL;
1755 }
1756 goto finish_cancelled;
1757 }
1758 }
1759 else
1760 {
1761 child_task = NULL;
1762 do_wait:
1763 /* All tasks we are waiting for are either running in other
1764 threads, or they are tasks that have not had their
1765 dependencies met (so they're not even in the queue). Wait
1766 for them. */
1767 taskgroup->in_taskgroup_wait = true;
1768 }
1769 gomp_mutex_unlock (&team->task_lock);
1770 if (do_wake)
1771 {
1772 gomp_team_barrier_wake (&team->barrier, do_wake);
1773 do_wake = 0;
1774 }
1775 if (to_free)
1776 {
1777 gomp_finish_task (to_free);
1778 free (to_free);
1779 to_free = NULL;
1780 }
1781 if (child_task)
1782 {
1783 thr->task = child_task;
1784 if (__builtin_expect (child_task->fn == NULL, 0))
1785 {
1786 if (gomp_target_task_fn (child_task->fn_data))
1787 {
1788 thr->task = task;
1789 gomp_mutex_lock (&team->task_lock);
1790 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1791 struct gomp_target_task *ttask
1792 = (struct gomp_target_task *) child_task->fn_data;
1793 /* If GOMP_PLUGIN_target_task_completion has run already
1794 in between gomp_target_task_fn and the mutex lock,
1795 perform the requeuing here. */
1796 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1797 gomp_target_task_completion (team, child_task);
1798 else
1799 ttask->state = GOMP_TARGET_TASK_RUNNING;
1800 child_task = NULL;
1801 continue;
1802 }
1803 }
1804 else
1805 child_task->fn (child_task->fn_data);
1806 thr->task = task;
1807 }
1808 else
1809 gomp_sem_wait (&taskgroup->taskgroup_sem);
1810 gomp_mutex_lock (&team->task_lock);
1811 if (child_task)
1812 {
1813 finish_cancelled:;
1814 size_t new_tasks
1815 = gomp_task_run_post_handle_depend (child_task, team);
1816 gomp_task_run_post_remove_parent (child_task);
1817 gomp_clear_parent (&child_task->children_queue);
1818 gomp_task_run_post_remove_taskgroup (child_task);
1819 to_free = child_task;
1820 child_task = NULL;
1821 team->task_count--;
1822 if (new_tasks > 1)
1823 {
1824 do_wake = team->nthreads - team->task_running_count
1825 - !task->in_tied_task;
1826 if (do_wake > new_tasks)
1827 do_wake = new_tasks;
1828 }
1829 }
1830 }
1831
1832 finish:
1833 task->taskgroup = taskgroup->prev;
1834 gomp_sem_destroy (&taskgroup->taskgroup_sem);
1835 free (taskgroup);
1836 }
1837
1838 int
1839 omp_in_final (void)
1840 {
1841 struct gomp_thread *thr = gomp_thread ();
1842 return thr->task && thr->task->final_task;
1843 }
1844
1845 ialias (omp_in_final)