1 /* Copyright (C) 2007-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
34
35 typedef struct gomp_task_depend_entry *hash_entry_type;
36
37 static inline void *
38 htab_alloc (size_t size)
39 {
40 return gomp_malloc (size);
41 }
42
43 static inline void
44 htab_free (void *ptr)
45 {
46 free (ptr);
47 }
48
49 #include "hashtab.h"
50
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
53 {
54 return hash_pointer (element->addr);
55 }
56
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
59 {
60 return x->addr == y->addr;
61 }
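
/* Descriptive note (a summary of the code in this file, not additional
   behaviour): these hooks parameterize the chained hash table from
   hashtab.h that each parent task keeps in its depend_hash field.
   Entries are keyed purely on the dependence address (element->addr),
   so every depend clause naming the same variable hashes to the same
   slot, and all entries for one address form a doubly linked chain
   through the next/prev fields of gomp_task_depend_entry.  */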
62
63 /* Create a new task data structure. */
64
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 struct gomp_task_icv *prev_icv)
68 {
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
73
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task->parent = parent_task;
78 priority_queue_init (&task->children_queue);
79 task->taskgroup = NULL;
80 task->dependers = NULL;
81 task->depend_hash = NULL;
82 task->taskwait = NULL;
83 task->depend_count = 0;
84 task->completion_sem = NULL;
85 task->deferred_p = false;
86 task->icv = *prev_icv;
87 task->kind = GOMP_TASK_IMPLICIT;
88 task->in_tied_task = false;
89 task->final_task = false;
90 task->copy_ctors_done = false;
91 task->parent_depends_on = false;
92 }
93
94 /* Clean up a task, after completing it. */
95
96 void
97 gomp_end_task (void)
98 {
99 struct gomp_thread *thr = gomp_thread ();
100 struct gomp_task *task = thr->task;
101
102 gomp_finish_task (task);
103 thr->task = task->parent;
104 }
105
106 /* Clear the parent field of every task in LIST. */
107
108 static inline void
109 gomp_clear_parent_in_list (struct priority_list *list)
110 {
111 struct priority_node *p = list->tasks;
112 if (p)
113 do
114 {
115 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
116 p = p->next;
117 }
118 while (p != list->tasks);
119 }
120
121 /* Splay tree version of gomp_clear_parent_in_list.
122
123 Clear the parent field of every task in NODE within SP, and free
124 the node when done. */
125
126 static void
127 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
128 {
129 if (!node)
130 return;
131 prio_splay_tree_node left = node->left, right = node->right;
132 gomp_clear_parent_in_list (&node->key.l);
133 #if _LIBGOMP_CHECKING_
134 memset (node, 0xaf, sizeof (*node));
135 #endif
136 /* No need to remove the node from the tree. We're nuking
137 everything, so just free the nodes and our caller can clear the
138 entire splay tree. */
139 free (node);
140 gomp_clear_parent_in_tree (sp, left);
141 gomp_clear_parent_in_tree (sp, right);
142 }
143
144 /* Clear the parent field of every task in Q and remove every task
145 from Q. */
146
147 static inline void
148 gomp_clear_parent (struct priority_queue *q)
149 {
150 if (priority_queue_multi_p (q))
151 {
152 gomp_clear_parent_in_tree (&q->t, q->t.root);
153 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 No need to remove anything. We can just nuke everything. */
155 q->t.root = NULL;
156 }
157 else
158 gomp_clear_parent_in_list (&q->l);
159 }
160
161 /* Helper function for GOMP_task and gomp_create_target_task.
162
163 For a TASK with in/out dependencies, fill in the various dependency
164 queues. PARENT is the parent of said task. DEPEND is as in
165 GOMP_task. */
166
167 static void
168 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
169 void **depend)
170 {
171 size_t ndepend = (uintptr_t) depend[0];
172 size_t i;
173 hash_entry_type ent;
174
175 if (ndepend)
176 {
177 /* depend[0] is total # */
178 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
179 /* ndepend - nout is # of in: */
180 for (i = 0; i < ndepend; i++)
181 {
182 task->depend[i].addr = depend[2 + i];
183 task->depend[i].is_in = i >= nout;
184 }
185 }
186 else
187 {
188 ndepend = (uintptr_t) depend[1]; /* total # */
189 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
190 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
191 /* For now we treat mutexinoutset like out, which is compliant, but
192 inefficient. */
193 size_t nin = (uintptr_t) depend[4]; /* # of in: */
194 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195 size_t normal = nout + nmutexinoutset + nin;
196 size_t n = 0;
197 for (i = normal; i < ndepend; i++)
198 {
199 void **d = (void **) (uintptr_t) depend[5 + i];
200 switch ((uintptr_t) d[1])
201 {
202 case GOMP_DEPEND_OUT:
203 case GOMP_DEPEND_INOUT:
204 case GOMP_DEPEND_MUTEXINOUTSET:
205 break;
206 case GOMP_DEPEND_IN:
207 continue;
208 default:
209 gomp_fatal ("unknown omp_depend_t dependence type %d",
210 (int) (uintptr_t) d[1]);
211 }
212 task->depend[n].addr = d[0];
213 task->depend[n++].is_in = 0;
214 }
215 for (i = 0; i < normal; i++)
216 {
217 task->depend[n].addr = depend[5 + i];
218 task->depend[n++].is_in = i >= nout + nmutexinoutset;
219 }
220 for (i = normal; i < ndepend; i++)
221 {
222 void **d = (void **) (uintptr_t) depend[5 + i];
223 if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
224 continue;
225 task->depend[n].addr = d[0];
226 task->depend[n++].is_in = 1;
227 }
228 }
229 task->depend_count = ndepend;
230 task->num_dependees = 0;
231 if (parent->depend_hash == NULL)
232 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
233 for (i = 0; i < ndepend; i++)
234 {
235 task->depend[i].next = NULL;
236 task->depend[i].prev = NULL;
237 task->depend[i].task = task;
238 task->depend[i].redundant = false;
239 task->depend[i].redundant_out = false;
240
241 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
242 &task->depend[i], INSERT);
243 hash_entry_type out = NULL, last = NULL;
244 if (*slot)
245 {
246 /* If multiple depend clauses of the same task reference the same address,
247 all but the first one are redundant. As inout/out come first, if any
248 of them is inout/out, it will win, which is the right semantics. */
249 if ((*slot)->task == task)
250 {
251 task->depend[i].redundant = true;
252 continue;
253 }
254 for (ent = *slot; ent; ent = ent->next)
255 {
256 if (ent->redundant_out)
257 break;
258
259 last = ent;
260
261 /* depend(in:...) doesn't depend on earlier depend(in:...). */
262 if (task->depend[i].is_in && ent->is_in)
263 continue;
264
265 if (!ent->is_in)
266 out = ent;
267
268 struct gomp_task *tsk = ent->task;
269 if (tsk->dependers == NULL)
270 {
271 tsk->dependers
272 = gomp_malloc (sizeof (struct gomp_dependers_vec)
273 + 6 * sizeof (struct gomp_task *));
274 tsk->dependers->n_elem = 1;
275 tsk->dependers->allocated = 6;
276 tsk->dependers->elem[0] = task;
277 task->num_dependees++;
278 continue;
279 }
280 /* We already have some other dependency on tsk from an
281 earlier depend clause. */
282 else if (tsk->dependers->n_elem
283 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
284 == task))
285 continue;
286 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
287 {
288 tsk->dependers->allocated
289 = tsk->dependers->allocated * 2 + 2;
290 tsk->dependers
291 = gomp_realloc (tsk->dependers,
292 sizeof (struct gomp_dependers_vec)
293 + (tsk->dependers->allocated
294 * sizeof (struct gomp_task *)));
295 }
296 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
297 task->num_dependees++;
298 }
299 task->depend[i].next = *slot;
300 (*slot)->prev = &task->depend[i];
301 }
302 *slot = &task->depend[i];
303
304 /* There is no need to store more than one depend({,in}out:) task per
305 address in the hash table chain for the purpose of creating
306 deferred tasks, because each out depends on all earlier outs, thus it
307 is enough to record just the last depend({,in}out:). For depend(in:),
308 we need to keep all of the previous ones not terminated yet, because
309 a later depend({,in}out:) might need to depend on all of them. So, if
310 the new task's clause is depend({,in}out:), we know there is at most
311 one other depend({,in}out:) clause in the list (out). For
312 non-deferred tasks we want to see all outs, so they are moved to the
313 end of the chain; after the first redundant_out entry, all following
314 entries should be redundant_out. */
315 if (!task->depend[i].is_in && out)
316 {
317 if (out != last)
318 {
319 out->next->prev = out->prev;
320 out->prev->next = out->next;
321 out->next = last->next;
322 out->prev = last;
323 last->next = out;
324 if (out->next)
325 out->next->prev = out;
326 }
327 out->redundant_out = true;
328 }
329 }
330 }
331
332 /* Called when encountering an explicit task directive. If IF_CLAUSE is
333 false, then we must not delay in executing the task. If UNTIED is true,
334 then the task may be executed by any member of the team.
335
336 DEPEND is an array containing:
337 if depend[0] is non-zero, then:
338 depend[0]: number of depend elements.
339 depend[1]: number of depend elements of type "out/inout".
340 depend[2..N+1]: address of [1..N]th depend element.
341 otherwise, when depend[0] is zero, then:
342 depend[1]: number of depend elements.
343 depend[2]: number of depend elements of type "out/inout".
344 depend[3]: number of depend elements of type "mutexinoutset".
345 depend[4]: number of depend elements of type "in".
346 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 omp_depend_t objects. */
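
/* As an illustration (a sketch based on the format described above, not an
   exhaustive statement of what the compiler emits): a directive such as

     #pragma omp task depend(inout: x) depend(in: y, z)

   could be lowered into a DEPEND array using the first encoding as

     depend[0] = (void *) 3;    number of depend elements
     depend[1] = (void *) 1;    one of them is out/inout
     depend[2] = &x;            out/inout addresses come first
     depend[3] = &y;
     depend[4] = &z;            the remaining entries are in

   whereas the second encoding (depend[0] == 0) additionally separates the
   mutexinoutset and in counts and appends addresses of omp_depend_t
   objects after the normal addresses.  */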
349
350 void
351 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
352 long arg_size, long arg_align, bool if_clause, unsigned flags,
353 void **depend, int priority_arg, void *detach)
354 {
355 struct gomp_thread *thr = gomp_thread ();
356 struct gomp_team *team = thr->ts.team;
357 int priority = 0;
358
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361 tied to one thread all the time. This means UNTIED tasks must be
362 tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
363 might be running on a different thread than FN. */
364 if (cpyfn)
365 if_clause = false;
366 flags &= ~GOMP_TASK_FLAG_UNTIED;
367 #endif
368
369 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
370 if (__builtin_expect (gomp_cancel_var, 0) && team)
371 {
372 if (gomp_team_barrier_cancelled (&team->barrier))
373 return;
374 if (thr->task->taskgroup)
375 {
376 if (thr->task->taskgroup->cancelled)
377 return;
378 if (thr->task->taskgroup->workshare
379 && thr->task->taskgroup->prev
380 && thr->task->taskgroup->prev->cancelled)
381 return;
382 }
383 }
384
385 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
386 {
387 priority = priority_arg;
388 if (priority > gomp_max_task_priority_var)
389 priority = gomp_max_task_priority_var;
390 }
391
392 if (!if_clause || team == NULL
393 || (thr->task && thr->task->final_task)
394 || team->task_count > 64 * team->nthreads)
395 {
396 struct gomp_task task;
397 gomp_sem_t completion_sem;
398
399 /* If there are depend clauses and earlier deferred sibling tasks
400 with depend clauses, check whether this task depends on any of
401 them; if it does, we need to wait for them. There is no need to
402 handle depend clauses for non-deferred tasks other than this,
403 because the parent task is suspended until the child task finishes
404 and thus it can't start further child tasks. */
405 if ((flags & GOMP_TASK_FLAG_DEPEND)
406 && thr->task && thr->task->depend_hash)
407 gomp_task_maybe_wait_for_dependencies (depend);
408
409 gomp_init_task (&task, thr->task, gomp_icv (false));
410 task.kind = GOMP_TASK_UNDEFERRED;
411 task.final_task = (thr->task && thr->task->final_task)
412 || (flags & GOMP_TASK_FLAG_FINAL);
413 task.priority = priority;
414
415 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
416 {
417 gomp_sem_init (&completion_sem, 0);
418 task.completion_sem = &completion_sem;
419 *(void **) detach = &task;
420 if (data)
421 *(void **) data = &task;
422
423 gomp_debug (0, "Thread %d: new event: %p\n",
424 thr->ts.team_id, &task);
425 }
426
427 if (thr->task)
428 {
429 task.in_tied_task = thr->task->in_tied_task;
430 task.taskgroup = thr->task->taskgroup;
431 }
432 thr->task = &task;
433 if (__builtin_expect (cpyfn != NULL, 0))
434 {
435 char buf[arg_size + arg_align - 1];
436 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
437 & ~(uintptr_t) (arg_align - 1));
438 cpyfn (arg, data);
439 fn (arg);
440 }
441 else
442 fn (data);
443
444 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
445 {
446 gomp_sem_wait (&completion_sem);
447 gomp_sem_destroy (&completion_sem);
448 }
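
    /* Illustrative sketch (not part of the original source; start_async_work
       is a hypothetical user function): with the detach clause the task does
       not complete until its allow-completion event is fulfilled, which is
       why the undeferred path above blocks on completion_sem until some
       thread calls omp_fulfill_event on the handle produced for DETACH:

         omp_event_handle_t ev;
         #pragma omp task detach (ev)
           start_async_work (ev);
         ...
         omp_fulfill_event (ev);  */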
449
450 /* Access to "children" is normally done inside a task_lock
451 mutex region, but the only way this particular task.children
452 can be set is if this thread's task work function (fn)
453 creates children. So since the setter is *this* thread, we
454 need no barriers here when testing for non-NULL. We can have
455 task.children set by the current thread then changed by a
456 child thread, but seeing a stale non-NULL value is not a
457 problem. Once past the task_lock acquisition, this thread
458 will see the real value of task.children. */
459 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
460 {
461 gomp_mutex_lock (&team->task_lock);
462 gomp_clear_parent (&task.children_queue);
463 gomp_mutex_unlock (&team->task_lock);
464 }
465 gomp_end_task ();
466 }
467 else
468 {
469 struct gomp_task *task;
470 struct gomp_task *parent = thr->task;
471 struct gomp_taskgroup *taskgroup = parent->taskgroup;
472 char *arg;
473 bool do_wake;
474 size_t depend_size = 0;
475
476 if (flags & GOMP_TASK_FLAG_DEPEND)
477 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
478 * sizeof (struct gomp_task_depend_entry));
479 task = gomp_malloc (sizeof (*task) + depend_size
480 + arg_size + arg_align - 1);
481 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
482 & ~(uintptr_t) (arg_align - 1));
483 gomp_init_task (task, parent, gomp_icv (false));
484 task->priority = priority;
485 task->kind = GOMP_TASK_UNDEFERRED;
486 task->in_tied_task = parent->in_tied_task;
487 task->taskgroup = taskgroup;
488 task->deferred_p = true;
489 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
490 {
491 task->detach_team = team;
492
493 *(void **) detach = task;
494 if (data)
495 *(void **) data = task;
496
497 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
498 }
499 thr->task = task;
500 if (cpyfn)
501 {
502 cpyfn (arg, data);
503 task->copy_ctors_done = true;
504 }
505 else
506 memcpy (arg, data, arg_size);
507 thr->task = parent;
508 task->kind = GOMP_TASK_WAITING;
509 task->fn = fn;
510 task->fn_data = arg;
511 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
512 gomp_mutex_lock (&team->task_lock);
513 /* If parallel or taskgroup has been cancelled, don't start new
514 tasks. */
515 if (__builtin_expect (gomp_cancel_var, 0)
516 && !task->copy_ctors_done)
517 {
518 if (gomp_team_barrier_cancelled (&team->barrier))
519 {
520 do_cancel:
521 gomp_mutex_unlock (&team->task_lock);
522 gomp_finish_task (task);
523 free (task);
524 return;
525 }
526 if (taskgroup)
527 {
528 if (taskgroup->cancelled)
529 goto do_cancel;
530 if (taskgroup->workshare
531 && taskgroup->prev
532 && taskgroup->prev->cancelled)
533 goto do_cancel;
534 }
535 }
536 if (taskgroup)
537 taskgroup->num_children++;
538 if (depend_size)
539 {
540 gomp_task_handle_depend (task, parent, depend);
541 if (task->num_dependees)
542 {
543 /* Tasks that depend on other tasks are not put into the
544 various waiting queues, so we are done for now. Said
545 tasks are instead put into the queues via
546 gomp_task_run_post_handle_dependers() after their
547 dependencies have been satisfied. After which, they
548 can be picked up by the various scheduling
549 points. */
550 gomp_mutex_unlock (&team->task_lock);
551 return;
552 }
553 }
554
555 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
556 task, priority,
557 PRIORITY_INSERT_BEGIN,
558 /*adjust_parent_depends_on=*/false,
559 task->parent_depends_on);
560 if (taskgroup)
561 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
562 task, priority,
563 PRIORITY_INSERT_BEGIN,
564 /*adjust_parent_depends_on=*/false,
565 task->parent_depends_on);
566
567 priority_queue_insert (PQ_TEAM, &team->task_queue,
568 task, priority,
569 PRIORITY_INSERT_END,
570 /*adjust_parent_depends_on=*/false,
571 task->parent_depends_on);
572
573 ++team->task_count;
574 ++team->task_queued_count;
575 gomp_team_barrier_set_task_pending (&team->barrier);
576 do_wake = team->task_running_count + !parent->in_tied_task
577 < team->nthreads;
578 gomp_mutex_unlock (&team->task_lock);
579 if (do_wake)
580 gomp_team_barrier_wake (&team->barrier, 1);
581 }
582 }
583
584 ialias (GOMP_taskgroup_start)
585 ialias (GOMP_taskgroup_end)
586 ialias (GOMP_taskgroup_reduction_register)
587
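/* GOMP_taskloop is instantiated twice from the single template in
   taskloop.c: once with a signed long iteration type (the plain
   GOMP_taskloop entry point) and once with unsigned long long
   (GOMP_taskloop_ull), selected through the TYPE/UTYPE macros below.  */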
588 #define TYPE long
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
592 #undef TYPE
593 #undef UTYPE
594 #undef TYPE_is_long
595
596 #define TYPE unsigned long long
597 #define UTYPE TYPE
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
600 #undef TYPE
601 #undef UTYPE
602 #undef GOMP_taskloop
603
604 static void inline
605 priority_queue_move_task_first (enum priority_queue_type type,
606 struct priority_queue *head,
607 struct gomp_task *task)
608 {
609 #if _LIBGOMP_CHECKING_
610 if (!priority_queue_task_in_queue_p (type, head, task))
611 gomp_fatal ("Attempt to move first missing task %p", task);
612 #endif
613 struct priority_list *list;
614 if (priority_queue_multi_p (head))
615 {
616 list = priority_queue_lookup_priority (head, task->priority);
617 #if _LIBGOMP_CHECKING_
618 if (!list)
619 gomp_fatal ("Unable to find priority %d", task->priority);
620 #endif
621 }
622 else
623 list = &head->l;
624 priority_list_remove (list, task_to_priority_node (type, task), 0);
625 priority_list_insert (type, list, task, task->priority,
626 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
627 task->parent_depends_on);
628 }
629
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631 with team->task_lock held, or is executed in the thread that called
632 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633 run before it acquires team->task_lock. */
634
635 static void
636 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
637 {
638 struct gomp_task *parent = task->parent;
639 if (parent)
640 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
641 task);
642
643 struct gomp_taskgroup *taskgroup = task->taskgroup;
644 if (taskgroup)
645 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
646 task);
647
648 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
649 PRIORITY_INSERT_BEGIN, false,
650 task->parent_depends_on);
651 task->kind = GOMP_TASK_WAITING;
652 if (parent && parent->taskwait)
653 {
654 if (parent->taskwait->in_taskwait)
655 {
656 /* One more task has had its dependencies met.
657 Inform any waiters. */
658 parent->taskwait->in_taskwait = false;
659 gomp_sem_post (&parent->taskwait->taskwait_sem);
660 }
661 else if (parent->taskwait->in_depend_wait)
662 {
663 /* One more task has had its dependencies met.
664 Inform any waiters. */
665 parent->taskwait->in_depend_wait = false;
666 gomp_sem_post (&parent->taskwait->taskwait_sem);
667 }
668 }
669 if (taskgroup && taskgroup->in_taskgroup_wait)
670 {
671 /* One more task has had its dependencies met.
672 Inform any waiters. */
673 taskgroup->in_taskgroup_wait = false;
674 gomp_sem_post (&taskgroup->taskgroup_sem);
675 }
676
677 ++team->task_queued_count;
678 gomp_team_barrier_set_task_pending (&team->barrier);
679 /* I'm afraid this can't be done after releasing team->task_lock,
680 as gomp_target_task_completion is run from an unrelated thread and
681 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
682 the team could be gone already. */
683 if (team->nthreads > team->task_running_count)
684 gomp_team_barrier_wake (&team->barrier, 1);
685 }
686
687 /* Signal that a target task TTASK has completed the asynchronously
688 running phase and should be requeued as a task to handle the
689 variable unmapping. */
690
691 void
692 GOMP_PLUGIN_target_task_completion (void *data)
693 {
694 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
695 struct gomp_task *task = ttask->task;
696 struct gomp_team *team = ttask->team;
697
698 gomp_mutex_lock (&team->task_lock);
699 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
700 {
701 ttask->state = GOMP_TARGET_TASK_FINISHED;
702 gomp_mutex_unlock (&team->task_lock);
703 return;
704 }
705 ttask->state = GOMP_TARGET_TASK_FINISHED;
706 gomp_target_task_completion (team, task);
707 gomp_mutex_unlock (&team->task_lock);
708 }
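
/* Summary of the hand-off protocol implemented by the two functions above
   together with gomp_create_target_task and the task scheduling loops below
   (a restatement of the code in this file, not of the plugin interface as a
   whole): the thread that ran gomp_target_task_fn re-acquires task_lock and
   either finds GOMP_TARGET_TASK_FINISHED already set by the plugin callback,
   in which case it requeues the task itself via gomp_target_task_completion,
   or it sets GOMP_TARGET_TASK_RUNNING and lets a later
   GOMP_PLUGIN_target_task_completion call do the requeuing.  */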
709
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
711
712 /* Called for nowait target tasks. */
713
714 bool
715 gomp_create_target_task (struct gomp_device_descr *devicep,
716 void (*fn) (void *), size_t mapnum, void **hostaddrs,
717 size_t *sizes, unsigned short *kinds,
718 unsigned int flags, void **depend, void **args,
719 enum gomp_target_task_state state)
720 {
721 struct gomp_thread *thr = gomp_thread ();
722 struct gomp_team *team = thr->ts.team;
723
724 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
725 if (__builtin_expect (gomp_cancel_var, 0) && team)
726 {
727 if (gomp_team_barrier_cancelled (&team->barrier))
728 return true;
729 if (thr->task->taskgroup)
730 {
731 if (thr->task->taskgroup->cancelled)
732 return true;
733 if (thr->task->taskgroup->workshare
734 && thr->task->taskgroup->prev
735 && thr->task->taskgroup->prev->cancelled)
736 return true;
737 }
738 }
739
740 struct gomp_target_task *ttask;
741 struct gomp_task *task;
742 struct gomp_task *parent = thr->task;
743 struct gomp_taskgroup *taskgroup = parent->taskgroup;
744 bool do_wake;
745 size_t depend_size = 0;
746 uintptr_t depend_cnt = 0;
747 size_t tgt_align = 0, tgt_size = 0;
748
749 if (depend != NULL)
750 {
751 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
752 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
753 }
754 if (fn)
755 {
756 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
757 are firstprivate on the target task. */
758 size_t i;
759 for (i = 0; i < mapnum; i++)
760 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
761 {
762 size_t align = (size_t) 1 << (kinds[i] >> 8);
763 if (tgt_align < align)
764 tgt_align = align;
765 tgt_size = (tgt_size + align - 1) & ~(align - 1);
766 tgt_size += sizes[i];
767 }
768 if (tgt_align)
769 tgt_size += tgt_align - 1;
770 else
771 tgt_size = 0;
772 }
773
774 task = gomp_malloc (sizeof (*task) + depend_size
775 + sizeof (*ttask)
776 + mapnum * (sizeof (void *) + sizeof (size_t)
777 + sizeof (unsigned short))
778 + tgt_size);
779 gomp_init_task (task, parent, gomp_icv (false));
780 task->priority = 0;
781 task->kind = GOMP_TASK_WAITING;
782 task->in_tied_task = parent->in_tied_task;
783 task->taskgroup = taskgroup;
784 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
785 ttask->devicep = devicep;
786 ttask->fn = fn;
787 ttask->mapnum = mapnum;
788 ttask->args = args;
789 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
790 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
791 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
792 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
793 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
794 if (tgt_align)
795 {
796 char *tgt = (char *) &ttask->kinds[mapnum];
797 size_t i;
798 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
799 if (al)
800 tgt += tgt_align - al;
801 tgt_size = 0;
802 for (i = 0; i < mapnum; i++)
803 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
804 {
805 size_t align = (size_t) 1 << (kinds[i] >> 8);
806 tgt_size = (tgt_size + align - 1) & ~(align - 1);
807 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
808 ttask->hostaddrs[i] = tgt + tgt_size;
809 tgt_size = tgt_size + sizes[i];
810 }
811 }
812 ttask->flags = flags;
813 ttask->state = state;
814 ttask->task = task;
815 ttask->team = team;
816 task->fn = NULL;
817 task->fn_data = ttask;
818 task->final_task = 0;
819 gomp_mutex_lock (&team->task_lock);
820 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
821 if (__builtin_expect (gomp_cancel_var, 0))
822 {
823 if (gomp_team_barrier_cancelled (&team->barrier))
824 {
825 do_cancel:
826 gomp_mutex_unlock (&team->task_lock);
827 gomp_finish_task (task);
828 free (task);
829 return true;
830 }
831 if (taskgroup)
832 {
833 if (taskgroup->cancelled)
834 goto do_cancel;
835 if (taskgroup->workshare
836 && taskgroup->prev
837 && taskgroup->prev->cancelled)
838 goto do_cancel;
839 }
840 }
841 if (depend_size)
842 {
843 gomp_task_handle_depend (task, parent, depend);
844 if (task->num_dependees)
845 {
846 if (taskgroup)
847 taskgroup->num_children++;
848 gomp_mutex_unlock (&team->task_lock);
849 return true;
850 }
851 }
852 if (state == GOMP_TARGET_TASK_DATA)
853 {
854 gomp_task_run_post_handle_depend_hash (task);
855 gomp_mutex_unlock (&team->task_lock);
856 gomp_finish_task (task);
857 free (task);
858 return false;
859 }
860 if (taskgroup)
861 taskgroup->num_children++;
862 /* For async offloading, if we don't need to wait for dependencies,
863 run the gomp_target_task_fn right away, essentially scheduling the
864 mapping part of the task in the current thread. */
865 if (devicep != NULL
866 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
867 {
868 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
869 PRIORITY_INSERT_END,
870 /*adjust_parent_depends_on=*/false,
871 task->parent_depends_on);
872 if (taskgroup)
873 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
874 task, 0, PRIORITY_INSERT_END,
875 /*adjust_parent_depends_on=*/false,
876 task->parent_depends_on);
877 task->pnode[PQ_TEAM].next = NULL;
878 task->pnode[PQ_TEAM].prev = NULL;
879 task->kind = GOMP_TASK_TIED;
880 ++team->task_count;
881 gomp_mutex_unlock (&team->task_lock);
882
883 thr->task = task;
884 gomp_target_task_fn (task->fn_data);
885 thr->task = parent;
886
887 gomp_mutex_lock (&team->task_lock);
888 task->kind = GOMP_TASK_ASYNC_RUNNING;
889 /* If GOMP_PLUGIN_target_task_completion has run already
890 in between gomp_target_task_fn and the mutex lock,
891 perform the requeuing here. */
892 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
893 gomp_target_task_completion (team, task);
894 else
895 ttask->state = GOMP_TARGET_TASK_RUNNING;
896 gomp_mutex_unlock (&team->task_lock);
897 return true;
898 }
899 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
900 PRIORITY_INSERT_BEGIN,
901 /*adjust_parent_depends_on=*/false,
902 task->parent_depends_on);
903 if (taskgroup)
904 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
905 PRIORITY_INSERT_BEGIN,
906 /*adjust_parent_depends_on=*/false,
907 task->parent_depends_on);
908 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
909 PRIORITY_INSERT_END,
910 /*adjust_parent_depends_on=*/false,
911 task->parent_depends_on);
912 ++team->task_count;
913 ++team->task_queued_count;
914 gomp_team_barrier_set_task_pending (&team->barrier);
915 do_wake = team->task_running_count + !parent->in_tied_task
916 < team->nthreads;
917 gomp_mutex_unlock (&team->task_lock);
918 if (do_wake)
919 gomp_team_barrier_wake (&team->barrier, 1);
920 return true;
921 }
922
923 /* Given a parent_depends_on task in LIST, move it to the front of its
924 priority so it is run as soon as possible.
925
926 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
927
928 We rearrange the queue such that all parent_depends_on tasks are
929 first, and last_parent_depends_on points to the last such task we
930 rearranged. For example, given the following tasks in a queue
931 where PD[123] are the parent_depends_on tasks:
932
933 task->children
934 |
935 V
936 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
937
938 We rearrange such that:
939
940 task->children
941 | +--- last_parent_depends_on
942 | |
943 V V
944 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
945
946 static void inline
947 priority_list_upgrade_task (struct priority_list *list,
948 struct priority_node *node)
949 {
950 struct priority_node *last_parent_depends_on
951 = list->last_parent_depends_on;
952 if (last_parent_depends_on)
953 {
954 node->prev->next = node->next;
955 node->next->prev = node->prev;
956 node->prev = last_parent_depends_on;
957 node->next = last_parent_depends_on->next;
958 node->prev->next = node;
959 node->next->prev = node;
960 }
961 else if (node != list->tasks)
962 {
963 node->prev->next = node->next;
964 node->next->prev = node->prev;
965 node->prev = list->tasks->prev;
966 node->next = list->tasks;
967 list->tasks = node;
968 node->prev->next = node;
969 node->next->prev = node;
970 }
971 list->last_parent_depends_on = node;
972 }
973
974 /* Given a parent_depends_on TASK in its parent's children_queue, move
975 it to the front of its priority so it is run as soon as possible.
976
977 PARENT is passed as an optimization.
978
979 (This function could be defined in priority_queue.c, but we want it
980 inlined, and putting it in priority_queue.h is not an option, given
981 that gomp_task has not been properly defined at that point). */
982
983 static void inline
984 priority_queue_upgrade_task (struct gomp_task *task,
985 struct gomp_task *parent)
986 {
987 struct priority_queue *head = &parent->children_queue;
988 struct priority_node *node = &task->pnode[PQ_CHILDREN];
989 #if _LIBGOMP_CHECKING_
990 if (!task->parent_depends_on)
991 gomp_fatal ("priority_queue_upgrade_task: task must be a "
992 "parent_depends_on task");
993 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
994 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
995 #endif
996 if (priority_queue_multi_p (head))
997 {
998 struct priority_list *list
999 = priority_queue_lookup_priority (head, task->priority);
1000 priority_list_upgrade_task (list, node);
1001 }
1002 else
1003 priority_list_upgrade_task (&head->l, node);
1004 }
1005
1006 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1007 the way in LIST so that other tasks can be considered for
1008 execution. LIST contains tasks of type TYPE.
1009
1010 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1011 if applicable. */
1012
1013 static void inline
1014 priority_list_downgrade_task (enum priority_queue_type type,
1015 struct priority_list *list,
1016 struct gomp_task *child_task)
1017 {
1018 struct priority_node *node = task_to_priority_node (type, child_task);
1019 if (list->tasks == node)
1020 list->tasks = node->next;
1021 else if (node->next != list->tasks)
1022 {
1023 /* The task in NODE is about to become TIED and TIED tasks
1024 cannot come before WAITING tasks. If we're about to
1025 leave the queue in such an indeterminate state, rewire
1026 things appropriately. However, a TIED task at the end is
1027 perfectly fine. */
1028 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1029 if (next_task->kind == GOMP_TASK_WAITING)
1030 {
1031 /* Remove from list. */
1032 node->prev->next = node->next;
1033 node->next->prev = node->prev;
1034 /* Rewire at the end. */
1035 node->next = list->tasks;
1036 node->prev = list->tasks->prev;
1037 list->tasks->prev->next = node;
1038 list->tasks->prev = node;
1039 }
1040 }
1041
1042 /* If the current task is the last_parent_depends_on for its
1043 priority, adjust last_parent_depends_on appropriately. */
1044 if (__builtin_expect (child_task->parent_depends_on, 0)
1045 && list->last_parent_depends_on == node)
1046 {
1047 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1048 if (node->prev != node
1049 && prev_child->kind == GOMP_TASK_WAITING
1050 && prev_child->parent_depends_on)
1051 list->last_parent_depends_on = node->prev;
1052 else
1053 {
1054 /* There are no more parent_depends_on entries waiting
1055 to run, clear the list. */
1056 list->last_parent_depends_on = NULL;
1057 }
1058 }
1059 }
1060
1061 /* Given a TASK in HEAD that is about to be executed, move it out of
1062 the way so that other tasks can be considered for execution. HEAD
1063 contains tasks of type TYPE.
1064
1065 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1066 if applicable.
1067
1068 (This function could be defined in priority_queue.c, but we want it
1069 inlined, and putting it in priority_queue.h is not an option, given
1070 that gomp_task has not been properly defined at that point). */
1071
1072 static void inline
1073 priority_queue_downgrade_task (enum priority_queue_type type,
1074 struct priority_queue *head,
1075 struct gomp_task *task)
1076 {
1077 #if _LIBGOMP_CHECKING_
1078 if (!priority_queue_task_in_queue_p (type, head, task))
1079 gomp_fatal ("Attempt to downgrade missing task %p", task);
1080 #endif
1081 if (priority_queue_multi_p (head))
1082 {
1083 struct priority_list *list
1084 = priority_queue_lookup_priority (head, task->priority);
1085 priority_list_downgrade_task (type, list, task);
1086 }
1087 else
1088 priority_list_downgrade_task (type, &head->l, task);
1089 }
1090
1091 /* Set up CHILD_TASK to execute. This is done by setting the task to
1092 TIED, and updating all relevant queues so that CHILD_TASK is no
1093 longer chosen for scheduling. Also, remove CHILD_TASK from the
1094 overall team task queue entirely.
1095
1096 Return TRUE if task or its containing taskgroup has been
1097 cancelled. */
1098
1099 static inline bool
1100 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1101 struct gomp_team *team)
1102 {
1103 #if _LIBGOMP_CHECKING_
1104 if (child_task->parent)
1105 priority_queue_verify (PQ_CHILDREN,
1106 &child_task->parent->children_queue, true);
1107 if (child_task->taskgroup)
1108 priority_queue_verify (PQ_TASKGROUP,
1109 &child_task->taskgroup->taskgroup_queue, false);
1110 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1111 #endif
1112
1113 /* Task is about to go tied, move it out of the way. */
1114 if (parent)
1115 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1116 child_task);
1117
1118 /* Task is about to go tied, move it out of the way. */
1119 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1120 if (taskgroup)
1121 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1122 child_task);
1123
1124 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1125 MEMMODEL_RELAXED);
1126 child_task->pnode[PQ_TEAM].next = NULL;
1127 child_task->pnode[PQ_TEAM].prev = NULL;
1128 child_task->kind = GOMP_TASK_TIED;
1129
1130 if (--team->task_queued_count == 0)
1131 gomp_team_barrier_clear_task_pending (&team->barrier);
1132 if (__builtin_expect (gomp_cancel_var, 0)
1133 && !child_task->copy_ctors_done)
1134 {
1135 if (gomp_team_barrier_cancelled (&team->barrier))
1136 return true;
1137 if (taskgroup)
1138 {
1139 if (taskgroup->cancelled)
1140 return true;
1141 if (taskgroup->workshare
1142 && taskgroup->prev
1143 && taskgroup->prev->cancelled)
1144 return true;
1145 }
1146 }
1147 return false;
1148 }
1149
1150 static void
1151 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1152 {
1153 struct gomp_task *parent = child_task->parent;
1154 size_t i;
1155
1156 for (i = 0; i < child_task->depend_count; i++)
1157 if (!child_task->depend[i].redundant)
1158 {
1159 if (child_task->depend[i].next)
1160 child_task->depend[i].next->prev = child_task->depend[i].prev;
1161 if (child_task->depend[i].prev)
1162 child_task->depend[i].prev->next = child_task->depend[i].next;
1163 else
1164 {
1165 hash_entry_type *slot
1166 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1167 NO_INSERT);
1168 if (*slot != &child_task->depend[i])
1169 abort ();
1170 if (child_task->depend[i].next)
1171 *slot = child_task->depend[i].next;
1172 else
1173 htab_clear_slot (parent->depend_hash, slot);
1174 }
1175 }
1176 }
1177
1178 /* After a CHILD_TASK has been run, adjust the dependency queue for
1179 each task that depends on CHILD_TASK, to record the fact that there
1180 is one less dependency to worry about. If a task that depended on
1181 CHILD_TASK now has no dependencies, place it in the various queues
1182 so it gets scheduled to run.
1183
1184 TEAM is the team to which CHILD_TASK belongs. */
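
/* For example (a sketch of the mechanism, with task names invented for the
   purpose of illustration): if T1 has depend(out: x) and T2 and T3 each have
   depend(in: x), then T2 and T3 were recorded in T1's dependers vector by
   gomp_task_handle_depend, each with num_dependees of 1.  When T1 finishes,
   this function decrements those counters and, as they reach zero, inserts
   T2 and T3 into the parent, taskgroup and team queues so the scheduling
   points can pick them up.  */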
1185
1186 static size_t
1187 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1188 struct gomp_team *team)
1189 {
1190 struct gomp_task *parent = child_task->parent;
1191 size_t i, count = child_task->dependers->n_elem, ret = 0;
1192 for (i = 0; i < count; i++)
1193 {
1194 struct gomp_task *task = child_task->dependers->elem[i];
1195
1196 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1197 TASK's remaining dependencies. Once TASK has no other
1198 dependencies, put it into the various queues so it will get
1199 scheduled for execution. */
1200 if (--task->num_dependees != 0)
1201 continue;
1202
1203 struct gomp_taskgroup *taskgroup = task->taskgroup;
1204 if (parent)
1205 {
1206 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1207 task, task->priority,
1208 PRIORITY_INSERT_BEGIN,
1209 /*adjust_parent_depends_on=*/true,
1210 task->parent_depends_on);
1211 if (parent->taskwait)
1212 {
1213 if (parent->taskwait->in_taskwait)
1214 {
1215 /* One more task has had its dependencies met.
1216 Inform any waiters. */
1217 parent->taskwait->in_taskwait = false;
1218 gomp_sem_post (&parent->taskwait->taskwait_sem);
1219 }
1220 else if (parent->taskwait->in_depend_wait)
1221 {
1222 /* One more task has had its dependencies met.
1223 Inform any waiters. */
1224 parent->taskwait->in_depend_wait = false;
1225 gomp_sem_post (&parent->taskwait->taskwait_sem);
1226 }
1227 }
1228 }
1229 if (taskgroup)
1230 {
1231 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1232 task, task->priority,
1233 PRIORITY_INSERT_BEGIN,
1234 /*adjust_parent_depends_on=*/false,
1235 task->parent_depends_on);
1236 if (taskgroup->in_taskgroup_wait)
1237 {
1238 /* One more task has had its dependencies met.
1239 Inform any waiters. */
1240 taskgroup->in_taskgroup_wait = false;
1241 gomp_sem_post (&taskgroup->taskgroup_sem);
1242 }
1243 }
1244 priority_queue_insert (PQ_TEAM, &team->task_queue,
1245 task, task->priority,
1246 PRIORITY_INSERT_END,
1247 /*adjust_parent_depends_on=*/false,
1248 task->parent_depends_on);
1249 ++team->task_count;
1250 ++team->task_queued_count;
1251 ++ret;
1252 }
1253 free (child_task->dependers);
1254 child_task->dependers = NULL;
1255 if (ret > 1)
1256 gomp_team_barrier_set_task_pending (&team->barrier);
1257 return ret;
1258 }
1259
1260 static inline size_t
1261 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1262 struct gomp_team *team)
1263 {
1264 if (child_task->depend_count == 0)
1265 return 0;
1266
1267 /* If the parent is gone already, the hash table has been freed and
1268 nothing will use it anymore, so there is no need to remove anything from it. */
1269 if (child_task->parent != NULL)
1270 gomp_task_run_post_handle_depend_hash (child_task);
1271
1272 if (child_task->dependers == NULL)
1273 return 0;
1274
1275 return gomp_task_run_post_handle_dependers (child_task, team);
1276 }
1277
1278 /* Remove CHILD_TASK from its parent. */
1279
1280 static inline void
1281 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1282 {
1283 struct gomp_task *parent = child_task->parent;
1284 if (parent == NULL)
1285 return;
1286
1287 /* If this was the last task the parent was depending on,
1288 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1289 clean up and return. */
1290 if (__builtin_expect (child_task->parent_depends_on, 0)
1291 && --parent->taskwait->n_depend == 0
1292 && parent->taskwait->in_depend_wait)
1293 {
1294 parent->taskwait->in_depend_wait = false;
1295 gomp_sem_post (&parent->taskwait->taskwait_sem);
1296 }
1297
1298 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1299 child_task, MEMMODEL_RELEASE)
1300 && parent->taskwait && parent->taskwait->in_taskwait)
1301 {
1302 parent->taskwait->in_taskwait = false;
1303 gomp_sem_post (&parent->taskwait->taskwait_sem);
1304 }
1305 child_task->pnode[PQ_CHILDREN].next = NULL;
1306 child_task->pnode[PQ_CHILDREN].prev = NULL;
1307 }
1308
1309 /* Remove CHILD_TASK from its taskgroup. */
1310
1311 static inline void
1312 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1313 {
1314 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1315 if (taskgroup == NULL)
1316 return;
1317 bool empty = priority_queue_remove (PQ_TASKGROUP,
1318 &taskgroup->taskgroup_queue,
1319 child_task, MEMMODEL_RELAXED);
1320 child_task->pnode[PQ_TASKGROUP].next = NULL;
1321 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1322 if (taskgroup->num_children > 1)
1323 --taskgroup->num_children;
1324 else
1325 {
1326 /* We access taskgroup->num_children in GOMP_taskgroup_end
1327 outside of the task lock mutex region, so we
1328 need a release barrier here to ensure memory
1329 written by child_task->fn above is flushed
1330 before the zero is written. */
1331 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1332 }
1333 if (empty && taskgroup->in_taskgroup_wait)
1334 {
1335 taskgroup->in_taskgroup_wait = false;
1336 gomp_sem_post (&taskgroup->taskgroup_sem);
1337 }
1338 }
1339
1340 void
1341 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1342 {
1343 struct gomp_thread *thr = gomp_thread ();
1344 struct gomp_team *team = thr->ts.team;
1345 struct gomp_task *task = thr->task;
1346 struct gomp_task *child_task = NULL;
1347 struct gomp_task *to_free = NULL;
1348 int do_wake = 0;
1349
1350 gomp_mutex_lock (&team->task_lock);
1351 if (gomp_barrier_last_thread (state))
1352 {
1353 if (team->task_count == 0)
1354 {
1355 gomp_team_barrier_done (&team->barrier, state);
1356 gomp_mutex_unlock (&team->task_lock);
1357 gomp_team_barrier_wake (&team->barrier, 0);
1358 return;
1359 }
1360 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1361 }
1362
1363 while (1)
1364 {
1365 bool cancelled = false;
1366
1367 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1368 {
1369 bool ignored;
1370 child_task
1371 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1372 PQ_IGNORED, NULL,
1373 &ignored);
1374 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1375 team);
1376 if (__builtin_expect (cancelled, 0))
1377 {
1378 if (to_free)
1379 {
1380 gomp_finish_task (to_free);
1381 free (to_free);
1382 to_free = NULL;
1383 }
1384 goto finish_cancelled;
1385 }
1386 team->task_running_count++;
1387 child_task->in_tied_task = true;
1388 }
1389 else if (team->task_count == 0
1390 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1391 {
1392 gomp_team_barrier_done (&team->barrier, state);
1393 gomp_mutex_unlock (&team->task_lock);
1394 gomp_team_barrier_wake (&team->barrier, 0);
1395 if (to_free)
1396 {
1397 gomp_finish_task (to_free);
1398 free (to_free);
1399 }
1400 return;
1401 }
1402 gomp_mutex_unlock (&team->task_lock);
1403 if (do_wake)
1404 {
1405 gomp_team_barrier_wake (&team->barrier, do_wake);
1406 do_wake = 0;
1407 }
1408 if (to_free)
1409 {
1410 gomp_finish_task (to_free);
1411 free (to_free);
1412 to_free = NULL;
1413 }
1414 if (child_task)
1415 {
1416 thr->task = child_task;
1417 if (__builtin_expect (child_task->fn == NULL, 0))
1418 {
1419 if (gomp_target_task_fn (child_task->fn_data))
1420 {
1421 thr->task = task;
1422 gomp_mutex_lock (&team->task_lock);
1423 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1424 team->task_running_count--;
1425 struct gomp_target_task *ttask
1426 = (struct gomp_target_task *) child_task->fn_data;
1427 /* If GOMP_PLUGIN_target_task_completion has run already
1428 in between gomp_target_task_fn and the mutex lock,
1429 perform the requeuing here. */
1430 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1431 gomp_target_task_completion (team, child_task);
1432 else
1433 ttask->state = GOMP_TARGET_TASK_RUNNING;
1434 child_task = NULL;
1435 continue;
1436 }
1437 }
1438 else
1439 child_task->fn (child_task->fn_data);
1440 thr->task = task;
1441 }
1442 else
1443 return;
1444 gomp_mutex_lock (&team->task_lock);
1445 if (child_task)
1446 {
1447 if (child_task->detach_team)
1448 {
1449 assert (child_task->detach_team == team);
1450 child_task->kind = GOMP_TASK_DETACHED;
1451 ++team->task_detach_count;
1452 --team->task_running_count;
1453 gomp_debug (0,
1454 "thread %d: task with event %p finished without "
1455 "completion event fulfilled in team barrier\n",
1456 thr->ts.team_id, child_task);
1457 child_task = NULL;
1458 continue;
1459 }
1460
1461 finish_cancelled:;
1462 size_t new_tasks
1463 = gomp_task_run_post_handle_depend (child_task, team);
1464 gomp_task_run_post_remove_parent (child_task);
1465 gomp_clear_parent (&child_task->children_queue);
1466 gomp_task_run_post_remove_taskgroup (child_task);
1467 to_free = child_task;
1468 if (!cancelled)
1469 team->task_running_count--;
1470 child_task = NULL;
1471 if (new_tasks > 1)
1472 {
1473 do_wake = team->nthreads - team->task_running_count;
1474 if (do_wake > new_tasks)
1475 do_wake = new_tasks;
1476 }
1477 --team->task_count;
1478 }
1479 }
1480 }
1481
1482 /* Called when encountering a taskwait directive.
1483
1484 Wait for all children of the current task. */
1485
1486 void
1487 GOMP_taskwait (void)
1488 {
1489 struct gomp_thread *thr = gomp_thread ();
1490 struct gomp_team *team = thr->ts.team;
1491 struct gomp_task *task = thr->task;
1492 struct gomp_task *child_task = NULL;
1493 struct gomp_task *to_free = NULL;
1494 struct gomp_taskwait taskwait;
1495 int do_wake = 0;
1496
1497 /* The acquire barrier on load of task->children here synchronizes
1498 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1499 not necessary that we synchronize with other non-NULL writes at
1500 this point, but we must ensure that all writes to memory by a
1501 child thread task work function are seen before we exit from
1502 GOMP_taskwait. */
1503 if (task == NULL
1504 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1505 return;
1506
1507 memset (&taskwait, 0, sizeof (taskwait));
1508 bool child_q = false;
1509 gomp_mutex_lock (&team->task_lock);
1510 while (1)
1511 {
1512 bool cancelled = false;
1513 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1514 {
1515 bool destroy_taskwait = task->taskwait != NULL;
1516 task->taskwait = NULL;
1517 gomp_mutex_unlock (&team->task_lock);
1518 if (to_free)
1519 {
1520 gomp_finish_task (to_free);
1521 free (to_free);
1522 }
1523 if (destroy_taskwait)
1524 gomp_sem_destroy (&taskwait.taskwait_sem);
1525 return;
1526 }
1527 struct gomp_task *next_task
1528 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1529 PQ_TEAM, &team->task_queue, &child_q);
1530 if (next_task->kind == GOMP_TASK_WAITING)
1531 {
1532 child_task = next_task;
1533 cancelled
1534 = gomp_task_run_pre (child_task, task, team);
1535 if (__builtin_expect (cancelled, 0))
1536 {
1537 if (to_free)
1538 {
1539 gomp_finish_task (to_free);
1540 free (to_free);
1541 to_free = NULL;
1542 }
1543 goto finish_cancelled;
1544 }
1545 }
1546 else
1547 {
1548 /* All tasks we are waiting for are either running in other
1549 threads, are detached and waiting for their completion event
1550 to be fulfilled, or have not had their dependencies met (so
1551 they're not even in the queue). Wait
1552 for them. */
1553 if (task->taskwait == NULL)
1554 {
1555 taskwait.in_depend_wait = false;
1556 gomp_sem_init (&taskwait.taskwait_sem, 0);
1557 task->taskwait = &taskwait;
1558 }
1559 taskwait.in_taskwait = true;
1560 }
1561 gomp_mutex_unlock (&team->task_lock);
1562 if (do_wake)
1563 {
1564 gomp_team_barrier_wake (&team->barrier, do_wake);
1565 do_wake = 0;
1566 }
1567 if (to_free)
1568 {
1569 gomp_finish_task (to_free);
1570 free (to_free);
1571 to_free = NULL;
1572 }
1573 if (child_task)
1574 {
1575 thr->task = child_task;
1576 if (__builtin_expect (child_task->fn == NULL, 0))
1577 {
1578 if (gomp_target_task_fn (child_task->fn_data))
1579 {
1580 thr->task = task;
1581 gomp_mutex_lock (&team->task_lock);
1582 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1583 struct gomp_target_task *ttask
1584 = (struct gomp_target_task *) child_task->fn_data;
1585 /* If GOMP_PLUGIN_target_task_completion has run already
1586 in between gomp_target_task_fn and the mutex lock,
1587 perform the requeuing here. */
1588 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1589 gomp_target_task_completion (team, child_task);
1590 else
1591 ttask->state = GOMP_TARGET_TASK_RUNNING;
1592 child_task = NULL;
1593 continue;
1594 }
1595 }
1596 else
1597 child_task->fn (child_task->fn_data);
1598 thr->task = task;
1599 }
1600 else
1601 gomp_sem_wait (&taskwait.taskwait_sem);
1602 gomp_mutex_lock (&team->task_lock);
1603 if (child_task)
1604 {
1605 if (child_task->detach_team)
1606 {
1607 assert (child_task->detach_team == team);
1608 child_task->kind = GOMP_TASK_DETACHED;
1609 ++team->task_detach_count;
1610 gomp_debug (0,
1611 "thread %d: task with event %p finished without "
1612 "completion event fulfilled in taskwait\n",
1613 thr->ts.team_id, child_task);
1614 child_task = NULL;
1615 continue;
1616 }
1617
1618 finish_cancelled:;
1619 size_t new_tasks
1620 = gomp_task_run_post_handle_depend (child_task, team);
1621
1622 if (child_q)
1623 {
1624 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1625 child_task, MEMMODEL_RELAXED);
1626 child_task->pnode[PQ_CHILDREN].next = NULL;
1627 child_task->pnode[PQ_CHILDREN].prev = NULL;
1628 }
1629
1630 gomp_clear_parent (&child_task->children_queue);
1631
1632 gomp_task_run_post_remove_taskgroup (child_task);
1633
1634 to_free = child_task;
1635 child_task = NULL;
1636 team->task_count--;
1637 if (new_tasks > 1)
1638 {
1639 do_wake = team->nthreads - team->task_running_count
1640 - !task->in_tied_task;
1641 if (do_wake > new_tasks)
1642 do_wake = new_tasks;
1643 }
1644 }
1645 }
1646 }
1647
1648 /* Called when encountering a taskwait directive with depend clause(s).
1649 Wait as if it were a mergeable included task construct with an empty body. */
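
/* For instance, a construct such as

     #pragma omp taskwait depend(in: x)

   only waits for the previously generated sibling tasks it depends on
   through x (handled by gomp_task_maybe_wait_for_dependencies below),
   rather than for all children as a plain taskwait would.  */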
1650
1651 void
1652 GOMP_taskwait_depend (void **depend)
1653 {
1654 struct gomp_thread *thr = gomp_thread ();
1655 struct gomp_team *team = thr->ts.team;
1656
1657 /* If parallel or taskgroup has been cancelled, return early. */
1658 if (__builtin_expect (gomp_cancel_var, 0) && team)
1659 {
1660 if (gomp_team_barrier_cancelled (&team->barrier))
1661 return;
1662 if (thr->task->taskgroup)
1663 {
1664 if (thr->task->taskgroup->cancelled)
1665 return;
1666 if (thr->task->taskgroup->workshare
1667 && thr->task->taskgroup->prev
1668 && thr->task->taskgroup->prev->cancelled)
1669 return;
1670 }
1671 }
1672
1673 if (thr->task && thr->task->depend_hash)
1674 gomp_task_maybe_wait_for_dependencies (depend);
1675 }
1676
1677 /* An undeferred task is about to run. Wait for all tasks that this
1678 undeferred task depends on.
1679
1680 This is done by first putting all known ready dependencies
1681 (dependencies that have their own dependencies met) at the top of
1682 the scheduling queues. Then we iterate through these imminently
1683 ready tasks (and possibly other high priority tasks), and run them.
1684 If we run out of ready dependencies to execute, we either wait for
1685 the remaining dependencies to finish, or wait for them to get
1686 scheduled so we can run them.
1687
1688 DEPEND is as in GOMP_task. */
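
/* A sketch of the situation this handles (compute and use are hypothetical
   user functions, not part of the original source): given

     #pragma omp task depend(out: x)
       x = compute ();
     #pragma omp task if (0) depend(in: x)
       use (x);

   the second task is undeferred and runs immediately in the encountering
   thread, so GOMP_task calls this function first to make sure the producer
   task has finished.  */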
1689
1690 void
1691 gomp_task_maybe_wait_for_dependencies (void **depend)
1692 {
1693 struct gomp_thread *thr = gomp_thread ();
1694 struct gomp_task *task = thr->task;
1695 struct gomp_team *team = thr->ts.team;
1696 struct gomp_task_depend_entry elem, *ent = NULL;
1697 struct gomp_taskwait taskwait;
1698 size_t orig_ndepend = (uintptr_t) depend[0];
1699 size_t nout = (uintptr_t) depend[1];
1700 size_t ndepend = orig_ndepend;
1701 size_t normal = ndepend;
1702 size_t n = 2;
1703 size_t i;
1704 size_t num_awaited = 0;
1705 struct gomp_task *child_task = NULL;
1706 struct gomp_task *to_free = NULL;
1707 int do_wake = 0;
1708
1709 if (ndepend == 0)
1710 {
1711 ndepend = nout;
1712 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1713 normal = nout + (uintptr_t) depend[4];
1714 n = 5;
1715 }
1716 gomp_mutex_lock (&team->task_lock);
1717 for (i = 0; i < ndepend; i++)
1718 {
1719 elem.addr = depend[i + n];
1720 elem.is_in = i >= nout;
1721 if (__builtin_expect (i >= normal, 0))
1722 {
1723 void **d = (void **) elem.addr;
1724 switch ((uintptr_t) d[1])
1725 {
1726 case GOMP_DEPEND_IN:
1727 break;
1728 case GOMP_DEPEND_OUT:
1729 case GOMP_DEPEND_INOUT:
1730 case GOMP_DEPEND_MUTEXINOUTSET:
1731 elem.is_in = 0;
1732 break;
1733 default:
1734 gomp_fatal ("unknown omp_depend_t dependence type %d",
1735 (int) (uintptr_t) d[1]);
1736 }
1737 elem.addr = d[0];
1738 }
1739 ent = htab_find (task->depend_hash, &elem);
1740 for (; ent; ent = ent->next)
1741 if (elem.is_in && ent->is_in)
1742 continue;
1743 else
1744 {
1745 struct gomp_task *tsk = ent->task;
1746 if (!tsk->parent_depends_on)
1747 {
1748 tsk->parent_depends_on = true;
1749 ++num_awaited;
1750 /* If dependency TSK itself has no dependencies and is
1751 ready to run, move it up front so that we run it as
1752 soon as possible. */
1753 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1754 priority_queue_upgrade_task (tsk, task);
1755 }
1756 }
1757 }
1758 if (num_awaited == 0)
1759 {
1760 gomp_mutex_unlock (&team->task_lock);
1761 return;
1762 }
1763
1764 memset (&taskwait, 0, sizeof (taskwait));
1765 taskwait.n_depend = num_awaited;
1766 gomp_sem_init (&taskwait.taskwait_sem, 0);
1767 task->taskwait = &taskwait;
1768
1769 while (1)
1770 {
1771 bool cancelled = false;
1772 if (taskwait.n_depend == 0)
1773 {
1774 task->taskwait = NULL;
1775 gomp_mutex_unlock (&team->task_lock);
1776 if (to_free)
1777 {
1778 gomp_finish_task (to_free);
1779 free (to_free);
1780 }
1781 gomp_sem_destroy (&taskwait.taskwait_sem);
1782 return;
1783 }
1784
1785 /* Theoretically when we have multiple priorities, we should
1786 choose between the highest priority item in
1787 task->children_queue and team->task_queue here, so we should
1788 use priority_queue_next_task(). However, since we are
1789 running an undeferred task, perhaps that makes all tasks it
1790 depends on undeferred, thus a priority of INF? This would
1791 make it unnecessary to take anything but the dependencies
1792 into account here.
1793
1794 On the other hand, if we want to use priority_queue_next_task(),
1795 care should be taken to only use priority_queue_remove()
1796 below if the task was actually removed from the children
1797 queue. */
1798 bool ignored;
1799 struct gomp_task *next_task
1800 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1801 PQ_IGNORED, NULL, &ignored);
1802
1803 if (next_task->kind == GOMP_TASK_WAITING)
1804 {
1805 child_task = next_task;
1806 cancelled
1807 = gomp_task_run_pre (child_task, task, team);
1808 if (__builtin_expect (cancelled, 0))
1809 {
1810 if (to_free)
1811 {
1812 gomp_finish_task (to_free);
1813 free (to_free);
1814 to_free = NULL;
1815 }
1816 goto finish_cancelled;
1817 }
1818 }
1819 else
1820 /* All tasks we are waiting for are either running in other
1821 threads, or they are tasks that have not had their
1822 dependencies met (so they're not even in the queue). Wait
1823 for them. */
1824 taskwait.in_depend_wait = true;
1825 gomp_mutex_unlock (&team->task_lock);
1826 if (do_wake)
1827 {
1828 gomp_team_barrier_wake (&team->barrier, do_wake);
1829 do_wake = 0;
1830 }
1831 if (to_free)
1832 {
1833 gomp_finish_task (to_free);
1834 free (to_free);
1835 to_free = NULL;
1836 }
1837 if (child_task)
1838 {
1839 thr->task = child_task;
1840 if (__builtin_expect (child_task->fn == NULL, 0))
1841 {
1842 if (gomp_target_task_fn (child_task->fn_data))
1843 {
1844 thr->task = task;
1845 gomp_mutex_lock (&team->task_lock);
1846 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1847 struct gomp_target_task *ttask
1848 = (struct gomp_target_task *) child_task->fn_data;
1849 /* If GOMP_PLUGIN_target_task_completion has run already
1850 in between gomp_target_task_fn and the mutex lock,
1851 perform the requeuing here. */
1852 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1853 gomp_target_task_completion (team, child_task);
1854 else
1855 ttask->state = GOMP_TARGET_TASK_RUNNING;
1856 child_task = NULL;
1857 continue;
1858 }
1859 }
1860 else
1861 child_task->fn (child_task->fn_data);
1862 thr->task = task;
1863 }
1864 else
1865 gomp_sem_wait (&taskwait.taskwait_sem);
1866 gomp_mutex_lock (&team->task_lock);
1867 if (child_task)
1868 {
1869 finish_cancelled:;
1870 size_t new_tasks
1871 = gomp_task_run_post_handle_depend (child_task, team);
1872 if (child_task->parent_depends_on)
1873 --taskwait.n_depend;
1874
1875 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1876 child_task, MEMMODEL_RELAXED);
1877 child_task->pnode[PQ_CHILDREN].next = NULL;
1878 child_task->pnode[PQ_CHILDREN].prev = NULL;
1879
1880 gomp_clear_parent (&child_task->children_queue);
1881 gomp_task_run_post_remove_taskgroup (child_task);
1882 to_free = child_task;
1883 child_task = NULL;
1884 team->task_count--;
1885 if (new_tasks > 1)
1886 {
1887 do_wake = team->nthreads - team->task_running_count
1888 - !task->in_tied_task;
1889 if (do_wake > new_tasks)
1890 do_wake = new_tasks;
1891 }
1892 }
1893 }
1894 }
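/* As a rough illustration, this wait is reached for undeferred tasks
   that carry depend clauses and for taskwait with depend clauses, e.g.:

     int x = 0;
     #pragma omp task shared(x) depend(out: x)
       x = 42;
     #pragma omp taskwait depend(in: x)

   Only the previously generated sibling tasks named by the depend
   clauses are awaited here, not every child task.  */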
1895
1896 /* Called when encountering a taskyield directive. */
1897
1898 void
1899 GOMP_taskyield (void)
1900 {
1901 /* Nothing at the moment. */
1902 }
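/* For illustration, a bare

     #pragma omp taskyield

   in user code is lowered to a call to GOMP_taskyield (), so with the
   stub above the directive is currently a no-op in libgomp.  */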
1903
1904 static inline struct gomp_taskgroup *
1905 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1906 {
1907 struct gomp_taskgroup *taskgroup
1908 = gomp_malloc (sizeof (struct gomp_taskgroup));
1909 taskgroup->prev = prev;
1910 priority_queue_init (&taskgroup->taskgroup_queue);
1911 taskgroup->reductions = prev ? prev->reductions : NULL;
1912 taskgroup->in_taskgroup_wait = false;
1913 taskgroup->cancelled = false;
1914 taskgroup->workshare = false;
1915 taskgroup->num_children = 0;
1916 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1917 return taskgroup;
1918 }
1919
1920 void
1921 GOMP_taskgroup_start (void)
1922 {
1923 struct gomp_thread *thr = gomp_thread ();
1924 struct gomp_team *team = thr->ts.team;
1925 struct gomp_task *task = thr->task;
1926
1927 /* If team is NULL, all tasks are executed as
1928 GOMP_TASK_UNDEFERRED tasks, and thus all child tasks of the
1929 taskgroup and their descendant tasks will have finished
1930 by the time GOMP_taskgroup_end is called. */
1931 if (team == NULL)
1932 return;
1933 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1934 }
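/* For illustration, a taskgroup region such as

     #pragma omp taskgroup
     {
       #pragma omp task
         work ();		// work () is a placeholder for user code
     }

   is lowered to roughly GOMP_taskgroup_start (); GOMP_task (...);
   GOMP_taskgroup_end ();, with GOMP_taskgroup_end below waiting for
   all tasks generated in the region and their descendants.  */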
1935
1936 void
1937 GOMP_taskgroup_end (void)
1938 {
1939 struct gomp_thread *thr = gomp_thread ();
1940 struct gomp_team *team = thr->ts.team;
1941 struct gomp_task *task = thr->task;
1942 struct gomp_taskgroup *taskgroup;
1943 struct gomp_task *child_task = NULL;
1944 struct gomp_task *to_free = NULL;
1945 int do_wake = 0;
1946
1947 if (team == NULL)
1948 return;
1949 taskgroup = task->taskgroup;
1950 if (__builtin_expect (taskgroup == NULL, 0)
1951 && thr->ts.level == 0)
1952 {
1953 /* This can happen if GOMP_taskgroup_start is called when
1954 thr->ts.team == NULL, but inside the taskgroup there
1955 is a #pragma omp target nowait that creates an implicit
1956 team with a single thread. In this case, we want to wait
1957 for all outstanding tasks in this team. */
1958 gomp_team_barrier_wait (&team->barrier);
1959 return;
1960 }
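/* A minimal sketch of the case handled just above (orphaned_fn is only
   a placeholder name):

     void orphaned_fn (void)	// called outside any parallel region
     {
     #pragma omp taskgroup
       {
       #pragma omp target nowait
         ;
       }
     }

   GOMP_taskgroup_start saw team == NULL and returned without creating
   a taskgroup, but the target nowait task later created an implicit
   single-thread team whose outstanding tasks must still be waited for
   here.  */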
1961
1962 /* The acquire barrier on load of taskgroup->num_children here
1963 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1964 It is not necessary that we synchronize with other non-0 writes at
1965 this point, but we must ensure that all writes to memory by a
1966 child task's work function in another thread are seen before we exit from
1967 GOMP_taskgroup_end. */
1968 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1969 goto finish;
1970
1971 bool unused;
1972 gomp_mutex_lock (&team->task_lock);
1973 while (1)
1974 {
1975 bool cancelled = false;
1976 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1977 MEMMODEL_RELAXED))
1978 {
1979 if (taskgroup->num_children)
1980 {
1981 if (priority_queue_empty_p (&task->children_queue,
1982 MEMMODEL_RELAXED))
1983 goto do_wait;
1984 child_task
1985 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1986 PQ_TEAM, &team->task_queue,
1987 &unused);
1988 }
1989 else
1990 {
1991 gomp_mutex_unlock (&team->task_lock);
1992 if (to_free)
1993 {
1994 gomp_finish_task (to_free);
1995 free (to_free);
1996 }
1997 goto finish;
1998 }
1999 }
2000 else
2001 child_task
2002 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2003 PQ_TEAM, &team->task_queue, &unused);
2004 if (child_task->kind == GOMP_TASK_WAITING)
2005 {
2006 cancelled
2007 = gomp_task_run_pre (child_task, child_task->parent, team);
2008 if (__builtin_expect (cancelled, 0))
2009 {
2010 if (to_free)
2011 {
2012 gomp_finish_task (to_free);
2013 free (to_free);
2014 to_free = NULL;
2015 }
2016 goto finish_cancelled;
2017 }
2018 }
2019 else
2020 {
2021 child_task = NULL;
2022 do_wait:
2023 /* All tasks we are waiting for are either running in other
2024 threads, or they are tasks that have not had their
2025 dependencies met (so they're not even in the queue). Wait
2026 for them. */
2027 taskgroup->in_taskgroup_wait = true;
2028 }
2029 gomp_mutex_unlock (&team->task_lock);
2030 if (do_wake)
2031 {
2032 gomp_team_barrier_wake (&team->barrier, do_wake);
2033 do_wake = 0;
2034 }
2035 if (to_free)
2036 {
2037 gomp_finish_task (to_free);
2038 free (to_free);
2039 to_free = NULL;
2040 }
2041 if (child_task)
2042 {
2043 thr->task = child_task;
2044 if (__builtin_expect (child_task->fn == NULL, 0))
2045 {
2046 if (gomp_target_task_fn (child_task->fn_data))
2047 {
2048 thr->task = task;
2049 gomp_mutex_lock (&team->task_lock);
2050 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2051 struct gomp_target_task *ttask
2052 = (struct gomp_target_task *) child_task->fn_data;
2053 /* If GOMP_PLUGIN_target_task_completion has run already
2054 in between gomp_target_task_fn and the mutex lock,
2055 perform the requeuing here. */
2056 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2057 gomp_target_task_completion (team, child_task);
2058 else
2059 ttask->state = GOMP_TARGET_TASK_RUNNING;
2060 child_task = NULL;
2061 continue;
2062 }
2063 }
2064 else
2065 child_task->fn (child_task->fn_data);
2066 thr->task = task;
2067 }
2068 else
2069 gomp_sem_wait (&taskgroup->taskgroup_sem);
2070 gomp_mutex_lock (&team->task_lock);
2071 if (child_task)
2072 {
2073 if (child_task->detach_team)
2074 {
2075 assert (child_task->detach_team == team);
2076 child_task->kind = GOMP_TASK_DETACHED;
2077 ++team->task_detach_count;
2078 gomp_debug (0,
2079 "thread %d: task with event %p finished without "
2080 "completion event fulfilled in taskgroup\n",
2081 thr->ts.team_id, child_task);
2082 child_task = NULL;
2083 continue;
2084 }
2085
2086 finish_cancelled:;
2087 size_t new_tasks
2088 = gomp_task_run_post_handle_depend (child_task, team);
2089 gomp_task_run_post_remove_parent (child_task);
2090 gomp_clear_parent (&child_task->children_queue);
2091 gomp_task_run_post_remove_taskgroup (child_task);
2092 to_free = child_task;
2093 child_task = NULL;
2094 team->task_count--;
2095 if (new_tasks > 1)
2096 {
2097 do_wake = team->nthreads - team->task_running_count
2098 - !task->in_tied_task;
2099 if (do_wake > new_tasks)
2100 do_wake = new_tasks;
2101 }
2102 }
2103 }
2104
2105 finish:
2106 task->taskgroup = taskgroup->prev;
2107 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2108 free (taskgroup);
2109 }
2110
2111 static inline __attribute__((always_inline)) void
2112 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2113 unsigned nthreads)
2114 {
2115 size_t total_cnt = 0;
2116 uintptr_t *d = data;
2117 struct htab *old_htab = NULL, *new_htab;
2118 do
2119 {
2120 if (__builtin_expect (orig != NULL, 0))
2121 {
2122 /* For worksharing task reductions, memory has been allocated
2123 already by some other thread that encountered the construct
2124 earlier. */
2125 d[2] = orig[2];
2126 d[6] = orig[6];
2127 orig = (uintptr_t *) orig[4];
2128 }
2129 else
2130 {
2131 size_t sz = d[1] * nthreads;
2132 /* Should use omp_alloc if d[3] is not -1. */
2133 void *ptr = gomp_aligned_alloc (d[2], sz);
2134 memset (ptr, '\0', sz);
2135 d[2] = (uintptr_t) ptr;
2136 d[6] = d[2] + sz;
2137 }
2138 d[5] = 0;
2139 total_cnt += d[0];
2140 if (d[4] == 0)
2141 {
2142 d[4] = (uintptr_t) old;
2143 break;
2144 }
2145 else
2146 d = (uintptr_t *) d[4];
2147 }
2148 while (1);
2149 if (old && old[5])
2150 {
2151 old_htab = (struct htab *) old[5];
2152 total_cnt += htab_elements (old_htab);
2153 }
2154 new_htab = htab_create (total_cnt);
2155 if (old_htab)
2156 {
2157 /* Copy old hash table, like in htab_expand. */
2158 hash_entry_type *p, *olimit;
2159 new_htab->n_elements = htab_elements (old_htab);
2160 olimit = old_htab->entries + old_htab->size;
2161 p = old_htab->entries;
2162 do
2163 {
2164 hash_entry_type x = *p;
2165 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2166 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2167 p++;
2168 }
2169 while (p < olimit);
2170 }
2171 d = data;
2172 do
2173 {
2174 size_t j;
2175 for (j = 0; j < d[0]; ++j)
2176 {
2177 uintptr_t *p = d + 7 + j * 3;
2178 p[2] = (uintptr_t) d;
2179 /* Ugly hack: hash_entry_type is defined for the task dependencies,
2180 which hash on their first element, a pointer. We need to hash
2181 these entries on their first sizeof (uintptr_t) bytes as well,
2182 which likewise contain a pointer. Hide the cast from the compiler. */
2183 hash_entry_type n;
2184 __asm ("" : "=g" (n) : "0" (p));
2185 *htab_find_slot (&new_htab, n, INSERT) = n;
2186 }
2187 if (d[4] == (uintptr_t) old)
2188 break;
2189 else
2190 d = (uintptr_t *) d[4];
2191 }
2192 while (1);
2193 d[5] = (uintptr_t) new_htab;
2194 }
2195
2196 static void
2197 gomp_create_artificial_team (void)
2198 {
2199 struct gomp_thread *thr = gomp_thread ();
2200 struct gomp_task_icv *icv;
2201 struct gomp_team *team = gomp_new_team (1);
2202 struct gomp_task *task = thr->task;
2203 icv = task ? &task->icv : &gomp_global_icv;
2204 team->prev_ts = thr->ts;
2205 thr->ts.team = team;
2206 thr->ts.team_id = 0;
2207 thr->ts.work_share = &team->work_shares[0];
2208 thr->ts.last_work_share = NULL;
2209 #ifdef HAVE_SYNC_BUILTINS
2210 thr->ts.single_count = 0;
2211 #endif
2212 thr->ts.static_trip = 0;
2213 thr->task = &team->implicit_task[0];
2214 gomp_init_task (thr->task, NULL, icv);
2215 if (task)
2216 {
2217 thr->task = task;
2218 gomp_end_task ();
2219 free (task);
2220 thr->task = &team->implicit_task[0];
2221 }
2222 #ifdef LIBGOMP_USE_PTHREADS
2223 else
2224 pthread_setspecific (gomp_thread_destructor, thr);
2225 #endif
2226 }
2227
2228 /* The format of data is:
2229 data[0] cnt
2230 data[1] size
2231 data[2] alignment (on output array pointer)
2232 data[3] allocator (-1 if malloc allocator)
2233 data[4] next pointer
2234 data[5] used internally (htab pointer)
2235 data[6] used internally (end of array)
2236 cnt times
2237 ent[0] address
2238 ent[1] offset
2239 ent[2] used internally (pointer to data[0])
2240 The entries are sorted by increasing offset, so that a binary
2241 search can be performed. Normally, data[8] is 0; the exception is
2242 worksharing construct task reductions in a cancellable parallel,
2243 where at offset 0 there should be space for a pointer and an integer
2244 which are used internally. */
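/* A minimal illustrative instance of this layout for a single
   taskgroup task_reduction (+: x) on a long x with the malloc
   allocator (the concrete values are only an example, not what any
   particular compiler emits):

     data[0] = 1;			// cnt: one reduction variable
     data[1] = sizeof (long);		// per-thread chunk size
     data[2] = __alignof__ (long);	// alignment; array pointer on output
     data[3] = (uintptr_t) -1;		// malloc allocator
     data[4] = 0;			// no further data blocks chained
     data[5] = 0;			// htab pointer, filled in internally
     data[6] = 0;			// end of array, filled in internally
     data[7] = (uintptr_t) &x;		// ent[0]: address of the list item
     data[8] = 0;			// ent[1]: offset within the chunk
     data[9] = 0;			// ent[2]: filled in internally  */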
2245
2246 void
2247 GOMP_taskgroup_reduction_register (uintptr_t *data)
2248 {
2249 struct gomp_thread *thr = gomp_thread ();
2250 struct gomp_team *team = thr->ts.team;
2251 struct gomp_task *task;
2252 unsigned nthreads;
2253 if (__builtin_expect (team == NULL, 0))
2254 {
2255 /* The task reduction code needs a team and task, so for
2256 orphaned taskgroups just create the implicit team. */
2257 gomp_create_artificial_team ();
2258 ialias_call (GOMP_taskgroup_start) ();
2259 team = thr->ts.team;
2260 }
2261 nthreads = team->nthreads;
2262 task = thr->task;
2263 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2264 task->taskgroup->reductions = data;
2265 }
2266
2267 void
2268 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2269 {
2270 uintptr_t *d = data;
2271 htab_free ((struct htab *) data[5]);
2272 do
2273 {
2274 gomp_aligned_free ((void *) d[2]);
2275 d = (uintptr_t *) d[4];
2276 }
2277 while (d && !d[5]);
2278 }
2279 ialias (GOMP_taskgroup_reduction_unregister)
2280
2281 /* For i = 0 to cnt-1, remap ptrs[i], which is either the address of
2282 the original list item or the address of a previously remapped
2283 original list item, to the address of the private copy, and store
2284 that back to ptrs[i]. For i < cntorig, additionally set ptrs[cnt+i]
2285 to the address of the original list item. */
2286
2287 void
2288 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2289 {
2290 struct gomp_thread *thr = gomp_thread ();
2291 struct gomp_task *task = thr->task;
2292 unsigned id = thr->ts.team_id;
2293 uintptr_t *data = task->taskgroup->reductions;
2294 uintptr_t *d;
2295 struct htab *reduction_htab = (struct htab *) data[5];
2296 size_t i;
2297 for (i = 0; i < cnt; ++i)
2298 {
2299 hash_entry_type ent, n;
2300 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2301 n = htab_find (reduction_htab, ent);
2302 if (n)
2303 {
2304 uintptr_t *p;
2305 __asm ("" : "=g" (p) : "0" (n));
2306 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2307 p[1] is the offset within the allocated chunk for each
2308 thread, p[2] is the array registered with
2309 GOMP_taskgroup_reduction_register, d[2] is the base of the
2310 allocated memory and d[1] is the size of the allocated chunk
2311 for one thread. */
2312 d = (uintptr_t *) p[2];
2313 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2314 if (__builtin_expect (i < cntorig, 0))
2315 ptrs[cnt + i] = (void *) p[0];
2316 continue;
2317 }
2318 d = data;
2319 while (d != NULL)
2320 {
2321 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2322 break;
2323 d = (uintptr_t *) d[4];
2324 }
2325 if (d == NULL)
2326 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2327 "task modifier for %p", ptrs[i]);
2328 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2329 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2330 if (__builtin_expect (i < cntorig, 0))
2331 {
2332 size_t lo = 0, hi = d[0] - 1;
2333 while (lo <= hi)
2334 {
2335 size_t m = (lo + hi) / 2;
2336 if (d[7 + 3 * m + 1] < off)
2337 lo = m + 1;
2338 else if (d[7 + 3 * m + 1] == off)
2339 {
2340 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2341 break;
2342 }
2343 else
2344 hi = m - 1;
2345 }
2346 if (lo > hi)
2347 gomp_fatal ("couldn't find matching task_reduction or reduction "
2348 "with task modifier for %p", ptrs[i]);
2349 }
2350 }
2351 }
2352
2353 struct gomp_taskgroup *
2354 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2355 {
2356 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2357 gomp_reduction_register (data, NULL, NULL, nthreads);
2358 taskgroup->reductions = data;
2359 return taskgroup;
2360 }
2361
2362 void
2363 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2364 {
2365 struct gomp_thread *thr = gomp_thread ();
2366 struct gomp_team *team = thr->ts.team;
2367 struct gomp_task *task = thr->task;
2368 unsigned nthreads = team->nthreads;
2369 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2370 task->taskgroup->reductions = data;
2371 }
2372
2373 void
2374 gomp_workshare_taskgroup_start (void)
2375 {
2376 struct gomp_thread *thr = gomp_thread ();
2377 struct gomp_team *team = thr->ts.team;
2378 struct gomp_task *task;
2379
2380 if (team == NULL)
2381 {
2382 gomp_create_artificial_team ();
2383 team = thr->ts.team;
2384 }
2385 task = thr->task;
2386 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2387 task->taskgroup->workshare = true;
2388 }
2389
2390 void
2391 GOMP_workshare_task_reduction_unregister (bool cancelled)
2392 {
2393 struct gomp_thread *thr = gomp_thread ();
2394 struct gomp_task *task = thr->task;
2395 struct gomp_team *team = thr->ts.team;
2396 uintptr_t *data = task->taskgroup->reductions;
2397 ialias_call (GOMP_taskgroup_end) ();
2398 if (thr->ts.team_id == 0)
2399 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2400 else
2401 htab_free ((struct htab *) data[5]);
2402
2403 if (!cancelled)
2404 gomp_team_barrier_wait (&team->barrier);
2405 }
2406
2407 int
2408 omp_in_final (void)
2409 {
2410 struct gomp_thread *thr = gomp_thread ();
2411 return thr->task && thr->task->final_task;
2412 }
2413
2414 ialias (omp_in_final)
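/* For illustration, omp_in_final returns nonzero inside a task region
   generated with a final clause that evaluated to true (and inside its
   descendant tasks, which are final as well), e.g.:

     #pragma omp task final(depth > 8)
     {
       if (omp_in_final ())
         solve_serial ();	// placeholder: avoid creating more tasks
       else
         solve_tasks ();	// placeholder: keep spawning tasks
     }
*/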
2415
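/* For illustration, omp_fulfill_event is the user-level half of the
   detach clause: a detached task does not complete until its event is
   fulfilled, typically from some asynchronous callback (start_async and
   done_callback below are placeholder names):

     void done_callback (omp_event_handle_t ev)
     {
       omp_fulfill_event (ev);
     }

     omp_event_handle_t ev;
     #pragma omp task detach(ev)
       start_async (done_callback, ev);	// task body returns at once

     #pragma omp taskwait	// completes once done_callback fulfills ev  */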
2416 void
2417 omp_fulfill_event (omp_event_handle_t event)
2418 {
2419 struct gomp_task *task = (struct gomp_task *) event;
2420 if (!task->deferred_p)
2421 {
2422 if (gomp_sem_getcount (task->completion_sem) > 0)
2423 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2424
2425 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2426 task);
2427 gomp_sem_post (task->completion_sem);
2428 return;
2429 }
2430
2431 struct gomp_team *team = __atomic_load_n (&task->detach_team,
2432 MEMMODEL_RELAXED);
2433 if (!team)
2434 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2435 "been fulfilled!\n", task);
2436
2437 gomp_mutex_lock (&team->task_lock);
2438 if (task->kind != GOMP_TASK_DETACHED)
2439 {
2440 /* The task has not finished running yet. */
2441 gomp_debug (0,
2442 "omp_fulfill_event: %p event fulfilled for unfinished "
2443 "task\n", task);
2444 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2445 gomp_mutex_unlock (&team->task_lock);
2446 return;
2447 }
2448
2449 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2450 task);
2451 size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2452 gomp_task_run_post_remove_parent (task);
2453 gomp_clear_parent (&task->children_queue);
2454 gomp_task_run_post_remove_taskgroup (task);
2455 team->task_count--;
2456 team->task_detach_count--;
2457
2458 int do_wake = 0;
2459 bool shackled_thread_p = team == gomp_thread ()->ts.team;
2460 if (new_tasks > 0)
2461 {
2462 /* Wake up threads to run new tasks. */
2463 gomp_team_barrier_set_task_pending (&team->barrier);
2464 do_wake = team->nthreads - team->task_running_count;
2465 if (do_wake > new_tasks)
2466 do_wake = new_tasks;
2467 }
2468
2469 if (!shackled_thread_p
2470 && !do_wake
2471 && team->task_detach_count == 0
2472 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2473 /* Ensure that at least one thread is woken up to signal that the
2474 barrier can finish. */
2475 do_wake = 1;
2476
2477 /* If we are running in an unshackled thread, the team might vanish before
2478 gomp_team_barrier_wake is run if we release the lock first, so keep the
2479 lock for the call in that case. */
2480 if (shackled_thread_p)
2481 gomp_mutex_unlock (&team->task_lock);
2482 if (do_wake)
2483 gomp_team_barrier_wake (&team->barrier, do_wake);
2484 if (!shackled_thread_p)
2485 gomp_mutex_unlock (&team->task_lock);
2486
2487 gomp_finish_task (task);
2488 free (task);
2489 }
2490
2491 ialias (omp_fulfill_event)