/* Copyright (C) 2007-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "gomp-constants.h"

acf0174b JJ |
35 | typedef struct gomp_task_depend_entry *hash_entry_type; |
36 | ||
37 | static inline void * | |
38 | htab_alloc (size_t size) | |
39 | { | |
40 | return gomp_malloc (size); | |
41 | } | |
42 | ||
43 | static inline void | |
44 | htab_free (void *ptr) | |
45 | { | |
46 | free (ptr); | |
47 | } | |
48 | ||
49 | #include "hashtab.h" | |
50 | ||
51 | static inline hashval_t | |
52 | htab_hash (hash_entry_type element) | |
53 | { | |
54 | return hash_pointer (element->addr); | |
55 | } | |
56 | ||
57 | static inline bool | |
58 | htab_eq (hash_entry_type x, hash_entry_type y) | |
59 | { | |
60 | return x->addr == y->addr; | |
61 | } | |
a68ab351 JJ |
62 | |
63 | /* Create a new task data structure. */ | |
64 | ||
65 | void | |
66 | gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, | |
67 | struct gomp_task_icv *prev_icv) | |
68 | { | |
e4606348 JJ |
69 | /* It would seem that using memset here would be a win, but it turns |
70 | out that partially filling gomp_task allows us to keep the | |
71 | overhead of task creation low. In the nqueens-1.c test, for a | |
72 | sufficiently large N, we drop the overhead from 5-6% to 1%. | |
73 | ||
74 | Note, the nqueens-1.c test in serial mode is a good test to | |
75 | benchmark the overhead of creating tasks as there are millions of | |
76 | tiny tasks created that all run undeferred. */ | |
a68ab351 | 77 | task->parent = parent_task; |
d656bfda KCY |
78 | priority_queue_init (&task->children_queue); |
79 | task->taskgroup = NULL; | |
80 | task->dependers = NULL; | |
81 | task->depend_hash = NULL; | |
82 | task->taskwait = NULL; | |
83 | task->depend_count = 0; | |
84 | task->completion_sem = NULL; | |
85 | task->deferred_p = false; | |
a68ab351 JJ |
86 | task->icv = *prev_icv; |
87 | task->kind = GOMP_TASK_IMPLICIT; | |
5f836cbb | 88 | task->in_tied_task = false; |
20906c66 | 89 | task->final_task = false; |
acf0174b | 90 | task->copy_ctors_done = false; |
0494285a | 91 | task->parent_depends_on = false; |
a68ab351 JJ |
92 | } |
93 | ||
94 | /* Clean up a task, after completing it. */ | |
95 | ||
96 | void | |
97 | gomp_end_task (void) | |
98 | { | |
99 | struct gomp_thread *thr = gomp_thread (); | |
100 | struct gomp_task *task = thr->task; | |
101 | ||
102 | gomp_finish_task (task); | |
103 | thr->task = task->parent; | |
104 | } | |
105 | ||
e4606348 | 106 | /* Clear the parent field of every task in LIST. */ |
d9a6bd32 | 107 | |
a68ab351 | 108 | static inline void |
e4606348 | 109 | gomp_clear_parent_in_list (struct priority_list *list) |
a68ab351 | 110 | { |
e4606348 JJ |
111 | struct priority_node *p = list->tasks; |
112 | if (p) | |
a68ab351 JJ |
113 | do |
114 | { | |
e4606348 JJ |
115 | priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; |
116 | p = p->next; | |
a68ab351 | 117 | } |
e4606348 | 118 | while (p != list->tasks); |
a68ab351 JJ |
119 | } |
120 | ||
e4606348 JJ |
121 | /* Splay tree version of gomp_clear_parent_in_list. |
122 | ||
123 | Clear the parent field of every task in NODE within SP, and free | |
124 | the node when done. */ | |
125 | ||
126 | static void | |
127 | gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) | |
128 | { | |
129 | if (!node) | |
130 | return; | |
131 | prio_splay_tree_node left = node->left, right = node->right; | |
132 | gomp_clear_parent_in_list (&node->key.l); | |
133 | #if _LIBGOMP_CHECKING_ | |
134 | memset (node, 0xaf, sizeof (*node)); | |
135 | #endif | |
136 | /* No need to remove the node from the tree. We're nuking | |
137 | everything, so just free the nodes and our caller can clear the | |
138 | entire splay tree. */ | |
139 | free (node); | |
140 | gomp_clear_parent_in_tree (sp, left); | |
141 | gomp_clear_parent_in_tree (sp, right); | |
142 | } | |
143 | ||
144 | /* Clear the parent field of every task in Q and remove every task | |
145 | from Q. */ | |
146 | ||
147 | static inline void | |
148 | gomp_clear_parent (struct priority_queue *q) | |
149 | { | |
150 | if (priority_queue_multi_p (q)) | |
151 | { | |
152 | gomp_clear_parent_in_tree (&q->t, q->t.root); | |
153 | /* All the nodes have been cleared in gomp_clear_parent_in_tree. | |
154 | No need to remove anything. We can just nuke everything. */ | |
155 | q->t.root = NULL; | |
156 | } | |
157 | else | |
158 | gomp_clear_parent_in_list (&q->l); | |
159 | } | |
160 | ||
161 | /* Helper function for GOMP_task and gomp_create_target_task. | |
162 | ||
163 | For a TASK with in/out dependencies, fill in the various dependency | |
164 | queues. PARENT is the parent of said task. DEPEND is as in | |
165 | GOMP_task. */ | |
d9a6bd32 JJ |
166 | |
167 | static void | |
168 | gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, | |
169 | void **depend) | |
170 | { | |
171 | size_t ndepend = (uintptr_t) depend[0]; | |
d9a6bd32 JJ |
172 | size_t i; |
173 | hash_entry_type ent; | |
174 | ||
28567c40 JJ |
175 | if (ndepend) |
176 | { | |
177 | /* depend[0] is total # */ | |
178 | size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */ | |
179 | /* ndepend - nout is # of in: */ | |
180 | for (i = 0; i < ndepend; i++) | |
181 | { | |
182 | task->depend[i].addr = depend[2 + i]; | |
183 | task->depend[i].is_in = i >= nout; | |
184 | } | |
185 | } | |
186 | else | |
187 | { | |
188 | ndepend = (uintptr_t) depend[1]; /* total # */ | |
189 | size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */ | |
190 | size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */ | |
191 | /* For now we treat mutexinoutset like out, which is compliant, but | |
192 | inefficient. */ | |
193 | size_t nin = (uintptr_t) depend[4]; /* # of in: */ | |
194 | /* ndepend - nout - nmutexinoutset - nin is # of depobjs */ | |
195 | size_t normal = nout + nmutexinoutset + nin; | |
196 | size_t n = 0; | |
197 | for (i = normal; i < ndepend; i++) | |
198 | { | |
199 | void **d = (void **) (uintptr_t) depend[5 + i]; | |
200 | switch ((uintptr_t) d[1]) | |
201 | { | |
202 | case GOMP_DEPEND_OUT: | |
203 | case GOMP_DEPEND_INOUT: | |
204 | case GOMP_DEPEND_MUTEXINOUTSET: | |
205 | break; | |
206 | case GOMP_DEPEND_IN: | |
207 | continue; | |
208 | default: | |
209 | gomp_fatal ("unknown omp_depend_t dependence type %d", | |
210 | (int) (uintptr_t) d[1]); | |
211 | } | |
212 | task->depend[n].addr = d[0]; | |
213 | task->depend[n++].is_in = 0; | |
214 | } | |
215 | for (i = 0; i < normal; i++) | |
216 | { | |
217 | task->depend[n].addr = depend[5 + i]; | |
218 | task->depend[n++].is_in = i >= nout + nmutexinoutset; | |
219 | } | |
220 | for (i = normal; i < ndepend; i++) | |
221 | { | |
222 | void **d = (void **) (uintptr_t) depend[5 + i]; | |
223 | if ((uintptr_t) d[1] != GOMP_DEPEND_IN) | |
224 | continue; | |
225 | task->depend[n].addr = d[0]; | |
226 | task->depend[n++].is_in = 1; | |
227 | } | |
228 | } | |
d9a6bd32 JJ |
229 | task->depend_count = ndepend; |
230 | task->num_dependees = 0; | |
231 | if (parent->depend_hash == NULL) | |
232 | parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); | |
233 | for (i = 0; i < ndepend; i++) | |
234 | { | |
d9a6bd32 JJ |
235 | task->depend[i].next = NULL; |
236 | task->depend[i].prev = NULL; | |
237 | task->depend[i].task = task; | |
d9a6bd32 JJ |
238 | task->depend[i].redundant = false; |
239 | task->depend[i].redundant_out = false; | |
240 | ||
241 | hash_entry_type *slot = htab_find_slot (&parent->depend_hash, | |
242 | &task->depend[i], INSERT); | |
243 | hash_entry_type out = NULL, last = NULL; | |
244 | if (*slot) | |
245 | { | |
246 | /* If multiple depends on the same task are the same, all but the | |
247 | first one are redundant. As inout/out come first, if any of them | |
248 | is inout/out, it will win, which is the right semantics. */ | |
249 | if ((*slot)->task == task) | |
250 | { | |
251 | task->depend[i].redundant = true; | |
252 | continue; | |
253 | } | |
254 | for (ent = *slot; ent; ent = ent->next) | |
255 | { | |
256 | if (ent->redundant_out) | |
257 | break; | |
258 | ||
259 | last = ent; | |
260 | ||
261 | /* depend(in:...) doesn't depend on earlier depend(in:...). */ | |
28567c40 | 262 | if (task->depend[i].is_in && ent->is_in) |
d9a6bd32 JJ |
263 | continue; |
264 | ||
265 | if (!ent->is_in) | |
266 | out = ent; | |
267 | ||
268 | struct gomp_task *tsk = ent->task; | |
269 | if (tsk->dependers == NULL) | |
270 | { | |
271 | tsk->dependers | |
272 | = gomp_malloc (sizeof (struct gomp_dependers_vec) | |
273 | + 6 * sizeof (struct gomp_task *)); | |
274 | tsk->dependers->n_elem = 1; | |
275 | tsk->dependers->allocated = 6; | |
276 | tsk->dependers->elem[0] = task; | |
277 | task->num_dependees++; | |
278 | continue; | |
279 | } | |
280 | /* We already have some other dependency on tsk from earlier | |
281 | depend clause. */ | |
282 | else if (tsk->dependers->n_elem | |
283 | && (tsk->dependers->elem[tsk->dependers->n_elem - 1] | |
284 | == task)) | |
285 | continue; | |
286 | else if (tsk->dependers->n_elem == tsk->dependers->allocated) | |
287 | { | |
288 | tsk->dependers->allocated | |
289 | = tsk->dependers->allocated * 2 + 2; | |
290 | tsk->dependers | |
291 | = gomp_realloc (tsk->dependers, | |
292 | sizeof (struct gomp_dependers_vec) | |
293 | + (tsk->dependers->allocated | |
294 | * sizeof (struct gomp_task *))); | |
295 | } | |
296 | tsk->dependers->elem[tsk->dependers->n_elem++] = task; | |
297 | task->num_dependees++; | |
298 | } | |
299 | task->depend[i].next = *slot; | |
300 | (*slot)->prev = &task->depend[i]; | |
301 | } | |
302 | *slot = &task->depend[i]; | |
303 | ||
304 | /* There is no need to store more than one depend({,in}out:) task per | |
305 | address in the hash table chain for the purpose of creation of | |
306 | deferred tasks, because each out depends on all earlier outs, thus it | |
307 | is enough to record just the last depend({,in}out:). For depend(in:), | |
308 | we need to keep all of the previous ones not terminated yet, because | |
309 | a later depend({,in}out:) might need to depend on all of them. So, if | |
310 | the new task's clause is depend({,in}out:), we know there is at most | |
311 | one other depend({,in}out:) clause in the list (out). For | |
312 | non-deferred tasks we want to see all outs, so they are moved to the | |
313 | end of the chain, after first redundant_out entry all following | |
314 | entries should be redundant_out. */ | |
315 | if (!task->depend[i].is_in && out) | |
316 | { | |
317 | if (out != last) | |
318 | { | |
319 | out->next->prev = out->prev; | |
320 | out->prev->next = out->next; | |
321 | out->next = last->next; | |
322 | out->prev = last; | |
323 | last->next = out; | |
324 | if (out->next) | |
325 | out->next->prev = out; | |
326 | } | |
327 | out->redundant_out = true; | |
328 | } | |
329 | } | |
330 | } | |
0494285a | 331 | |
a68ab351 JJ |
332 | /* Called when encountering an explicit task directive. If IF_CLAUSE is |
333 | false, then we must not delay in executing the task. If UNTIED is true, | |
d9a6bd32 JJ |
334 | then the task may be executed by any member of the team. |
335 | ||
336 | DEPEND is an array containing: | |
28567c40 | 337 | if depend[0] is non-zero, then: |
d9a6bd32 | 338 | depend[0]: number of depend elements. |
28567c40 JJ |
339 | depend[1]: number of depend elements of type "out/inout". |
340 | depend[2..N+1]: address of [1..N]th depend element. | |
341 | otherwise, when depend[0] is zero, then: | |
342 | depend[1]: number of depend elements. | |
343 | depend[2]: number of depend elements of type "out/inout". | |
344 | depend[3]: number of depend elements of type "mutexinoutset". | |
345 | depend[4]: number of depend elements of type "in". | |
346 | depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements | |
347 | depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of | |
348 | omp_depend_t objects. */ | |
a68ab351 JJ |
349 | |
350 | void | |
351 | GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), | |
acf0174b | 352 | long arg_size, long arg_align, bool if_clause, unsigned flags, |
0bb27b81 | 353 | void **depend, int priority_arg, void *detach) |
a68ab351 JJ |
354 | { |
355 | struct gomp_thread *thr = gomp_thread (); | |
356 | struct gomp_team *team = thr->ts.team; | |
0bb27b81 | 357 | int priority = 0; |
a68ab351 JJ |
358 | |
359 | #ifdef HAVE_BROKEN_POSIX_SEMAPHORES | |
360 | /* If pthread_mutex_* is used for omp_*lock*, then each task must be | |
361 | tied to one thread all the time. This means UNTIED tasks must be | |
362 | tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN | |
363 | might be running on different thread than FN. */ | |
364 | if (cpyfn) | |
365 | if_clause = false; | |
d9a6bd32 | 366 | flags &= ~GOMP_TASK_FLAG_UNTIED; |
a68ab351 JJ |
367 | #endif |
368 | ||
acf0174b | 369 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
28567c40 JJ |
370 | if (__builtin_expect (gomp_cancel_var, 0) && team) |
371 | { | |
372 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
373 | return; | |
374 | if (thr->task->taskgroup) | |
375 | { | |
376 | if (thr->task->taskgroup->cancelled) | |
377 | return; | |
378 | if (thr->task->taskgroup->workshare | |
379 | && thr->task->taskgroup->prev | |
380 | && thr->task->taskgroup->prev->cancelled) | |
381 | return; | |
382 | } | |
383 | } | |
acf0174b | 384 | |
0bb27b81 JJ |
385 | if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0)) |
386 | { | |
387 | priority = priority_arg; | |
388 | if (priority > gomp_max_task_priority_var) | |
389 | priority = gomp_max_task_priority_var; | |
390 | } | |
a6d22fb2 | 391 | |
a68ab351 | 392 | if (!if_clause || team == NULL |
20906c66 | 393 | || (thr->task && thr->task->final_task) |
a68ab351 JJ |
394 | || team->task_count > 64 * team->nthreads) |
395 | { | |
396 | struct gomp_task task; | |
d656bfda | 397 | gomp_sem_t completion_sem; |
a68ab351 | 398 | |
acf0174b JJ |
399 | /* If there are depend clauses and earlier deferred sibling tasks |
400 | with depend clauses, check if there isn't a dependency. If there | |
0494285a | 401 | is, we need to wait for them. There is no need to handle |
acf0174b JJ |
402 | depend clauses for non-deferred tasks other than this, because |
403 | the parent task is suspended until the child task finishes and thus | |
404 | it can't start further child tasks. */ | |
d9a6bd32 JJ |
405 | if ((flags & GOMP_TASK_FLAG_DEPEND) |
406 | && thr->task && thr->task->depend_hash) | |
0494285a | 407 | gomp_task_maybe_wait_for_dependencies (depend); |
acf0174b | 408 | |
a68ab351 | 409 | gomp_init_task (&task, thr->task, gomp_icv (false)); |
d9a6bd32 JJ |
410 | task.kind = GOMP_TASK_UNDEFERRED; |
411 | task.final_task = (thr->task && thr->task->final_task) | |
412 | || (flags & GOMP_TASK_FLAG_FINAL); | |
e4606348 | 413 | task.priority = priority; |
a6d22fb2 | 414 | |
0bb27b81 | 415 | if ((flags & GOMP_TASK_FLAG_DETACH) != 0) |
a6d22fb2 | 416 | { |
d656bfda KCY |
417 | gomp_sem_init (&completion_sem, 0); |
418 | task.completion_sem = &completion_sem; | |
419 | *(void **) detach = &task; | |
a6d22fb2 | 420 | if (data) |
d656bfda | 421 | *(void **) data = &task; |
a6d22fb2 | 422 | |
d656bfda KCY |
423 | gomp_debug (0, "Thread %d: new event: %p\n", |
424 | thr->ts.team_id, &task); | |
a6d22fb2 KCY |
425 | } |
426 | ||
5f836cbb | 427 | if (thr->task) |
acf0174b JJ |
428 | { |
429 | task.in_tied_task = thr->task->in_tied_task; | |
430 | task.taskgroup = thr->task->taskgroup; | |
431 | } | |
a68ab351 JJ |
432 | thr->task = &task; |
433 | if (__builtin_expect (cpyfn != NULL, 0)) | |
434 | { | |
435 | char buf[arg_size + arg_align - 1]; | |
436 | char *arg = (char *) (((uintptr_t) buf + arg_align - 1) | |
437 | & ~(uintptr_t) (arg_align - 1)); | |
438 | cpyfn (arg, data); | |
439 | fn (arg); | |
440 | } | |
441 | else | |
442 | fn (data); | |
a6d22fb2 | 443 | |
d656bfda KCY |
444 | if ((flags & GOMP_TASK_FLAG_DETACH) != 0) |
445 | { | |
446 | gomp_sem_wait (&completion_sem); | |
447 | gomp_sem_destroy (&completion_sem); | |
448 | } | |
a6d22fb2 | 449 | |
bed8d8a6 AM |
450 | /* Access to "children" is normally done inside a task_lock |
451 | mutex region, but the only way this particular task.children | |
452 | can be set is if this thread's task work function (fn) | |
453 | creates children. So since the setter is *this* thread, we | |
454 | need no barriers here when testing for non-NULL. We can have | |
455 | task.children set by the current thread then changed by a | |
456 | child thread, but seeing a stale non-NULL value is not a | |
457 | problem. Once past the task_lock acquisition, this thread | |
458 | will see the real value of task.children. */ | |
e4606348 | 459 | if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) |
a68ab351 JJ |
460 | { |
461 | gomp_mutex_lock (&team->task_lock); | |
e4606348 | 462 | gomp_clear_parent (&task.children_queue); |
a68ab351 JJ |
463 | gomp_mutex_unlock (&team->task_lock); |
464 | } | |
465 | gomp_end_task (); | |
466 | } | |
467 | else | |
468 | { | |
469 | struct gomp_task *task; | |
470 | struct gomp_task *parent = thr->task; | |
acf0174b | 471 | struct gomp_taskgroup *taskgroup = parent->taskgroup; |
a68ab351 JJ |
472 | char *arg; |
473 | bool do_wake; | |
acf0174b JJ |
474 | size_t depend_size = 0; |
475 | ||
d9a6bd32 | 476 | if (flags & GOMP_TASK_FLAG_DEPEND) |
28567c40 | 477 | depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1]) |
acf0174b JJ |
478 | * sizeof (struct gomp_task_depend_entry)); |
479 | task = gomp_malloc (sizeof (*task) + depend_size | |
480 | + arg_size + arg_align - 1); | |
481 | arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) | |
a68ab351 JJ |
482 | & ~(uintptr_t) (arg_align - 1)); |
483 | gomp_init_task (task, parent, gomp_icv (false)); | |
e4606348 | 484 | task->priority = priority; |
d9a6bd32 | 485 | task->kind = GOMP_TASK_UNDEFERRED; |
5f836cbb | 486 | task->in_tied_task = parent->in_tied_task; |
acf0174b | 487 | task->taskgroup = taskgroup; |
d656bfda | 488 | task->deferred_p = true; |
0bb27b81 | 489 | if ((flags & GOMP_TASK_FLAG_DETACH) != 0) |
a6d22fb2 | 490 | { |
d656bfda KCY |
491 | task->detach_team = team; |
492 | ||
493 | *(void **) detach = task; | |
a6d22fb2 | 494 | if (data) |
d656bfda | 495 | *(void **) data = task; |
a6d22fb2 | 496 | |
d656bfda | 497 | gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task); |
a6d22fb2 | 498 | } |
a68ab351 JJ |
499 | thr->task = task; |
500 | if (cpyfn) | |
acf0174b JJ |
501 | { |
502 | cpyfn (arg, data); | |
503 | task->copy_ctors_done = true; | |
504 | } | |
a68ab351 JJ |
505 | else |
506 | memcpy (arg, data, arg_size); | |
507 | thr->task = parent; | |
508 | task->kind = GOMP_TASK_WAITING; | |
509 | task->fn = fn; | |
510 | task->fn_data = arg; | |
d9a6bd32 | 511 | task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; |
a68ab351 | 512 | gomp_mutex_lock (&team->task_lock); |
acf0174b JJ |
513 | /* If parallel or taskgroup has been cancelled, don't start new |
514 | tasks. */ | |
28567c40 JJ |
515 | if (__builtin_expect (gomp_cancel_var, 0) |
516 | && !task->copy_ctors_done) | |
acf0174b | 517 | { |
28567c40 JJ |
518 | if (gomp_team_barrier_cancelled (&team->barrier)) |
519 | { | |
520 | do_cancel: | |
521 | gomp_mutex_unlock (&team->task_lock); | |
522 | gomp_finish_task (task); | |
523 | free (task); | |
524 | return; | |
525 | } | |
526 | if (taskgroup) | |
527 | { | |
528 | if (taskgroup->cancelled) | |
529 | goto do_cancel; | |
530 | if (taskgroup->workshare | |
531 | && taskgroup->prev | |
532 | && taskgroup->prev->cancelled) | |
533 | goto do_cancel; | |
534 | } | |
acf0174b JJ |
535 | } |
536 | if (taskgroup) | |
537 | taskgroup->num_children++; | |
538 | if (depend_size) | |
539 | { | |
d9a6bd32 | 540 | gomp_task_handle_depend (task, parent, depend); |
acf0174b JJ |
541 | if (task->num_dependees) |
542 | { | |
e4606348 JJ |
543 | /* Tasks that depend on other tasks are not put into the |
544 | various waiting queues, so we are done for now. Said | |
545 | tasks are instead put into the queues via | |
546 | gomp_task_run_post_handle_dependers() after their | |
547 | dependencies have been satisfied. After which, they | |
548 | can be picked up by the various scheduling | |
549 | points. */ | |
acf0174b JJ |
550 | gomp_mutex_unlock (&team->task_lock); |
551 | return; | |
552 | } | |
553 | } | |
e4606348 JJ |
554 | |
555 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, | |
556 | task, priority, | |
557 | PRIORITY_INSERT_BEGIN, | |
558 | /*adjust_parent_depends_on=*/false, | |
559 | task->parent_depends_on); | |
acf0174b | 560 | if (taskgroup) |
e4606348 JJ |
561 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
562 | task, priority, | |
563 | PRIORITY_INSERT_BEGIN, | |
564 | /*adjust_parent_depends_on=*/false, | |
565 | task->parent_depends_on); | |
566 | ||
567 | priority_queue_insert (PQ_TEAM, &team->task_queue, | |
568 | task, priority, | |
569 | PRIORITY_INSERT_END, | |
570 | /*adjust_parent_depends_on=*/false, | |
571 | task->parent_depends_on); | |
572 | ||
5f836cbb | 573 | ++team->task_count; |
acf0174b | 574 | ++team->task_queued_count; |
5f836cbb JJ |
575 | gomp_team_barrier_set_task_pending (&team->barrier); |
576 | do_wake = team->task_running_count + !parent->in_tied_task | |
577 | < team->nthreads; | |
a68ab351 JJ |
578 | gomp_mutex_unlock (&team->task_lock); |
579 | if (do_wake) | |
580 | gomp_team_barrier_wake (&team->barrier, 1); | |
581 | } | |
582 | } | |
583 | ||
d9a6bd32 JJ |
584 | ialias (GOMP_taskgroup_start) |
585 | ialias (GOMP_taskgroup_end) | |
28567c40 | 586 | ialias (GOMP_taskgroup_reduction_register) |
d9a6bd32 JJ |
587 | |
588 | #define TYPE long | |
589 | #define UTYPE unsigned long | |
590 | #define TYPE_is_long 1 | |
591 | #include "taskloop.c" | |
592 | #undef TYPE | |
593 | #undef UTYPE | |
594 | #undef TYPE_is_long | |
595 | ||
596 | #define TYPE unsigned long long | |
597 | #define UTYPE TYPE | |
598 | #define GOMP_taskloop GOMP_taskloop_ull | |
599 | #include "taskloop.c" | |
600 | #undef TYPE | |
601 | #undef UTYPE | |
602 | #undef GOMP_taskloop | |
603 | ||
e4606348 JJ |
604 | static void inline |
605 | priority_queue_move_task_first (enum priority_queue_type type, | |
606 | struct priority_queue *head, | |
607 | struct gomp_task *task) | |
608 | { | |
609 | #if _LIBGOMP_CHECKING_ | |
610 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
611 | gomp_fatal ("Attempt to move first missing task %p", task); | |
612 | #endif | |
613 | struct priority_list *list; | |
614 | if (priority_queue_multi_p (head)) | |
615 | { | |
616 | list = priority_queue_lookup_priority (head, task->priority); | |
617 | #if _LIBGOMP_CHECKING_ | |
618 | if (!list) | |
619 | gomp_fatal ("Unable to find priority %d", task->priority); | |
620 | #endif | |
621 | } | |
622 | else | |
623 | list = &head->l; | |
624 | priority_list_remove (list, task_to_priority_node (type, task), 0); | |
625 | priority_list_insert (type, list, task, task->priority, | |
626 | PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, | |
627 | task->parent_depends_on); | |
628 | } | |
629 | ||
630 | /* Actual body of GOMP_PLUGIN_target_task_completion that is executed | |
631 | with team->task_lock held, or is executed in the thread that called | |
632 | gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been | |
633 | run before it acquires team->task_lock. */ | |
634 | ||
635 | static void | |
636 | gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) | |
637 | { | |
638 | struct gomp_task *parent = task->parent; | |
639 | if (parent) | |
640 | priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, | |
641 | task); | |
642 | ||
643 | struct gomp_taskgroup *taskgroup = task->taskgroup; | |
644 | if (taskgroup) | |
645 | priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
646 | task); | |
647 | ||
648 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, | |
649 | PRIORITY_INSERT_BEGIN, false, | |
650 | task->parent_depends_on); | |
651 | task->kind = GOMP_TASK_WAITING; | |
652 | if (parent && parent->taskwait) | |
653 | { | |
654 | if (parent->taskwait->in_taskwait) | |
655 | { | |
656 | /* One more task has had its dependencies met. | |
657 | Inform any waiters. */ | |
658 | parent->taskwait->in_taskwait = false; | |
659 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
660 | } | |
661 | else if (parent->taskwait->in_depend_wait) | |
662 | { | |
663 | /* One more task has had its dependencies met. | |
664 | Inform any waiters. */ | |
665 | parent->taskwait->in_depend_wait = false; | |
666 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
667 | } | |
668 | } | |
669 | if (taskgroup && taskgroup->in_taskgroup_wait) | |
670 | { | |
671 | /* One more task has had its dependencies met. | |
672 | Inform any waiters. */ | |
673 | taskgroup->in_taskgroup_wait = false; | |
674 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
675 | } | |
676 | ||
677 | ++team->task_queued_count; | |
678 | gomp_team_barrier_set_task_pending (&team->barrier); | |
679 | /* I'm afraid this can't be done after releasing team->task_lock, | |
680 | as gomp_target_task_completion is run from unrelated thread and | |
681 | therefore in between gomp_mutex_unlock and gomp_team_barrier_wake | |
682 | the team could be gone already. */ | |
683 | if (team->nthreads > team->task_running_count) | |
684 | gomp_team_barrier_wake (&team->barrier, 1); | |
685 | } | |
686 | ||
687 | /* Signal that a target task TTASK has completed the asynchronously | |
688 | running phase and should be requeued as a task to handle the | |
689 | variable unmapping. */ | |
d9a6bd32 JJ |
690 | |
691 | void | |
e4606348 JJ |
692 | GOMP_PLUGIN_target_task_completion (void *data) |
693 | { | |
694 | struct gomp_target_task *ttask = (struct gomp_target_task *) data; | |
695 | struct gomp_task *task = ttask->task; | |
696 | struct gomp_team *team = ttask->team; | |
697 | ||
698 | gomp_mutex_lock (&team->task_lock); | |
699 | if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) | |
700 | { | |
701 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
702 | gomp_mutex_unlock (&team->task_lock); | |
cb11f3cf | 703 | return; |
e4606348 JJ |
704 | } |
705 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
706 | gomp_target_task_completion (team, task); | |
707 | gomp_mutex_unlock (&team->task_lock); | |
708 | } | |

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

e4606348 JJ |
712 | /* Called for nowait target tasks. */ |
713 | ||
714 | bool | |
d9a6bd32 JJ |
715 | gomp_create_target_task (struct gomp_device_descr *devicep, |
716 | void (*fn) (void *), size_t mapnum, void **hostaddrs, | |
717 | size_t *sizes, unsigned short *kinds, | |
b2b40051 | 718 | unsigned int flags, void **depend, void **args, |
e4606348 | 719 | enum gomp_target_task_state state) |
d9a6bd32 JJ |
720 | { |
721 | struct gomp_thread *thr = gomp_thread (); | |
722 | struct gomp_team *team = thr->ts.team; | |
723 | ||
724 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ | |
28567c40 JJ |
725 | if (__builtin_expect (gomp_cancel_var, 0) && team) |
726 | { | |
727 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
728 | return true; | |
729 | if (thr->task->taskgroup) | |
730 | { | |
731 | if (thr->task->taskgroup->cancelled) | |
732 | return true; | |
733 | if (thr->task->taskgroup->workshare | |
734 | && thr->task->taskgroup->prev | |
735 | && thr->task->taskgroup->prev->cancelled) | |
736 | return true; | |
737 | } | |
738 | } | |
d9a6bd32 JJ |
739 | |
740 | struct gomp_target_task *ttask; | |
741 | struct gomp_task *task; | |
742 | struct gomp_task *parent = thr->task; | |
743 | struct gomp_taskgroup *taskgroup = parent->taskgroup; | |
744 | bool do_wake; | |
745 | size_t depend_size = 0; | |
e4606348 JJ |
746 | uintptr_t depend_cnt = 0; |
747 | size_t tgt_align = 0, tgt_size = 0; | |
aea72386 | 748 | uintptr_t args_cnt = 0; |
d9a6bd32 JJ |
749 | |
750 | if (depend != NULL) | |
e4606348 | 751 | { |
28567c40 | 752 | depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]); |
e4606348 JJ |
753 | depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); |
754 | } | |
755 | if (fn) | |
756 | { | |
757 | /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are | |
758 | firstprivate on the target task. */ | |
759 | size_t i; | |
760 | for (i = 0; i < mapnum; i++) | |
761 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
762 | { | |
763 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
764 | if (tgt_align < align) | |
765 | tgt_align = align; | |
766 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
767 | tgt_size += sizes[i]; | |
768 | } | |
769 | if (tgt_align) | |
770 | tgt_size += tgt_align - 1; | |
771 | else | |
772 | tgt_size = 0; | |
aea72386 JJ |
773 | if (args) |
774 | { | |
775 | void **cargs = args; | |
776 | while (*cargs) | |
777 | { | |
778 | intptr_t id = (intptr_t) *cargs++; | |
779 | if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) | |
780 | cargs++; | |
781 | } | |
782 | args_cnt = cargs + 1 - args; | |
783 | } | |
e4606348 JJ |
784 | } |
785 | ||
d9a6bd32 JJ |
786 | task = gomp_malloc (sizeof (*task) + depend_size |
787 | + sizeof (*ttask) | |
aea72386 | 788 | + args_cnt * sizeof (void *) |
d9a6bd32 | 789 | + mapnum * (sizeof (void *) + sizeof (size_t) |
e4606348 JJ |
790 | + sizeof (unsigned short)) |
791 | + tgt_size); | |
d9a6bd32 | 792 | gomp_init_task (task, parent, gomp_icv (false)); |
e4606348 | 793 | task->priority = 0; |
d9a6bd32 JJ |
794 | task->kind = GOMP_TASK_WAITING; |
795 | task->in_tied_task = parent->in_tied_task; | |
796 | task->taskgroup = taskgroup; | |
e4606348 | 797 | ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; |
d9a6bd32 JJ |
798 | ttask->devicep = devicep; |
799 | ttask->fn = fn; | |
800 | ttask->mapnum = mapnum; | |
801 | memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); | |
aea72386 JJ |
802 | if (args_cnt) |
803 | { | |
804 | ttask->args = (void **) &ttask->hostaddrs[mapnum]; | |
805 | memcpy (ttask->args, args, args_cnt * sizeof (void *)); | |
806 | ttask->sizes = (size_t *) &ttask->args[args_cnt]; | |
807 | } | |
808 | else | |
809 | { | |
810 | ttask->args = args; | |
811 | ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; | |
812 | } | |
d9a6bd32 JJ |
813 | memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); |
814 | ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; | |
815 | memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); | |
e4606348 JJ |
816 | if (tgt_align) |
817 | { | |
818 | char *tgt = (char *) &ttask->kinds[mapnum]; | |
819 | size_t i; | |
820 | uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); | |
821 | if (al) | |
822 | tgt += tgt_align - al; | |
823 | tgt_size = 0; | |
824 | for (i = 0; i < mapnum; i++) | |
825 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
826 | { | |
827 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
828 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
829 | memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); | |
830 | ttask->hostaddrs[i] = tgt + tgt_size; | |
831 | tgt_size = tgt_size + sizes[i]; | |
832 | } | |
833 | } | |
d9a6bd32 | 834 | ttask->flags = flags; |
e4606348 JJ |
835 | ttask->state = state; |
836 | ttask->task = task; | |
837 | ttask->team = team; | |
838 | task->fn = NULL; | |
d9a6bd32 JJ |
839 | task->fn_data = ttask; |
840 | task->final_task = 0; | |
841 | gomp_mutex_lock (&team->task_lock); | |
842 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ | |
28567c40 | 843 | if (__builtin_expect (gomp_cancel_var, 0)) |
d9a6bd32 | 844 | { |
28567c40 JJ |
845 | if (gomp_team_barrier_cancelled (&team->barrier)) |
846 | { | |
847 | do_cancel: | |
848 | gomp_mutex_unlock (&team->task_lock); | |
849 | gomp_finish_task (task); | |
850 | free (task); | |
851 | return true; | |
852 | } | |
853 | if (taskgroup) | |
854 | { | |
855 | if (taskgroup->cancelled) | |
856 | goto do_cancel; | |
857 | if (taskgroup->workshare | |
858 | && taskgroup->prev | |
859 | && taskgroup->prev->cancelled) | |
860 | goto do_cancel; | |
861 | } | |
d9a6bd32 | 862 | } |
d9a6bd32 JJ |
863 | if (depend_size) |
864 | { | |
865 | gomp_task_handle_depend (task, parent, depend); | |
866 | if (task->num_dependees) | |
867 | { | |
e4606348 JJ |
868 | if (taskgroup) |
869 | taskgroup->num_children++; | |
d9a6bd32 | 870 | gomp_mutex_unlock (&team->task_lock); |
e4606348 | 871 | return true; |
d9a6bd32 JJ |
872 | } |
873 | } | |
e4606348 | 874 | if (state == GOMP_TARGET_TASK_DATA) |
d9a6bd32 | 875 | { |
8e4e4719 | 876 | gomp_task_run_post_handle_depend_hash (task); |
e4606348 JJ |
877 | gomp_mutex_unlock (&team->task_lock); |
878 | gomp_finish_task (task); | |
879 | free (task); | |
880 | return false; | |
d9a6bd32 | 881 | } |
d9a6bd32 | 882 | if (taskgroup) |
e4606348 JJ |
883 | taskgroup->num_children++; |
884 | /* For async offloading, if we don't need to wait for dependencies, | |
885 | run the gomp_target_task_fn right away, essentially schedule the | |
886 | mapping part of the task in the current thread. */ | |
887 | if (devicep != NULL | |
888 | && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
d9a6bd32 | 889 | { |
e4606348 JJ |
890 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
891 | PRIORITY_INSERT_END, | |
892 | /*adjust_parent_depends_on=*/false, | |
893 | task->parent_depends_on); | |
894 | if (taskgroup) | |
895 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
896 | task, 0, PRIORITY_INSERT_END, | |
897 | /*adjust_parent_depends_on=*/false, | |
898 | task->parent_depends_on); | |
899 | task->pnode[PQ_TEAM].next = NULL; | |
900 | task->pnode[PQ_TEAM].prev = NULL; | |
901 | task->kind = GOMP_TASK_TIED; | |
902 | ++team->task_count; | |
903 | gomp_mutex_unlock (&team->task_lock); | |
904 | ||
905 | thr->task = task; | |
906 | gomp_target_task_fn (task->fn_data); | |
907 | thr->task = parent; | |
908 | ||
909 | gomp_mutex_lock (&team->task_lock); | |
910 | task->kind = GOMP_TASK_ASYNC_RUNNING; | |
911 | /* If GOMP_PLUGIN_target_task_completion has run already | |
912 | in between gomp_target_task_fn and the mutex lock, | |
913 | perform the requeuing here. */ | |
914 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
915 | gomp_target_task_completion (team, task); | |
d9a6bd32 | 916 | else |
e4606348 JJ |
917 | ttask->state = GOMP_TARGET_TASK_RUNNING; |
918 | gomp_mutex_unlock (&team->task_lock); | |
919 | return true; | |
d9a6bd32 | 920 | } |
e4606348 JJ |
921 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
922 | PRIORITY_INSERT_BEGIN, | |
923 | /*adjust_parent_depends_on=*/false, | |
924 | task->parent_depends_on); | |
925 | if (taskgroup) | |
926 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, | |
927 | PRIORITY_INSERT_BEGIN, | |
928 | /*adjust_parent_depends_on=*/false, | |
929 | task->parent_depends_on); | |
930 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, | |
931 | PRIORITY_INSERT_END, | |
932 | /*adjust_parent_depends_on=*/false, | |
933 | task->parent_depends_on); | |
d9a6bd32 JJ |
934 | ++team->task_count; |
935 | ++team->task_queued_count; | |
936 | gomp_team_barrier_set_task_pending (&team->barrier); | |
937 | do_wake = team->task_running_count + !parent->in_tied_task | |
938 | < team->nthreads; | |
939 | gomp_mutex_unlock (&team->task_lock); | |
940 | if (do_wake) | |
941 | gomp_team_barrier_wake (&team->barrier, 1); | |
e4606348 | 942 | return true; |
d9a6bd32 JJ |
943 | } |
944 | ||
e4606348 JJ |
945 | /* Given a parent_depends_on task in LIST, move it to the front of its |
946 | priority so it is run as soon as possible. | |
d9a6bd32 | 947 | |
e4606348 | 948 | Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. |
d9a6bd32 | 949 | |
e4606348 JJ |
950 | We rearrange the queue such that all parent_depends_on tasks are |
951 | first, and last_parent_depends_on points to the last such task we | |
952 | rearranged. For example, given the following tasks in a queue | |
953 | where PD[123] are the parent_depends_on tasks: | |
d9a6bd32 | 954 | |
e4606348 JJ |
955 | task->children |
956 | | | |
957 | V | |
958 | C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 | |
d9a6bd32 | 959 | |
e4606348 JJ |
960 | We rearrange such that: |
961 | ||
962 | task->children | |
963 | | +--- last_parent_depends_on | |
964 | | | | |
965 | V V | |
966 | PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ | |
967 | ||
968 | static void inline | |
969 | priority_list_upgrade_task (struct priority_list *list, | |
970 | struct priority_node *node) | |
971 | { | |
972 | struct priority_node *last_parent_depends_on | |
973 | = list->last_parent_depends_on; | |
974 | if (last_parent_depends_on) | |
d9a6bd32 | 975 | { |
e4606348 JJ |
976 | node->prev->next = node->next; |
977 | node->next->prev = node->prev; | |
978 | node->prev = last_parent_depends_on; | |
979 | node->next = last_parent_depends_on->next; | |
980 | node->prev->next = node; | |
981 | node->next->prev = node; | |
d9a6bd32 | 982 | } |
e4606348 JJ |
983 | else if (node != list->tasks) |
984 | { | |
985 | node->prev->next = node->next; | |
986 | node->next->prev = node->prev; | |
987 | node->prev = list->tasks->prev; | |
988 | node->next = list->tasks; | |
989 | list->tasks = node; | |
990 | node->prev->next = node; | |
991 | node->next->prev = node; | |
992 | } | |
993 | list->last_parent_depends_on = node; | |
d9a6bd32 JJ |
994 | } |
995 | ||
e4606348 JJ |
996 | /* Given a parent_depends_on TASK in its parent's children_queue, move |
997 | it to the front of its priority so it is run as soon as possible. | |
d9a6bd32 | 998 | |
e4606348 | 999 | PARENT is passed as an optimization. |
d9a6bd32 | 1000 | |
e4606348 JJ |
1001 | (This function could be defined in priority_queue.c, but we want it |
1002 | inlined, and putting it in priority_queue.h is not an option, given | |
1003 | that gomp_task has not been properly defined at that point). */ | |
d9a6bd32 | 1004 | |
e4606348 JJ |
1005 | static void inline |
1006 | priority_queue_upgrade_task (struct gomp_task *task, | |
1007 | struct gomp_task *parent) | |
d9a6bd32 | 1008 | { |
e4606348 JJ |
1009 | struct priority_queue *head = &parent->children_queue; |
1010 | struct priority_node *node = &task->pnode[PQ_CHILDREN]; | |
1011 | #if _LIBGOMP_CHECKING_ | |
1012 | if (!task->parent_depends_on) | |
1013 | gomp_fatal ("priority_queue_upgrade_task: task must be a " | |
1014 | "parent_depends_on task"); | |
1015 | if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) | |
1016 | gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); | |
1017 | #endif | |
1018 | if (priority_queue_multi_p (head)) | |
d9a6bd32 | 1019 | { |
e4606348 JJ |
1020 | struct priority_list *list |
1021 | = priority_queue_lookup_priority (head, task->priority); | |
1022 | priority_list_upgrade_task (list, node); | |
d9a6bd32 | 1023 | } |
e4606348 JJ |
1024 | else |
1025 | priority_list_upgrade_task (&head->l, node); | |
d9a6bd32 JJ |
1026 | } |
1027 | ||
e4606348 JJ |
1028 | /* Given a CHILD_TASK in LIST that is about to be executed, move it out of |
1029 | the way in LIST so that other tasks can be considered for | |
1030 | execution. LIST contains tasks of type TYPE. | |
d9a6bd32 | 1031 | |
e4606348 JJ |
1032 | Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field |
1033 | if applicable. */ | |
1034 | ||
1035 | static void inline | |
1036 | priority_list_downgrade_task (enum priority_queue_type type, | |
1037 | struct priority_list *list, | |
1038 | struct gomp_task *child_task) | |
d9a6bd32 | 1039 | { |
e4606348 JJ |
1040 | struct priority_node *node = task_to_priority_node (type, child_task); |
1041 | if (list->tasks == node) | |
1042 | list->tasks = node->next; | |
1043 | else if (node->next != list->tasks) | |
1044 | { | |
1045 | /* The task in NODE is about to become TIED and TIED tasks | |
1046 | cannot come before WAITING tasks. If we're about to | |
1047 | leave the queue in such an indeterminate state, rewire | |
1048 | things appropriately. However, a TIED task at the end is | |
1049 | perfectly fine. */ | |
1050 | struct gomp_task *next_task = priority_node_to_task (type, node->next); | |
1051 | if (next_task->kind == GOMP_TASK_WAITING) | |
1052 | { | |
1053 | /* Remove from list. */ | |
1054 | node->prev->next = node->next; | |
1055 | node->next->prev = node->prev; | |
1056 | /* Rewire at the end. */ | |
1057 | node->next = list->tasks; | |
1058 | node->prev = list->tasks->prev; | |
1059 | list->tasks->prev->next = node; | |
1060 | list->tasks->prev = node; | |
1061 | } | |
1062 | } | |
1063 | ||
1064 | /* If the current task is the last_parent_depends_on for its | |
1065 | priority, adjust last_parent_depends_on appropriately. */ | |
1066 | if (__builtin_expect (child_task->parent_depends_on, 0) | |
1067 | && list->last_parent_depends_on == node) | |
1068 | { | |
1069 | struct gomp_task *prev_child = priority_node_to_task (type, node->prev); | |
1070 | if (node->prev != node | |
1071 | && prev_child->kind == GOMP_TASK_WAITING | |
1072 | && prev_child->parent_depends_on) | |
1073 | list->last_parent_depends_on = node->prev; | |
1074 | else | |
1075 | { | |
1076 | /* There are no more parent_depends_on entries waiting | |
1077 | to run, clear the list. */ | |
1078 | list->last_parent_depends_on = NULL; | |
1079 | } | |
1080 | } | |
d9a6bd32 | 1081 | } |
e4606348 JJ |
1082 | |
1083 | /* Given a TASK in HEAD that is about to be executed, move it out of | |
1084 | the way so that other tasks can be considered for execution. HEAD | |
1085 | contains tasks of type TYPE. | |
1086 | ||
1087 | Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field | |
1088 | if applicable. | |
1089 | ||
1090 | (This function could be defined in priority_queue.c, but we want it | |
1091 | inlined, and putting it in priority_queue.h is not an option, given | |
1092 | that gomp_task has not been properly defined at that point). */ | |
1093 | ||
1094 | static void inline | |
1095 | priority_queue_downgrade_task (enum priority_queue_type type, | |
1096 | struct priority_queue *head, | |
1097 | struct gomp_task *task) | |
1098 | { | |
1099 | #if _LIBGOMP_CHECKING_ | |
1100 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
1101 | gomp_fatal ("Attempt to downgrade missing task %p", task); | |
d9a6bd32 | 1102 | #endif |
e4606348 JJ |
1103 | if (priority_queue_multi_p (head)) |
1104 | { | |
1105 | struct priority_list *list | |
1106 | = priority_queue_lookup_priority (head, task->priority); | |
1107 | priority_list_downgrade_task (type, list, task); | |
1108 | } | |
1109 | else | |
1110 | priority_list_downgrade_task (type, &head->l, task); | |
1111 | } | |
1112 | ||
1113 | /* Setup CHILD_TASK to execute. This is done by setting the task to | |
1114 | TIED, and updating all relevant queues so that CHILD_TASK is no | |
1115 | longer chosen for scheduling. Also, remove CHILD_TASK from the | |
1116 | overall team task queue entirely. | |
1117 | ||
1118 | Return TRUE if task or its containing taskgroup has been | |
1119 | cancelled. */ | |
d9a6bd32 | 1120 | |
acf0174b JJ |
1121 | static inline bool |
1122 | gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, | |
d9a6bd32 | 1123 | struct gomp_team *team) |
acf0174b | 1124 | { |
e4606348 JJ |
1125 | #if _LIBGOMP_CHECKING_ |
1126 | if (child_task->parent) | |
1127 | priority_queue_verify (PQ_CHILDREN, | |
1128 | &child_task->parent->children_queue, true); | |
1129 | if (child_task->taskgroup) | |
1130 | priority_queue_verify (PQ_TASKGROUP, | |
1131 | &child_task->taskgroup->taskgroup_queue, false); | |
1132 | priority_queue_verify (PQ_TEAM, &team->task_queue, false); | |
d9a6bd32 JJ |
1133 | #endif |
1134 | ||
e4606348 | 1135 | /* Task is about to go tied, move it out of the way. */ |
0494285a | 1136 | if (parent) |
e4606348 JJ |
1137 | priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, |
1138 | child_task); | |
d9a6bd32 | 1139 | |
e4606348 | 1140 | /* Task is about to go tied, move it out of the way. */ |
d9a6bd32 JJ |
1141 | struct gomp_taskgroup *taskgroup = child_task->taskgroup; |
1142 | if (taskgroup) | |
e4606348 JJ |
1143 | priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
1144 | child_task); | |
d9a6bd32 | 1145 | |
e4606348 JJ |
1146 | priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, |
1147 | MEMMODEL_RELAXED); | |
1148 | child_task->pnode[PQ_TEAM].next = NULL; | |
1149 | child_task->pnode[PQ_TEAM].prev = NULL; | |
acf0174b | 1150 | child_task->kind = GOMP_TASK_TIED; |
d9a6bd32 | 1151 | |
acf0174b JJ |
1152 | if (--team->task_queued_count == 0) |
1153 | gomp_team_barrier_clear_task_pending (&team->barrier); | |
28567c40 | 1154 | if (__builtin_expect (gomp_cancel_var, 0) |
acf0174b | 1155 | && !child_task->copy_ctors_done) |
28567c40 JJ |
1156 | { |
1157 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
1158 | return true; | |
1159 | if (taskgroup) | |
1160 | { | |
1161 | if (taskgroup->cancelled) | |
1162 | return true; | |
1163 | if (taskgroup->workshare | |
1164 | && taskgroup->prev | |
1165 | && taskgroup->prev->cancelled) | |
1166 | return true; | |
1167 | } | |
1168 | } | |
acf0174b JJ |
1169 | return false; |
1170 | } | |
1171 | ||
1172 | static void | |
1173 | gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) | |
1174 | { | |
1175 | struct gomp_task *parent = child_task->parent; | |
1176 | size_t i; | |
1177 | ||
1178 | for (i = 0; i < child_task->depend_count; i++) | |
1179 | if (!child_task->depend[i].redundant) | |
1180 | { | |
1181 | if (child_task->depend[i].next) | |
1182 | child_task->depend[i].next->prev = child_task->depend[i].prev; | |
1183 | if (child_task->depend[i].prev) | |
1184 | child_task->depend[i].prev->next = child_task->depend[i].next; | |
1185 | else | |
1186 | { | |
1187 | hash_entry_type *slot | |
1188 | = htab_find_slot (&parent->depend_hash, &child_task->depend[i], | |
1189 | NO_INSERT); | |
1190 | if (*slot != &child_task->depend[i]) | |
1191 | abort (); | |
1192 | if (child_task->depend[i].next) | |
1193 | *slot = child_task->depend[i].next; | |
1194 | else | |
1195 | htab_clear_slot (parent->depend_hash, slot); | |
1196 | } | |
1197 | } | |
1198 | } | |
1199 | ||
e4606348 JJ |
1200 | /* After a CHILD_TASK has been run, adjust the dependency queue for |
1201 | each task that depends on CHILD_TASK, to record the fact that there | |
1202 | is one less dependency to worry about. If a task that depended on | |
1203 | CHILD_TASK now has no dependencies, place it in the various queues | |
1204 | so it gets scheduled to run. | |
d9a6bd32 JJ |
1205 | |
1206 | TEAM is the team to which CHILD_TASK belongs to. */ | |
1207 | ||
acf0174b JJ |
1208 | static size_t |
1209 | gomp_task_run_post_handle_dependers (struct gomp_task *child_task, | |
1210 | struct gomp_team *team) | |
1211 | { | |
1212 | struct gomp_task *parent = child_task->parent; | |
1213 | size_t i, count = child_task->dependers->n_elem, ret = 0; | |
1214 | for (i = 0; i < count; i++) | |
1215 | { | |
1216 | struct gomp_task *task = child_task->dependers->elem[i]; | |
e4606348 JJ |
1217 | |
1218 | /* CHILD_TASK satisfies a dependency for TASK. Keep track of | |
1219 | TASK's remaining dependencies. Once TASK has no other | |
93d90219 | 1220 | dependencies, put it into the various queues so it will get |
e4606348 | 1221 | scheduled for execution. */ |
acf0174b JJ |
1222 | if (--task->num_dependees != 0) |
1223 | continue; | |
1224 | ||
1225 | struct gomp_taskgroup *taskgroup = task->taskgroup; | |
1226 | if (parent) | |
1227 | { | |
e4606348 JJ |
1228 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
1229 | task, task->priority, | |
1230 | PRIORITY_INSERT_BEGIN, | |
1231 | /*adjust_parent_depends_on=*/true, | |
1232 | task->parent_depends_on); | |
0494285a | 1233 | if (parent->taskwait) |
acf0174b | 1234 | { |
0494285a JJ |
1235 | if (parent->taskwait->in_taskwait) |
1236 | { | |
e4606348 JJ |
1237 | /* One more task has had its dependencies met. |
1238 | Inform any waiters. */ | |
0494285a JJ |
1239 | parent->taskwait->in_taskwait = false; |
1240 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1241 | } | |
1242 | else if (parent->taskwait->in_depend_wait) | |
1243 | { | |
e4606348 JJ |
1244 | /* One more task has had its dependencies met. |
1245 | Inform any waiters. */ | |
0494285a JJ |
1246 | parent->taskwait->in_depend_wait = false; |
1247 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1248 | } | |
acf0174b JJ |
1249 | } |
1250 | } | |
1251 | if (taskgroup) | |
1252 | { | |
e4606348 JJ |
1253 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
1254 | task, task->priority, | |
1255 | PRIORITY_INSERT_BEGIN, | |
1256 | /*adjust_parent_depends_on=*/false, | |
1257 | task->parent_depends_on); | |
acf0174b JJ |
1258 | if (taskgroup->in_taskgroup_wait) |
1259 | { | |
e4606348 JJ |
1260 | /* One more task has had its dependencies met. |
1261 | Inform any waiters. */ | |
acf0174b JJ |
1262 | taskgroup->in_taskgroup_wait = false; |
1263 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
1264 | } | |
1265 | } | |
e4606348 JJ |
1266 | priority_queue_insert (PQ_TEAM, &team->task_queue, |
1267 | task, task->priority, | |
1268 | PRIORITY_INSERT_END, | |
1269 | /*adjust_parent_depends_on=*/false, | |
1270 | task->parent_depends_on); | |
acf0174b JJ |
1271 | ++team->task_count; |
1272 | ++team->task_queued_count; | |
1273 | ++ret; | |
1274 | } | |
1275 | free (child_task->dependers); | |
1276 | child_task->dependers = NULL; | |
1277 | if (ret > 1) | |
1278 | gomp_team_barrier_set_task_pending (&team->barrier); | |
1279 | return ret; | |
1280 | } | |
1281 | ||
1282 | static inline size_t | |
1283 | gomp_task_run_post_handle_depend (struct gomp_task *child_task, | |
1284 | struct gomp_team *team) | |
1285 | { | |
1286 | if (child_task->depend_count == 0) | |
1287 | return 0; | |
1288 | ||
1289 | /* If parent is gone already, the hash table is freed and nothing | |
1290 | will use the hash table anymore, no need to remove anything from it. */ | |
1291 | if (child_task->parent != NULL) | |
1292 | gomp_task_run_post_handle_depend_hash (child_task); | |
1293 | ||
1294 | if (child_task->dependers == NULL) | |
1295 | return 0; | |
1296 | ||
1297 | return gomp_task_run_post_handle_dependers (child_task, team); | |
1298 | } | |
1299 | ||
d9a6bd32 JJ |
1300 | /* Remove CHILD_TASK from its parent. */ |
1301 | ||
acf0174b JJ |
1302 | static inline void |
1303 | gomp_task_run_post_remove_parent (struct gomp_task *child_task) | |
1304 | { | |
1305 | struct gomp_task *parent = child_task->parent; | |
1306 | if (parent == NULL) | |
1307 | return; | |
d9a6bd32 JJ |
1308 | |
1309 | /* If this was the last task the parent was depending on, | |
1310 | synchronize with gomp_task_maybe_wait_for_dependencies so it can | |
1311 | clean up and return. */ | |
0494285a JJ |
1312 | if (__builtin_expect (child_task->parent_depends_on, 0) |
1313 | && --parent->taskwait->n_depend == 0 | |
1314 | && parent->taskwait->in_depend_wait) | |
1315 | { | |
1316 | parent->taskwait->in_depend_wait = false; | |
1317 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1318 | } | |
d9a6bd32 | 1319 | |
e4606348 JJ |
1320 | if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, |
1321 | child_task, MEMMODEL_RELEASE) | |
1322 | && parent->taskwait && parent->taskwait->in_taskwait) | |
acf0174b | 1323 | { |
e4606348 JJ |
1324 | parent->taskwait->in_taskwait = false; |
1325 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
acf0174b | 1326 | } |
e4606348 JJ |
1327 | child_task->pnode[PQ_CHILDREN].next = NULL; |
1328 | child_task->pnode[PQ_CHILDREN].prev = NULL; | |
acf0174b JJ |
1329 | } |
1330 | ||
d9a6bd32 JJ |
1331 | /* Remove CHILD_TASK from its taskgroup. */ |
1332 | ||
acf0174b JJ |
1333 | static inline void |
1334 | gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) | |
1335 | { | |
1336 | struct gomp_taskgroup *taskgroup = child_task->taskgroup; | |
1337 | if (taskgroup == NULL) | |
1338 | return; | |
e4606348 JJ |
1339 | bool empty = priority_queue_remove (PQ_TASKGROUP, |
1340 | &taskgroup->taskgroup_queue, | |
1341 | child_task, MEMMODEL_RELAXED); | |
1342 | child_task->pnode[PQ_TASKGROUP].next = NULL; | |
1343 | child_task->pnode[PQ_TASKGROUP].prev = NULL; | |
acf0174b JJ |
1344 | if (taskgroup->num_children > 1) |
1345 | --taskgroup->num_children; | |
1346 | else | |
1347 | { | |
1348 | /* We access taskgroup->num_children in GOMP_taskgroup_end | |
1349 | outside of the task lock mutex region, so | |
1350 | need a release barrier here to ensure memory | |
1351 | written by child_task->fn above is flushed | |
1352 | before the NULL is written. */ | |
1353 | __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); | |
1354 | } | |
e4606348 | 1355 | if (empty && taskgroup->in_taskgroup_wait) |
acf0174b | 1356 | { |
e4606348 JJ |
1357 | taskgroup->in_taskgroup_wait = false; |
1358 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
acf0174b JJ |
1359 | } |
1360 | } | |
1361 | ||
a68ab351 JJ |
1362 | void |
1363 | gomp_barrier_handle_tasks (gomp_barrier_state_t state) | |
1364 | { | |
1365 | struct gomp_thread *thr = gomp_thread (); | |
1366 | struct gomp_team *team = thr->ts.team; | |
1367 | struct gomp_task *task = thr->task; | |
1368 | struct gomp_task *child_task = NULL; | |
1369 | struct gomp_task *to_free = NULL; | |
acf0174b | 1370 | int do_wake = 0; |
a68ab351 JJ |
1371 | |
1372 | gomp_mutex_lock (&team->task_lock); | |
1373 | if (gomp_barrier_last_thread (state)) | |
1374 | { | |
1375 | if (team->task_count == 0) | |
1376 | { | |
1377 | gomp_team_barrier_done (&team->barrier, state); | |
1378 | gomp_mutex_unlock (&team->task_lock); | |
1379 | gomp_team_barrier_wake (&team->barrier, 0); | |
1380 | return; | |
1381 | } | |
1382 | gomp_team_barrier_set_waiting_for_tasks (&team->barrier); | |
1383 | } | |
1384 | ||
1385 | while (1) | |
1386 | { | |
acf0174b | 1387 | bool cancelled = false; |
a6d22fb2 | 1388 | |
e4606348 | 1389 | if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) |
a68ab351 | 1390 | { |
e4606348 JJ |
1391 | bool ignored; |
1392 | child_task | |
1393 | = priority_queue_next_task (PQ_TEAM, &team->task_queue, | |
1394 | PQ_IGNORED, NULL, | |
1395 | &ignored); | |
acf0174b | 1396 | cancelled = gomp_task_run_pre (child_task, child_task->parent, |
d9a6bd32 | 1397 | team); |
acf0174b JJ |
1398 | if (__builtin_expect (cancelled, 0)) |
1399 | { | |
1400 | if (to_free) | |
1401 | { | |
1402 | gomp_finish_task (to_free); | |
1403 | free (to_free); | |
1404 | to_free = NULL; | |
1405 | } | |
1406 | goto finish_cancelled; | |
1407 | } | |
a68ab351 | 1408 | team->task_running_count++; |
acf0174b | 1409 | child_task->in_tied_task = true; |
a68ab351 | 1410 | } |
d656bfda KCY |
1411 | else if (team->task_count == 0 |
1412 | && gomp_team_barrier_waiting_for_tasks (&team->barrier)) | |
1413 | { | |
1414 | gomp_team_barrier_done (&team->barrier, state); | |
1415 | gomp_mutex_unlock (&team->task_lock); | |
1416 | gomp_team_barrier_wake (&team->barrier, 0); | |
1417 | if (to_free) | |
1418 | { | |
1419 | gomp_finish_task (to_free); | |
1420 | free (to_free); | |
1421 | } | |
1422 | return; | |
1423 | } | |
a68ab351 | 1424 | gomp_mutex_unlock (&team->task_lock); |
acf0174b JJ |
1425 | if (do_wake) |
1426 | { | |
1427 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1428 | do_wake = 0; | |
1429 | } | |
a68ab351 JJ |
1430 | if (to_free) |
1431 | { | |
1432 | gomp_finish_task (to_free); | |
1433 | free (to_free); | |
1434 | to_free = NULL; | |
1435 | } | |
1436 | if (child_task) | |
1437 | { | |
1438 | thr->task = child_task; | |
e4606348 JJ |
1439 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1440 | { | |
1441 | if (gomp_target_task_fn (child_task->fn_data)) | |
1442 | { | |
1443 | thr->task = task; | |
1444 | gomp_mutex_lock (&team->task_lock); | |
1445 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1446 | team->task_running_count--; | |
1447 | struct gomp_target_task *ttask | |
1448 | = (struct gomp_target_task *) child_task->fn_data; | |
1449 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1450 | in between gomp_target_task_fn and the mutex lock, | |
1451 | perform the requeuing here. */ | |
1452 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1453 | gomp_target_task_completion (team, child_task); | |
1454 | else | |
1455 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1456 | child_task = NULL; | |
1457 | continue; | |
1458 | } | |
1459 | } | |
1460 | else | |
1461 | child_task->fn (child_task->fn_data); | |
a68ab351 JJ |
1462 | thr->task = task; |
1463 | } | |
1464 | else | |
1465 | return; | |
1466 | gomp_mutex_lock (&team->task_lock); | |
1467 | if (child_task) | |
1468 | { | |
d656bfda | 1469 | if (child_task->detach_team) |
acf0174b | 1470 | { |
d656bfda KCY |
1471 | assert (child_task->detach_team == team); |
1472 | child_task->kind = GOMP_TASK_DETACHED; | |
a6d22fb2 | 1473 | ++team->task_detach_count; |
d656bfda KCY |
1474 | --team->task_running_count; |
1475 | gomp_debug (0, | |
1476 | "thread %d: task with event %p finished without " | |
1477 | "completion event fulfilled in team barrier\n", | |
1478 | thr->ts.team_id, child_task); | |
a6d22fb2 | 1479 | child_task = NULL; |
d656bfda | 1480 | continue; |
acf0174b | 1481 | } |
d656bfda KCY |
1482 | |
1483 | finish_cancelled:; | |
1484 | size_t new_tasks | |
1485 | = gomp_task_run_post_handle_depend (child_task, team); | |
1486 | gomp_task_run_post_remove_parent (child_task); | |
1487 | gomp_clear_parent (&child_task->children_queue); | |
1488 | gomp_task_run_post_remove_taskgroup (child_task); | |
1489 | to_free = child_task; | |
1490 | if (!cancelled) | |
1491 | team->task_running_count--; | |
1492 | child_task = NULL; | |
1493 | if (new_tasks > 1) | |
a68ab351 | 1494 | { |
d656bfda KCY |
1495 | do_wake = team->nthreads - team->task_running_count; |
1496 | if (do_wake > new_tasks) | |
1497 | do_wake = new_tasks; | |
a68ab351 | 1498 | } |
d656bfda | 1499 | --team->task_count; |
a68ab351 JJ |
1500 | } |
1501 | } | |
1502 | } | |
1503 | ||
d9a6bd32 JJ |
1504 | /* Called when encountering a taskwait directive. |
1505 | ||
1506 | Wait for all children of the current task. */ | |
a68ab351 JJ |
1507 | |
1508 | void | |
1509 | GOMP_taskwait (void) | |
1510 | { | |
1511 | struct gomp_thread *thr = gomp_thread (); | |
1512 | struct gomp_team *team = thr->ts.team; | |
1513 | struct gomp_task *task = thr->task; | |
1514 | struct gomp_task *child_task = NULL; | |
1515 | struct gomp_task *to_free = NULL; | |
0494285a | 1516 | struct gomp_taskwait taskwait; |
acf0174b | 1517 | int do_wake = 0; |
a68ab351 | 1518 | |
bed8d8a6 | 1519 | /* The acquire barrier on load of task->children here synchronizes |
acf0174b | 1520 | with the write of a NULL in gomp_task_run_post_remove_parent. It is |
bed8d8a6 AM |
1521 | not necessary that we synchronize with other non-NULL writes at |
1522 | this point, but we must ensure that all writes to memory by a | |
1523 | child thread task work function are seen before we exit from | |
1524 | GOMP_taskwait. */ | |
9a647288 | 1525 | if (task == NULL |
e4606348 | 1526 | || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) |
a68ab351 | 1527 | return; |
fbf7be80 | 1528 | |
0494285a | 1529 | memset (&taskwait, 0, sizeof (taskwait)); |
e4606348 | 1530 | bool child_q = false; |
a68ab351 JJ |
1531 | gomp_mutex_lock (&team->task_lock); |
1532 | while (1) | |
1533 | { | |
acf0174b | 1534 | bool cancelled = false; |
e4606348 | 1535 | if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) |
a68ab351 | 1536 | { |
0494285a JJ |
1537 | bool destroy_taskwait = task->taskwait != NULL; |
1538 | task->taskwait = NULL; | |
1539 | gomp_mutex_unlock (&team->task_lock); | |
1540 | if (to_free) | |
1541 | { | |
1542 | gomp_finish_task (to_free); | |
1543 | free (to_free); | |
1544 | } | |
1545 | if (destroy_taskwait) | |
1546 | gomp_sem_destroy (&taskwait.taskwait_sem); | |
1547 | return; | |
1548 | } | |
e4606348 JJ |
1549 | struct gomp_task *next_task |
1550 | = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, | |
1551 | PQ_TEAM, &team->task_queue, &child_q); | |
1552 | if (next_task->kind == GOMP_TASK_WAITING) | |
0494285a | 1553 | { |
e4606348 | 1554 | child_task = next_task; |
0494285a | 1555 | cancelled |
d9a6bd32 | 1556 | = gomp_task_run_pre (child_task, task, team); |
0494285a JJ |
1557 | if (__builtin_expect (cancelled, 0)) |
1558 | { | |
1559 | if (to_free) | |
1560 | { | |
1561 | gomp_finish_task (to_free); | |
1562 | free (to_free); | |
1563 | to_free = NULL; | |
1564 | } | |
1565 | goto finish_cancelled; | |
1566 | } | |
1567 | } | |
1568 | else | |
1569 | { | |
e4606348 | 1570 | /* All tasks we are waiting for are either running in other |
d656bfda KCY |
1571 | threads, are detached and waiting for the completion event to be |
1572 | fulfilled, or they are tasks that have not had their | |
e4606348 JJ |
1573 | dependencies met (so they're not even in the queue). Wait |
1574 | for them. */ | |
0494285a JJ |
1575 | if (task->taskwait == NULL) |
1576 | { | |
1577 | taskwait.in_depend_wait = false; | |
1578 | gomp_sem_init (&taskwait.taskwait_sem, 0); | |
1579 | task->taskwait = &taskwait; | |
1580 | } | |
1581 | taskwait.in_taskwait = true; | |
1582 | } | |
1583 | gomp_mutex_unlock (&team->task_lock); | |
1584 | if (do_wake) | |
1585 | { | |
1586 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1587 | do_wake = 0; | |
1588 | } | |
1589 | if (to_free) | |
1590 | { | |
1591 | gomp_finish_task (to_free); | |
1592 | free (to_free); | |
1593 | to_free = NULL; | |
1594 | } | |
1595 | if (child_task) | |
1596 | { | |
1597 | thr->task = child_task; | |
e4606348 JJ |
1598 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1599 | { | |
1600 | if (gomp_target_task_fn (child_task->fn_data)) | |
1601 | { | |
1602 | thr->task = task; | |
1603 | gomp_mutex_lock (&team->task_lock); | |
1604 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1605 | struct gomp_target_task *ttask | |
1606 | = (struct gomp_target_task *) child_task->fn_data; | |
1607 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1608 | in between gomp_target_task_fn and the mutex lock, | |
1609 | perform the requeuing here. */ | |
1610 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1611 | gomp_target_task_completion (team, child_task); | |
1612 | else | |
1613 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1614 | child_task = NULL; | |
1615 | continue; | |
1616 | } | |
1617 | } | |
1618 | else | |
1619 | child_task->fn (child_task->fn_data); | |
0494285a JJ |
1620 | thr->task = task; |
1621 | } | |
1622 | else | |
1623 | gomp_sem_wait (&taskwait.taskwait_sem); | |
1624 | gomp_mutex_lock (&team->task_lock); | |
1625 | if (child_task) | |
1626 | { | |
d656bfda KCY |
1627 | if (child_task->detach_team) |
1628 | { | |
1629 | assert (child_task->detach_team == team); | |
1630 | child_task->kind = GOMP_TASK_DETACHED; | |
1631 | ++team->task_detach_count; | |
1632 | gomp_debug (0, | |
1633 | "thread %d: task with event %p finished without " | |
1634 | "completion event fulfilled in taskwait\n", | |
1635 | thr->ts.team_id, child_task); | |
1636 | child_task = NULL; | |
1637 | continue; | |
1638 | } | |
1639 | ||
0494285a JJ |
1640 | finish_cancelled:; |
1641 | size_t new_tasks | |
1642 | = gomp_task_run_post_handle_depend (child_task, team); | |
d9a6bd32 | 1643 | |
e4606348 | 1644 | if (child_q) |
0494285a | 1645 | { |
e4606348 JJ |
1646 | priority_queue_remove (PQ_CHILDREN, &task->children_queue, |
1647 | child_task, MEMMODEL_RELAXED); | |
1648 | child_task->pnode[PQ_CHILDREN].next = NULL; | |
1649 | child_task->pnode[PQ_CHILDREN].prev = NULL; | |
0494285a | 1650 | } |
d9a6bd32 | 1651 | |
e4606348 JJ |
1652 | gomp_clear_parent (&child_task->children_queue); |
1653 | ||
0494285a | 1654 | gomp_task_run_post_remove_taskgroup (child_task); |
d9a6bd32 | 1655 | |
0494285a JJ |
1656 | to_free = child_task; |
1657 | child_task = NULL; | |
1658 | team->task_count--; | |
1659 | if (new_tasks > 1) | |
1660 | { | |
1661 | do_wake = team->nthreads - team->task_running_count | |
1662 | - !task->in_tied_task; | |
1663 | if (do_wake > new_tasks) | |
1664 | do_wake = new_tasks; | |
1665 | } | |
1666 | } | |
1667 | } | |
1668 | } | |
1669 | ||
28567c40 JJ |
1670 | /* Called when encountering a taskwait directive with depend clause(s). |
1671 | Wait as if it was an mergeable included task construct with empty body. */ | |
1672 | ||
1673 | void | |
1674 | GOMP_taskwait_depend (void **depend) | |
1675 | { | |
1676 | struct gomp_thread *thr = gomp_thread (); | |
1677 | struct gomp_team *team = thr->ts.team; | |
1678 | ||
1679 | /* If parallel or taskgroup has been cancelled, return early. */ | |
1680 | if (__builtin_expect (gomp_cancel_var, 0) && team) | |
1681 | { | |
1682 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
1683 | return; | |
1684 | if (thr->task->taskgroup) | |
1685 | { | |
1686 | if (thr->task->taskgroup->cancelled) | |
1687 | return; | |
1688 | if (thr->task->taskgroup->workshare | |
1689 | && thr->task->taskgroup->prev | |
1690 | && thr->task->taskgroup->prev->cancelled) | |
1691 | return; | |
1692 | } | |
1693 | } | |
1694 | ||
1695 | if (thr->task && thr->task->depend_hash) | |
1696 | gomp_task_maybe_wait_for_dependencies (depend); | |
1697 | } | |
1698 | ||
e4606348 JJ |
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  /* DEPEND encoding: depend[0] is the total count (new-style encoding
     when 0), depend[1] the number of out/inout entries, addresses start
     at depend[2].  */
  size_t orig_ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t ndepend = orig_ndepend;
  size_t normal = ndepend;
  size_t n = 2;
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (ndepend == 0)
    {
      /* New-style encoding (depend[0] == 0): depend[1] is the count,
	 depend[2]/depend[3]/depend[4] classify the entries and the
	 addresses start at depend[5]; entries at index >= normal are
	 omp_depend_t objects that must be decoded below.  */
      ndepend = nout;
      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
      normal = nout + (uintptr_t) depend[4];
      n = 5;
    }
  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + n];
      elem.is_in = i >= nout;
      if (__builtin_expect (i >= normal, 0))
	{
	  /* omp_depend_t entry: d[0] is the address, d[1] the
	     dependence type.  */
	  void **d = (void **) elem.addr;
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_IN:
	      break;
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      elem.is_in = 0;
	      break;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  elem.addr = d[0];
	}
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (elem.is_in && ent->is_in)
	  /* in-after-in needs no waiting.  */
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  /* Stack-allocated taskwait: it only lives for the duration of this
     wait and is published through task->taskwait.  */
  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  /* Scheduling loop: run ready children (or target tasks), otherwise
     sleep on the taskwait semaphore until the dependence count drops
     to zero.  The team lock is held at the top of each iteration.  */
  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 chose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* fn == NULL marks a target task; see gomp_target_task_fn.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Dependence resolution released more than one task;
		 wake up sleeping team members to help run them.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
20906c66 JJ |
1917 | |
/* Called when encountering a taskyield directive.  The current
   implementation does not switch tasks here, so this is a no-op.  */

void
GOMP_taskyield (void)
{
  /* Intentionally empty.  */
}
1925 | ||
28567c40 JJ |
1926 | static inline struct gomp_taskgroup * |
1927 | gomp_taskgroup_init (struct gomp_taskgroup *prev) | |
1928 | { | |
1929 | struct gomp_taskgroup *taskgroup | |
1930 | = gomp_malloc (sizeof (struct gomp_taskgroup)); | |
1931 | taskgroup->prev = prev; | |
1932 | priority_queue_init (&taskgroup->taskgroup_queue); | |
1933 | taskgroup->reductions = prev ? prev->reductions : NULL; | |
1934 | taskgroup->in_taskgroup_wait = false; | |
1935 | taskgroup->cancelled = false; | |
1936 | taskgroup->workshare = false; | |
1937 | taskgroup->num_children = 0; | |
1938 | gomp_sem_init (&taskgroup->taskgroup_sem, 0); | |
1939 | return taskgroup; | |
1940 | } | |
1941 | ||
acf0174b JJ |
1942 | void |
1943 | GOMP_taskgroup_start (void) | |
1944 | { | |
1945 | struct gomp_thread *thr = gomp_thread (); | |
1946 | struct gomp_team *team = thr->ts.team; | |
1947 | struct gomp_task *task = thr->task; | |
acf0174b JJ |
1948 | |
1949 | /* If team is NULL, all tasks are executed as | |
d9a6bd32 | 1950 | GOMP_TASK_UNDEFERRED tasks and thus all children tasks of |
acf0174b JJ |
1951 | taskgroup and their descendant tasks will be finished |
1952 | by the time GOMP_taskgroup_end is called. */ | |
1953 | if (team == NULL) | |
1954 | return; | |
28567c40 | 1955 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); |
acf0174b JJ |
1956 | } |
1957 | ||
1958 | void | |
1959 | GOMP_taskgroup_end (void) | |
1960 | { | |
1961 | struct gomp_thread *thr = gomp_thread (); | |
1962 | struct gomp_team *team = thr->ts.team; | |
1963 | struct gomp_task *task = thr->task; | |
1964 | struct gomp_taskgroup *taskgroup; | |
1965 | struct gomp_task *child_task = NULL; | |
1966 | struct gomp_task *to_free = NULL; | |
1967 | int do_wake = 0; | |
1968 | ||
1969 | if (team == NULL) | |
1970 | return; | |
1971 | taskgroup = task->taskgroup; | |
e4606348 JJ |
1972 | if (__builtin_expect (taskgroup == NULL, 0) |
1973 | && thr->ts.level == 0) | |
1974 | { | |
1975 | /* This can happen if GOMP_taskgroup_start is called when | |
1976 | thr->ts.team == NULL, but inside of the taskgroup there | |
1977 | is #pragma omp target nowait that creates an implicit | |
1978 | team with a single thread. In this case, we want to wait | |
1979 | for all outstanding tasks in this team. */ | |
1980 | gomp_team_barrier_wait (&team->barrier); | |
1981 | return; | |
1982 | } | |
acf0174b JJ |
1983 | |
1984 | /* The acquire barrier on load of taskgroup->num_children here | |
1985 | synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. | |
1986 | It is not necessary that we synchronize with other non-0 writes at | |
1987 | this point, but we must ensure that all writes to memory by a | |
1988 | child thread task work function are seen before we exit from | |
1989 | GOMP_taskgroup_end. */ | |
1990 | if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) | |
1991 | goto finish; | |
1992 | ||
e4606348 | 1993 | bool unused; |
acf0174b JJ |
1994 | gomp_mutex_lock (&team->task_lock); |
1995 | while (1) | |
1996 | { | |
1997 | bool cancelled = false; | |
e4606348 JJ |
1998 | if (priority_queue_empty_p (&taskgroup->taskgroup_queue, |
1999 | MEMMODEL_RELAXED)) | |
acf0174b JJ |
2000 | { |
2001 | if (taskgroup->num_children) | |
acf0174b | 2002 | { |
e4606348 JJ |
2003 | if (priority_queue_empty_p (&task->children_queue, |
2004 | MEMMODEL_RELAXED)) | |
3696163c | 2005 | goto do_wait; |
e4606348 JJ |
2006 | child_task |
2007 | = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, | |
2008 | PQ_TEAM, &team->task_queue, | |
2009 | &unused); | |
2010 | } | |
2011 | else | |
3696163c JJ |
2012 | { |
2013 | gomp_mutex_unlock (&team->task_lock); | |
2014 | if (to_free) | |
2015 | { | |
2016 | gomp_finish_task (to_free); | |
2017 | free (to_free); | |
2018 | } | |
2019 | goto finish; | |
acf0174b | 2020 | } |
acf0174b | 2021 | } |
3696163c | 2022 | else |
e4606348 JJ |
2023 | child_task |
2024 | = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
2025 | PQ_TEAM, &team->task_queue, &unused); | |
3696163c | 2026 | if (child_task->kind == GOMP_TASK_WAITING) |
acf0174b | 2027 | { |
acf0174b | 2028 | cancelled |
d9a6bd32 | 2029 | = gomp_task_run_pre (child_task, child_task->parent, team); |
acf0174b JJ |
2030 | if (__builtin_expect (cancelled, 0)) |
2031 | { | |
2032 | if (to_free) | |
2033 | { | |
2034 | gomp_finish_task (to_free); | |
2035 | free (to_free); | |
2036 | to_free = NULL; | |
2037 | } | |
2038 | goto finish_cancelled; | |
2039 | } | |
2040 | } | |
2041 | else | |
2042 | { | |
3696163c | 2043 | child_task = NULL; |
acf0174b | 2044 | do_wait: |
e4606348 JJ |
2045 | /* All tasks we are waiting for are either running in other |
2046 | threads, or they are tasks that have not had their | |
2047 | dependencies met (so they're not even in the queue). Wait | |
2048 | for them. */ | |
acf0174b JJ |
2049 | taskgroup->in_taskgroup_wait = true; |
2050 | } | |
2051 | gomp_mutex_unlock (&team->task_lock); | |
2052 | if (do_wake) | |
2053 | { | |
2054 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
2055 | do_wake = 0; | |
2056 | } | |
2057 | if (to_free) | |
2058 | { | |
2059 | gomp_finish_task (to_free); | |
2060 | free (to_free); | |
2061 | to_free = NULL; | |
2062 | } | |
2063 | if (child_task) | |
2064 | { | |
2065 | thr->task = child_task; | |
e4606348 JJ |
2066 | if (__builtin_expect (child_task->fn == NULL, 0)) |
2067 | { | |
2068 | if (gomp_target_task_fn (child_task->fn_data)) | |
2069 | { | |
2070 | thr->task = task; | |
2071 | gomp_mutex_lock (&team->task_lock); | |
2072 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
2073 | struct gomp_target_task *ttask | |
2074 | = (struct gomp_target_task *) child_task->fn_data; | |
2075 | /* If GOMP_PLUGIN_target_task_completion has run already | |
2076 | in between gomp_target_task_fn and the mutex lock, | |
2077 | perform the requeuing here. */ | |
2078 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
2079 | gomp_target_task_completion (team, child_task); | |
2080 | else | |
2081 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
2082 | child_task = NULL; | |
2083 | continue; | |
2084 | } | |
2085 | } | |
2086 | else | |
2087 | child_task->fn (child_task->fn_data); | |
acf0174b JJ |
2088 | thr->task = task; |
2089 | } | |
2090 | else | |
2091 | gomp_sem_wait (&taskgroup->taskgroup_sem); | |
2092 | gomp_mutex_lock (&team->task_lock); | |
2093 | if (child_task) | |
2094 | { | |
d656bfda KCY |
2095 | if (child_task->detach_team) |
2096 | { | |
2097 | assert (child_task->detach_team == team); | |
2098 | child_task->kind = GOMP_TASK_DETACHED; | |
2099 | ++team->task_detach_count; | |
2100 | gomp_debug (0, | |
2101 | "thread %d: task with event %p finished without " | |
2102 | "completion event fulfilled in taskgroup\n", | |
2103 | thr->ts.team_id, child_task); | |
2104 | child_task = NULL; | |
2105 | continue; | |
2106 | } | |
2107 | ||
acf0174b JJ |
2108 | finish_cancelled:; |
2109 | size_t new_tasks | |
2110 | = gomp_task_run_post_handle_depend (child_task, team); | |
acf0174b | 2111 | gomp_task_run_post_remove_parent (child_task); |
e4606348 | 2112 | gomp_clear_parent (&child_task->children_queue); |
3696163c | 2113 | gomp_task_run_post_remove_taskgroup (child_task); |
acf0174b JJ |
2114 | to_free = child_task; |
2115 | child_task = NULL; | |
2116 | team->task_count--; | |
2117 | if (new_tasks > 1) | |
2118 | { | |
2119 | do_wake = team->nthreads - team->task_running_count | |
2120 | - !task->in_tied_task; | |
2121 | if (do_wake > new_tasks) | |
2122 | do_wake = new_tasks; | |
2123 | } | |
2124 | } | |
2125 | } | |
2126 | ||
2127 | finish: | |
2128 | task->taskgroup = taskgroup->prev; | |
2129 | gomp_sem_destroy (&taskgroup->taskgroup_sem); | |
2130 | free (taskgroup); | |
2131 | } | |
2132 | ||
28567c40 JJ |
/* Register the task-reduction descriptor chain DATA: allocate (or, for
   worksharing reductions, copy from ORIG) the per-thread arrays for
   each descriptor, link the chain to the previously registered chain
   OLD, and build a hash table mapping original list-item addresses to
   their descriptors.  NTHREADS is the number of threads that need a
   private copy.  See the comment before GOMP_taskgroup_reduction_register
   for the layout of DATA.  */

static inline __attribute__((always_inline)) void
gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
			 unsigned nthreads)
{
  size_t total_cnt = 0;
  uintptr_t *d = data;
  struct htab *old_htab = NULL, *new_htab;
  do
    {
      if (__builtin_expect (orig != NULL, 0))
	{
	  /* For worksharing task reductions, memory has been allocated
	     already by some other thread that encountered the construct
	     earlier.  */
	  d[2] = orig[2];
	  d[6] = orig[6];
	  orig = (uintptr_t *) orig[4];
	}
      else
	{
	  /* d[1] is the per-thread chunk size; allocate and zero one
	     chunk per thread, aligned to d[2] (which is then replaced
	     by the array pointer).  */
	  size_t sz = d[1] * nthreads;
	  /* Should use omp_alloc if d[3] is not -1.  */
	  void *ptr = gomp_aligned_alloc (d[2], sz);
	  memset (ptr, '\0', sz);
	  d[2] = (uintptr_t) ptr;
	  d[6] = d[2] + sz;
	}
      d[5] = 0;
      total_cnt += d[0];
      if (d[4] == 0)
	{
	  /* End of the new chain: splice in the old chain.  */
	  d[4] = (uintptr_t) old;
	  break;
	}
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  if (old && old[5])
    {
      old_htab = (struct htab *) old[5];
      total_cnt += htab_elements (old_htab);
    }
  new_htab = htab_create (total_cnt);
  if (old_htab)
    {
      /* Copy old hash table, like in htab_expand.  */
      hash_entry_type *p, *olimit;
      new_htab->n_elements = htab_elements (old_htab);
      olimit = old_htab->entries + old_htab->size;
      p = old_htab->entries;
      do
	{
	  hash_entry_type x = *p;
	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
	  p++;
	}
      while (p < olimit);
    }
  d = data;
  do
    {
      size_t j;
      for (j = 0; j < d[0]; ++j)
	{
	  /* Each entry is the triple (address, offset, back-pointer)
	     starting at d + 7.  */
	  uintptr_t *p = d + 7 + j * 3;
	  p[2] = (uintptr_t) d;
	  /* Ugly hack, hash_entry_type is defined for the task dependencies,
	     which hash on the first element which is a pointer.  We need
	     to hash also on the first sizeof (uintptr_t) bytes which contain
	     a pointer.  Hide the cast from the compiler.  */
	  hash_entry_type n;
	  __asm ("" : "=g" (n) : "0" (p));
	  *htab_find_slot (&new_htab, n, INSERT) = n;
	}
      if (d[4] == (uintptr_t) old)
	break;
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  /* Publish the hash table on the last descriptor of the new chain.  */
  d[5] = (uintptr_t) new_htab;
}
2217 | ||
/* Create a single-thread implicit team for the current thread.  Used
   when a construct that needs a team/task context (e.g. an orphaned
   taskgroup with reductions) is encountered outside any parallel
   region.  The previous team state is saved in team->prev_ts.  */

static void
gomp_create_artificial_team (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
  /* Inherit ICVs from the current task if there is one.  */
  icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
  thr->ts.team_id = 0;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
  if (task)
    {
      /* Tear down and free the previous task; the implicit task of
	 the new team replaces it.  */
      thr->task = task;
      gomp_end_task ();
      free (task);
      thr->task = &team->implicit_task[0];
    }
#ifdef LIBGOMP_USE_PTHREADS
  else
    /* First task on this thread: register the thread destructor so
       thread-local state is cleaned up at thread exit.  */
    pthread_setspecific (gomp_thread_destructor, thr);
#endif
}
2249 | ||
2250 | /* The format of data is: | |
2251 | data[0] cnt | |
2252 | data[1] size | |
2253 | data[2] alignment (on output array pointer) | |
2254 | data[3] allocator (-1 if malloc allocator) | |
2255 | data[4] next pointer | |
2256 | data[5] used internally (htab pointer) | |
2257 | data[6] used internally (end of array) | |
2258 | cnt times | |
2259 | ent[0] address | |
2260 | ent[1] offset | |
2261 | ent[2] used internally (pointer to data[0]) | |
2262 | The entries are sorted by increasing offset, so that a binary | |
2263 | search can be performed. Normally, data[8] is 0, exception is | |
2264 | for worksharing construct task reductions in cancellable parallel, | |
2265 | where at offset 0 there should be space for a pointer and an integer | |
2266 | which are used internally. */ | |
2267 | ||
2268 | void | |
2269 | GOMP_taskgroup_reduction_register (uintptr_t *data) | |
2270 | { | |
2271 | struct gomp_thread *thr = gomp_thread (); | |
2272 | struct gomp_team *team = thr->ts.team; | |
2273 | struct gomp_task *task; | |
2274 | unsigned nthreads; | |
2275 | if (__builtin_expect (team == NULL, 0)) | |
2276 | { | |
2277 | /* The task reduction code needs a team and task, so for | |
2278 | orphaned taskgroups just create the implicit team. */ | |
2279 | gomp_create_artificial_team (); | |
2280 | ialias_call (GOMP_taskgroup_start) (); | |
2281 | team = thr->ts.team; | |
2282 | } | |
2283 | nthreads = team->nthreads; | |
2284 | task = thr->task; | |
2285 | gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); | |
2286 | task->taskgroup->reductions = data; | |
2287 | } | |
2288 | ||
/* Release the resources registered by GOMP_taskgroup_reduction_register
   for the descriptor chain DATA: the hash table stored in data[5] and
   each per-thread array stored in d[2].  The walk stops at the first
   chained descriptor that carries its own hash table (d[5] non-zero),
   since that descriptor belongs to an outer registration.  */

void
GOMP_taskgroup_reduction_unregister (uintptr_t *data)
{
  uintptr_t *d = data;
  htab_free ((struct htab *) data[5]);
  do
    {
      gomp_aligned_free ((void *) d[2]);
      d = (uintptr_t *) d[4];
    }
  while (d && !d[5]);
}
ialias (GOMP_taskgroup_reduction_unregister)
2302 | ||
/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
   original list item or address of previously remapped original list
   item to address of the private copy, store that to ptrs[i].
   For i < cntorig, additionally set ptrs[cnt+i] to the address of
   the original list item.  */

void
GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  unsigned id = thr->ts.team_id;
  uintptr_t *data = task->taskgroup->reductions;
  uintptr_t *d;
  struct htab *reduction_htab = (struct htab *) data[5];
  size_t i;
  for (i = 0; i < cnt; ++i)
    {
      /* First try the fast path: look the original address up in the
	 hash table built by gomp_reduction_register.  The asm is the
	 same hash_entry_type punning hack as in the register path.  */
      hash_entry_type ent, n;
      __asm ("" : "=g" (ent) : "0" (ptrs + i));
      n = htab_find (reduction_htab, ent);
      if (n)
	{
	  uintptr_t *p;
	  __asm ("" : "=g" (p) : "0" (n));
	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
	     p[1] is the offset within the allocated chunk for each
	     thread, p[2] is the array registered with
	     GOMP_taskgroup_reduction_register, d[2] is the base of the
	     allocated memory and d[1] is the size of the allocated chunk
	     for one thread.  */
	  d = (uintptr_t *) p[2];
	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
	  if (__builtin_expect (i < cntorig, 0))
	    ptrs[cnt + i] = (void *) p[0];
	  continue;
	}
      /* Slow path: ptrs[i] is an already-remapped private copy; find
	 the descriptor whose allocated range [d[2], d[6]) contains it.  */
      d = data;
      while (d != NULL)
	{
	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
	    break;
	  d = (uintptr_t *) d[4];
	}
      if (d == NULL)
	gomp_fatal ("couldn't find matching task_reduction or reduction with "
		    "task modifier for %p", ptrs[i]);
      /* Offset of the list item within one per-thread chunk.  */
      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
      ptrs[i] = (void *) (d[2] + id * d[1] + off);
      if (__builtin_expect (i < cntorig, 0))
	{
	  /* Binary search the sorted (address, offset, backptr) entries
	     for the one matching OFF to recover the original address.  */
	  size_t lo = 0, hi = d[0] - 1;
	  while (lo <= hi)
	    {
	      size_t m = (lo + hi) / 2;
	      if (d[7 + 3 * m + 1] < off)
		lo = m + 1;
	      else if (d[7 + 3 * m + 1] == off)
		{
		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
		  break;
		}
	      else
		hi = m - 1;
	    }
	  if (lo > hi)
	    gomp_fatal ("couldn't find matching task_reduction or reduction "
			"with task modifier for %p", ptrs[i]);
	}
    }
}
2374 | ||
2375 | struct gomp_taskgroup * | |
2376 | gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) | |
2377 | { | |
2378 | struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); | |
2379 | gomp_reduction_register (data, NULL, NULL, nthreads); | |
2380 | taskgroup->reductions = data; | |
2381 | return taskgroup; | |
2382 | } | |
2383 | ||
2384 | void | |
2385 | gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) | |
2386 | { | |
2387 | struct gomp_thread *thr = gomp_thread (); | |
2388 | struct gomp_team *team = thr->ts.team; | |
2389 | struct gomp_task *task = thr->task; | |
2390 | unsigned nthreads = team->nthreads; | |
2391 | gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); | |
2392 | task->taskgroup->reductions = data; | |
2393 | } | |
2394 | ||
2395 | void | |
2396 | gomp_workshare_taskgroup_start (void) | |
2397 | { | |
2398 | struct gomp_thread *thr = gomp_thread (); | |
2399 | struct gomp_team *team = thr->ts.team; | |
2400 | struct gomp_task *task; | |
2401 | ||
2402 | if (team == NULL) | |
2403 | { | |
2404 | gomp_create_artificial_team (); | |
2405 | team = thr->ts.team; | |
2406 | } | |
2407 | task = thr->task; | |
2408 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); | |
2409 | task->taskgroup->workshare = true; | |
2410 | } | |
2411 | ||
2412 | void | |
2413 | GOMP_workshare_task_reduction_unregister (bool cancelled) | |
2414 | { | |
2415 | struct gomp_thread *thr = gomp_thread (); | |
2416 | struct gomp_task *task = thr->task; | |
2417 | struct gomp_team *team = thr->ts.team; | |
2418 | uintptr_t *data = task->taskgroup->reductions; | |
2419 | ialias_call (GOMP_taskgroup_end) (); | |
2420 | if (thr->ts.team_id == 0) | |
2421 | ialias_call (GOMP_taskgroup_reduction_unregister) (data); | |
2422 | else | |
2423 | htab_free ((struct htab *) data[5]); | |
2424 | ||
2425 | if (!cancelled) | |
2426 | gomp_team_barrier_wait (&team->barrier); | |
2427 | } | |
2428 | ||
20906c66 JJ |
2429 | int |
2430 | omp_in_final (void) | |
2431 | { | |
2432 | struct gomp_thread *thr = gomp_thread (); | |
2433 | return thr->task && thr->task->final_task; | |
2434 | } | |
2435 | ||
2436 | ialias (omp_in_final) | |
a6d22fb2 KCY |
2437 | |
/* Fulfill the completion event EVENT of a task with a detach clause.
   The event handle is the address of the task itself.  For undeferred
   tasks this just posts the completion semaphore the task creator is
   waiting on; for deferred tasks it either clears detach_team (task
   still running) or, if the task already finished as
   GOMP_TASK_DETACHED, performs the deferred task-completion
   bookkeeping here.  May be called from a thread outside the team
   ("unshackled").  */

void
omp_fulfill_event (omp_event_handle_t event)
{
  struct gomp_task *task = (struct gomp_task *) event;
  if (!task->deferred_p)
    {
      /* Undeferred task: the creator blocks on completion_sem; a
	 non-zero count means the event was already fulfilled.  */
      if (gomp_sem_getcount (task->completion_sem) > 0)
	gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);

      gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
		  task);
      gomp_sem_post (task->completion_sem);
      return;
    }

  struct gomp_team *team = __atomic_load_n (&task->detach_team,
					    MEMMODEL_RELAXED);
  if (!team)
    gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
		"been fulfilled!\n", task);

  gomp_mutex_lock (&team->task_lock);
  if (task->kind != GOMP_TASK_DETACHED)
    {
      /* The task has not finished running yet.  */
      gomp_debug (0,
		  "omp_fulfill_event: %p event fulfilled for unfinished "
		  "task\n", task);
      /* Clearing detach_team tells the running thread the event has
	 been fulfilled; it will complete the task itself.  */
      __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  /* The task already finished executing; do the completion work that
     was deferred until the event fulfillment.  */
  gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
	      task);
  size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
  gomp_task_run_post_remove_parent (task);
  gomp_clear_parent (&task->children_queue);
  gomp_task_run_post_remove_taskgroup (task);
  team->task_count--;
  team->task_detach_count--;

  int do_wake = 0;
  bool shackled_thread_p = team == gomp_thread ()->ts.team;
  if (new_tasks > 0)
    {
      /* Wake up threads to run new tasks.  */
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->nthreads - team->task_running_count;
      if (do_wake > new_tasks)
	do_wake = new_tasks;
    }

  if (!shackled_thread_p
      && !do_wake
      && team->task_detach_count == 0
      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
    /* Ensure that at least one thread is woken up to signal that the
       barrier can finish.  */
    do_wake = 1;

  /* If we are running in an unshackled thread, the team might vanish before
     gomp_team_barrier_wake is run if we release the lock first, so keep the
     lock for the call in that case.  */
  if (shackled_thread_p)
    gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, do_wake);
  if (!shackled_thread_p)
    gomp_mutex_unlock (&team->task_lock);

  gomp_finish_task (task);
  free (task);
}

ialias (omp_fulfill_event)