]>
Commit | Line | Data |
---|---|---|
99dee823 | 1 | /* Copyright (C) 2007-2021 Free Software Foundation, Inc. |
a68ab351 JJ |
2 | Contributed by Richard Henderson <rth@redhat.com>. |
3 | ||
f1f3453e TS |
4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
a68ab351 JJ |
6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
748086b7 JJ |
8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
a68ab351 JJ |
11 | |
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
748086b7 | 14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
a68ab351 JJ |
15 | more details. |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
a68ab351 | 25 | |
93d90219 | 26 | /* This file handles the maintenance of tasks in response to task |
a68ab351 JJ |
27 | creation and termination. */ |
28 | ||
29 | #include "libgomp.h" | |
30 | #include <stdlib.h> | |
31 | #include <string.h> | |
d9a6bd32 | 32 | #include "gomp-constants.h" |
a68ab351 | 33 | |
acf0174b JJ |
34 | typedef struct gomp_task_depend_entry *hash_entry_type; |
35 | ||
36 | static inline void * | |
37 | htab_alloc (size_t size) | |
38 | { | |
39 | return gomp_malloc (size); | |
40 | } | |
41 | ||
42 | static inline void | |
43 | htab_free (void *ptr) | |
44 | { | |
45 | free (ptr); | |
46 | } | |
47 | ||
48 | #include "hashtab.h" | |
49 | ||
50 | static inline hashval_t | |
51 | htab_hash (hash_entry_type element) | |
52 | { | |
53 | return hash_pointer (element->addr); | |
54 | } | |
55 | ||
56 | static inline bool | |
57 | htab_eq (hash_entry_type x, hash_entry_type y) | |
58 | { | |
59 | return x->addr == y->addr; | |
60 | } | |
a68ab351 JJ |
61 | |
62 | /* Create a new task data structure. */ | |
63 | ||
64 | void | |
65 | gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, | |
66 | struct gomp_task_icv *prev_icv) | |
67 | { | |
e4606348 JJ |
68 | /* It would seem that using memset here would be a win, but it turns |
69 | out that partially filling gomp_task allows us to keep the | |
70 | overhead of task creation low. In the nqueens-1.c test, for a | |
71 | sufficiently large N, we drop the overhead from 5-6% to 1%. | |
72 | ||
73 | Note, the nqueens-1.c test in serial mode is a good test to | |
74 | benchmark the overhead of creating tasks as there are millions of | |
75 | tiny tasks created that all run undeferred. */ | |
a68ab351 JJ |
76 | task->parent = parent_task; |
77 | task->icv = *prev_icv; | |
78 | task->kind = GOMP_TASK_IMPLICIT; | |
0494285a | 79 | task->taskwait = NULL; |
5f836cbb | 80 | task->in_tied_task = false; |
20906c66 | 81 | task->final_task = false; |
acf0174b | 82 | task->copy_ctors_done = false; |
0494285a | 83 | task->parent_depends_on = false; |
e4606348 | 84 | priority_queue_init (&task->children_queue); |
acf0174b JJ |
85 | task->taskgroup = NULL; |
86 | task->dependers = NULL; | |
87 | task->depend_hash = NULL; | |
88 | task->depend_count = 0; | |
a68ab351 JJ |
89 | } |
90 | ||
91 | /* Clean up a task, after completing it. */ | |
92 | ||
93 | void | |
94 | gomp_end_task (void) | |
95 | { | |
96 | struct gomp_thread *thr = gomp_thread (); | |
97 | struct gomp_task *task = thr->task; | |
98 | ||
99 | gomp_finish_task (task); | |
100 | thr->task = task->parent; | |
101 | } | |
102 | ||
e4606348 | 103 | /* Clear the parent field of every task in LIST. */ |
d9a6bd32 | 104 | |
a68ab351 | 105 | static inline void |
e4606348 | 106 | gomp_clear_parent_in_list (struct priority_list *list) |
a68ab351 | 107 | { |
e4606348 JJ |
108 | struct priority_node *p = list->tasks; |
109 | if (p) | |
a68ab351 JJ |
110 | do |
111 | { | |
e4606348 JJ |
112 | priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; |
113 | p = p->next; | |
a68ab351 | 114 | } |
e4606348 | 115 | while (p != list->tasks); |
a68ab351 JJ |
116 | } |
117 | ||
e4606348 JJ |
118 | /* Splay tree version of gomp_clear_parent_in_list. |
119 | ||
120 | Clear the parent field of every task in NODE within SP, and free | |
121 | the node when done. */ | |
122 | ||
123 | static void | |
124 | gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) | |
125 | { | |
126 | if (!node) | |
127 | return; | |
128 | prio_splay_tree_node left = node->left, right = node->right; | |
129 | gomp_clear_parent_in_list (&node->key.l); | |
130 | #if _LIBGOMP_CHECKING_ | |
131 | memset (node, 0xaf, sizeof (*node)); | |
132 | #endif | |
133 | /* No need to remove the node from the tree. We're nuking | |
134 | everything, so just free the nodes and our caller can clear the | |
135 | entire splay tree. */ | |
136 | free (node); | |
137 | gomp_clear_parent_in_tree (sp, left); | |
138 | gomp_clear_parent_in_tree (sp, right); | |
139 | } | |
140 | ||
141 | /* Clear the parent field of every task in Q and remove every task | |
142 | from Q. */ | |
143 | ||
144 | static inline void | |
145 | gomp_clear_parent (struct priority_queue *q) | |
146 | { | |
147 | if (priority_queue_multi_p (q)) | |
148 | { | |
149 | gomp_clear_parent_in_tree (&q->t, q->t.root); | |
150 | /* All the nodes have been cleared in gomp_clear_parent_in_tree. | |
151 | No need to remove anything. We can just nuke everything. */ | |
152 | q->t.root = NULL; | |
153 | } | |
154 | else | |
155 | gomp_clear_parent_in_list (&q->l); | |
156 | } | |
157 | ||
158 | /* Helper function for GOMP_task and gomp_create_target_task. | |
159 | ||
160 | For a TASK with in/out dependencies, fill in the various dependency | |
161 | queues. PARENT is the parent of said task. DEPEND is as in | |
162 | GOMP_task. */ | |
d9a6bd32 JJ |
163 | |
164 | static void | |
165 | gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, | |
166 | void **depend) | |
167 | { | |
168 | size_t ndepend = (uintptr_t) depend[0]; | |
d9a6bd32 JJ |
169 | size_t i; |
170 | hash_entry_type ent; | |
171 | ||
28567c40 JJ |
172 | if (ndepend) |
173 | { | |
174 | /* depend[0] is total # */ | |
175 | size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */ | |
176 | /* ndepend - nout is # of in: */ | |
177 | for (i = 0; i < ndepend; i++) | |
178 | { | |
179 | task->depend[i].addr = depend[2 + i]; | |
180 | task->depend[i].is_in = i >= nout; | |
181 | } | |
182 | } | |
183 | else | |
184 | { | |
185 | ndepend = (uintptr_t) depend[1]; /* total # */ | |
186 | size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */ | |
187 | size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */ | |
188 | /* For now we treat mutexinoutset like out, which is compliant, but | |
189 | inefficient. */ | |
190 | size_t nin = (uintptr_t) depend[4]; /* # of in: */ | |
191 | /* ndepend - nout - nmutexinoutset - nin is # of depobjs */ | |
192 | size_t normal = nout + nmutexinoutset + nin; | |
193 | size_t n = 0; | |
194 | for (i = normal; i < ndepend; i++) | |
195 | { | |
196 | void **d = (void **) (uintptr_t) depend[5 + i]; | |
197 | switch ((uintptr_t) d[1]) | |
198 | { | |
199 | case GOMP_DEPEND_OUT: | |
200 | case GOMP_DEPEND_INOUT: | |
201 | case GOMP_DEPEND_MUTEXINOUTSET: | |
202 | break; | |
203 | case GOMP_DEPEND_IN: | |
204 | continue; | |
205 | default: | |
206 | gomp_fatal ("unknown omp_depend_t dependence type %d", | |
207 | (int) (uintptr_t) d[1]); | |
208 | } | |
209 | task->depend[n].addr = d[0]; | |
210 | task->depend[n++].is_in = 0; | |
211 | } | |
212 | for (i = 0; i < normal; i++) | |
213 | { | |
214 | task->depend[n].addr = depend[5 + i]; | |
215 | task->depend[n++].is_in = i >= nout + nmutexinoutset; | |
216 | } | |
217 | for (i = normal; i < ndepend; i++) | |
218 | { | |
219 | void **d = (void **) (uintptr_t) depend[5 + i]; | |
220 | if ((uintptr_t) d[1] != GOMP_DEPEND_IN) | |
221 | continue; | |
222 | task->depend[n].addr = d[0]; | |
223 | task->depend[n++].is_in = 1; | |
224 | } | |
225 | } | |
d9a6bd32 JJ |
226 | task->depend_count = ndepend; |
227 | task->num_dependees = 0; | |
228 | if (parent->depend_hash == NULL) | |
229 | parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); | |
230 | for (i = 0; i < ndepend; i++) | |
231 | { | |
d9a6bd32 JJ |
232 | task->depend[i].next = NULL; |
233 | task->depend[i].prev = NULL; | |
234 | task->depend[i].task = task; | |
d9a6bd32 JJ |
235 | task->depend[i].redundant = false; |
236 | task->depend[i].redundant_out = false; | |
237 | ||
238 | hash_entry_type *slot = htab_find_slot (&parent->depend_hash, | |
239 | &task->depend[i], INSERT); | |
240 | hash_entry_type out = NULL, last = NULL; | |
241 | if (*slot) | |
242 | { | |
243 | /* If multiple depends on the same task are the same, all but the | |
244 | first one are redundant. As inout/out come first, if any of them | |
245 | is inout/out, it will win, which is the right semantics. */ | |
246 | if ((*slot)->task == task) | |
247 | { | |
248 | task->depend[i].redundant = true; | |
249 | continue; | |
250 | } | |
251 | for (ent = *slot; ent; ent = ent->next) | |
252 | { | |
253 | if (ent->redundant_out) | |
254 | break; | |
255 | ||
256 | last = ent; | |
257 | ||
258 | /* depend(in:...) doesn't depend on earlier depend(in:...). */ | |
28567c40 | 259 | if (task->depend[i].is_in && ent->is_in) |
d9a6bd32 JJ |
260 | continue; |
261 | ||
262 | if (!ent->is_in) | |
263 | out = ent; | |
264 | ||
265 | struct gomp_task *tsk = ent->task; | |
266 | if (tsk->dependers == NULL) | |
267 | { | |
268 | tsk->dependers | |
269 | = gomp_malloc (sizeof (struct gomp_dependers_vec) | |
270 | + 6 * sizeof (struct gomp_task *)); | |
271 | tsk->dependers->n_elem = 1; | |
272 | tsk->dependers->allocated = 6; | |
273 | tsk->dependers->elem[0] = task; | |
274 | task->num_dependees++; | |
275 | continue; | |
276 | } | |
277 | /* We already have some other dependency on tsk from earlier | |
278 | depend clause. */ | |
279 | else if (tsk->dependers->n_elem | |
280 | && (tsk->dependers->elem[tsk->dependers->n_elem - 1] | |
281 | == task)) | |
282 | continue; | |
283 | else if (tsk->dependers->n_elem == tsk->dependers->allocated) | |
284 | { | |
285 | tsk->dependers->allocated | |
286 | = tsk->dependers->allocated * 2 + 2; | |
287 | tsk->dependers | |
288 | = gomp_realloc (tsk->dependers, | |
289 | sizeof (struct gomp_dependers_vec) | |
290 | + (tsk->dependers->allocated | |
291 | * sizeof (struct gomp_task *))); | |
292 | } | |
293 | tsk->dependers->elem[tsk->dependers->n_elem++] = task; | |
294 | task->num_dependees++; | |
295 | } | |
296 | task->depend[i].next = *slot; | |
297 | (*slot)->prev = &task->depend[i]; | |
298 | } | |
299 | *slot = &task->depend[i]; | |
300 | ||
301 | /* There is no need to store more than one depend({,in}out:) task per | |
302 | address in the hash table chain for the purpose of creation of | |
303 | deferred tasks, because each out depends on all earlier outs, thus it | |
304 | is enough to record just the last depend({,in}out:). For depend(in:), | |
305 | we need to keep all of the previous ones not terminated yet, because | |
306 | a later depend({,in}out:) might need to depend on all of them. So, if | |
307 | the new task's clause is depend({,in}out:), we know there is at most | |
308 | one other depend({,in}out:) clause in the list (out). For | |
309 | non-deferred tasks we want to see all outs, so they are moved to the | |
310 | end of the chain, after first redundant_out entry all following | |
311 | entries should be redundant_out. */ | |
312 | if (!task->depend[i].is_in && out) | |
313 | { | |
314 | if (out != last) | |
315 | { | |
316 | out->next->prev = out->prev; | |
317 | out->prev->next = out->next; | |
318 | out->next = last->next; | |
319 | out->prev = last; | |
320 | last->next = out; | |
321 | if (out->next) | |
322 | out->next->prev = out; | |
323 | } | |
324 | out->redundant_out = true; | |
325 | } | |
326 | } | |
327 | } | |
0494285a | 328 | |
a68ab351 JJ |
329 | /* Called when encountering an explicit task directive. If IF_CLAUSE is |
330 | false, then we must not delay in executing the task. If UNTIED is true, | |
d9a6bd32 JJ |
331 | then the task may be executed by any member of the team. |
332 | ||
333 | DEPEND is an array containing: | |
28567c40 | 334 | if depend[0] is non-zero, then: |
d9a6bd32 | 335 | depend[0]: number of depend elements. |
28567c40 JJ |
336 | depend[1]: number of depend elements of type "out/inout". |
337 | depend[2..N+1]: address of [1..N]th depend element. | |
338 | otherwise, when depend[0] is zero, then: | |
339 | depend[1]: number of depend elements. | |
340 | depend[2]: number of depend elements of type "out/inout". | |
341 | depend[3]: number of depend elements of type "mutexinoutset". | |
342 | depend[4]: number of depend elements of type "in". | |
343 | depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements | |
344 | depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of | |
345 | omp_depend_t objects. */ | |
a68ab351 JJ |
346 | |
347 | void | |
348 | GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), | |
acf0174b | 349 | long arg_size, long arg_align, bool if_clause, unsigned flags, |
d9a6bd32 | 350 | void **depend, int priority) |
a68ab351 JJ |
351 | { |
352 | struct gomp_thread *thr = gomp_thread (); | |
353 | struct gomp_team *team = thr->ts.team; | |
354 | ||
355 | #ifdef HAVE_BROKEN_POSIX_SEMAPHORES | |
356 | /* If pthread_mutex_* is used for omp_*lock*, then each task must be | |
357 | tied to one thread all the time. This means UNTIED tasks must be | |
358 | tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN | |
359 | might be running on different thread than FN. */ | |
360 | if (cpyfn) | |
361 | if_clause = false; | |
d9a6bd32 | 362 | flags &= ~GOMP_TASK_FLAG_UNTIED; |
a68ab351 JJ |
363 | #endif |
364 | ||
acf0174b | 365 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ |
28567c40 JJ |
366 | if (__builtin_expect (gomp_cancel_var, 0) && team) |
367 | { | |
368 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
369 | return; | |
370 | if (thr->task->taskgroup) | |
371 | { | |
372 | if (thr->task->taskgroup->cancelled) | |
373 | return; | |
374 | if (thr->task->taskgroup->workshare | |
375 | && thr->task->taskgroup->prev | |
376 | && thr->task->taskgroup->prev->cancelled) | |
377 | return; | |
378 | } | |
379 | } | |
acf0174b | 380 | |
d9a6bd32 JJ |
381 | if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) |
382 | priority = 0; | |
e4606348 JJ |
383 | else if (priority > gomp_max_task_priority_var) |
384 | priority = gomp_max_task_priority_var; | |
d9a6bd32 | 385 | |
a68ab351 | 386 | if (!if_clause || team == NULL |
20906c66 | 387 | || (thr->task && thr->task->final_task) |
a68ab351 JJ |
388 | || team->task_count > 64 * team->nthreads) |
389 | { | |
390 | struct gomp_task task; | |
391 | ||
acf0174b JJ |
392 | /* If there are depend clauses and earlier deferred sibling tasks |
393 | with depend clauses, check if there isn't a dependency. If there | |
0494285a | 394 | is, we need to wait for them. There is no need to handle |
acf0174b JJ |
395 | depend clauses for non-deferred tasks other than this, because |
396 | the parent task is suspended until the child task finishes and thus | |
397 | it can't start further child tasks. */ | |
d9a6bd32 JJ |
398 | if ((flags & GOMP_TASK_FLAG_DEPEND) |
399 | && thr->task && thr->task->depend_hash) | |
0494285a | 400 | gomp_task_maybe_wait_for_dependencies (depend); |
acf0174b | 401 | |
a68ab351 | 402 | gomp_init_task (&task, thr->task, gomp_icv (false)); |
d9a6bd32 JJ |
403 | task.kind = GOMP_TASK_UNDEFERRED; |
404 | task.final_task = (thr->task && thr->task->final_task) | |
405 | || (flags & GOMP_TASK_FLAG_FINAL); | |
e4606348 | 406 | task.priority = priority; |
5f836cbb | 407 | if (thr->task) |
acf0174b JJ |
408 | { |
409 | task.in_tied_task = thr->task->in_tied_task; | |
410 | task.taskgroup = thr->task->taskgroup; | |
411 | } | |
a68ab351 JJ |
412 | thr->task = &task; |
413 | if (__builtin_expect (cpyfn != NULL, 0)) | |
414 | { | |
415 | char buf[arg_size + arg_align - 1]; | |
416 | char *arg = (char *) (((uintptr_t) buf + arg_align - 1) | |
417 | & ~(uintptr_t) (arg_align - 1)); | |
418 | cpyfn (arg, data); | |
419 | fn (arg); | |
420 | } | |
421 | else | |
422 | fn (data); | |
bed8d8a6 AM |
423 | /* Access to "children" is normally done inside a task_lock |
424 | mutex region, but the only way this particular task.children | |
425 | can be set is if this thread's task work function (fn) | |
426 | creates children. So since the setter is *this* thread, we | |
427 | need no barriers here when testing for non-NULL. We can have | |
428 | task.children set by the current thread then changed by a | |
429 | child thread, but seeing a stale non-NULL value is not a | |
430 | problem. Once past the task_lock acquisition, this thread | |
431 | will see the real value of task.children. */ | |
e4606348 | 432 | if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) |
a68ab351 JJ |
433 | { |
434 | gomp_mutex_lock (&team->task_lock); | |
e4606348 | 435 | gomp_clear_parent (&task.children_queue); |
a68ab351 JJ |
436 | gomp_mutex_unlock (&team->task_lock); |
437 | } | |
438 | gomp_end_task (); | |
439 | } | |
440 | else | |
441 | { | |
442 | struct gomp_task *task; | |
443 | struct gomp_task *parent = thr->task; | |
acf0174b | 444 | struct gomp_taskgroup *taskgroup = parent->taskgroup; |
a68ab351 JJ |
445 | char *arg; |
446 | bool do_wake; | |
acf0174b JJ |
447 | size_t depend_size = 0; |
448 | ||
d9a6bd32 | 449 | if (flags & GOMP_TASK_FLAG_DEPEND) |
28567c40 | 450 | depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1]) |
acf0174b JJ |
451 | * sizeof (struct gomp_task_depend_entry)); |
452 | task = gomp_malloc (sizeof (*task) + depend_size | |
453 | + arg_size + arg_align - 1); | |
454 | arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) | |
a68ab351 JJ |
455 | & ~(uintptr_t) (arg_align - 1)); |
456 | gomp_init_task (task, parent, gomp_icv (false)); | |
e4606348 | 457 | task->priority = priority; |
d9a6bd32 | 458 | task->kind = GOMP_TASK_UNDEFERRED; |
5f836cbb | 459 | task->in_tied_task = parent->in_tied_task; |
acf0174b | 460 | task->taskgroup = taskgroup; |
a68ab351 JJ |
461 | thr->task = task; |
462 | if (cpyfn) | |
acf0174b JJ |
463 | { |
464 | cpyfn (arg, data); | |
465 | task->copy_ctors_done = true; | |
466 | } | |
a68ab351 JJ |
467 | else |
468 | memcpy (arg, data, arg_size); | |
469 | thr->task = parent; | |
470 | task->kind = GOMP_TASK_WAITING; | |
471 | task->fn = fn; | |
472 | task->fn_data = arg; | |
d9a6bd32 | 473 | task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; |
a68ab351 | 474 | gomp_mutex_lock (&team->task_lock); |
acf0174b JJ |
475 | /* If parallel or taskgroup has been cancelled, don't start new |
476 | tasks. */ | |
28567c40 JJ |
477 | if (__builtin_expect (gomp_cancel_var, 0) |
478 | && !task->copy_ctors_done) | |
acf0174b | 479 | { |
28567c40 JJ |
480 | if (gomp_team_barrier_cancelled (&team->barrier)) |
481 | { | |
482 | do_cancel: | |
483 | gomp_mutex_unlock (&team->task_lock); | |
484 | gomp_finish_task (task); | |
485 | free (task); | |
486 | return; | |
487 | } | |
488 | if (taskgroup) | |
489 | { | |
490 | if (taskgroup->cancelled) | |
491 | goto do_cancel; | |
492 | if (taskgroup->workshare | |
493 | && taskgroup->prev | |
494 | && taskgroup->prev->cancelled) | |
495 | goto do_cancel; | |
496 | } | |
acf0174b JJ |
497 | } |
498 | if (taskgroup) | |
499 | taskgroup->num_children++; | |
500 | if (depend_size) | |
501 | { | |
d9a6bd32 | 502 | gomp_task_handle_depend (task, parent, depend); |
acf0174b JJ |
503 | if (task->num_dependees) |
504 | { | |
e4606348 JJ |
505 | /* Tasks that depend on other tasks are not put into the |
506 | various waiting queues, so we are done for now. Said | |
507 | tasks are instead put into the queues via | |
508 | gomp_task_run_post_handle_dependers() after their | |
509 | dependencies have been satisfied. After which, they | |
510 | can be picked up by the various scheduling | |
511 | points. */ | |
acf0174b JJ |
512 | gomp_mutex_unlock (&team->task_lock); |
513 | return; | |
514 | } | |
515 | } | |
e4606348 JJ |
516 | |
517 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, | |
518 | task, priority, | |
519 | PRIORITY_INSERT_BEGIN, | |
520 | /*adjust_parent_depends_on=*/false, | |
521 | task->parent_depends_on); | |
acf0174b | 522 | if (taskgroup) |
e4606348 JJ |
523 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
524 | task, priority, | |
525 | PRIORITY_INSERT_BEGIN, | |
526 | /*adjust_parent_depends_on=*/false, | |
527 | task->parent_depends_on); | |
528 | ||
529 | priority_queue_insert (PQ_TEAM, &team->task_queue, | |
530 | task, priority, | |
531 | PRIORITY_INSERT_END, | |
532 | /*adjust_parent_depends_on=*/false, | |
533 | task->parent_depends_on); | |
534 | ||
5f836cbb | 535 | ++team->task_count; |
acf0174b | 536 | ++team->task_queued_count; |
5f836cbb JJ |
537 | gomp_team_barrier_set_task_pending (&team->barrier); |
538 | do_wake = team->task_running_count + !parent->in_tied_task | |
539 | < team->nthreads; | |
a68ab351 JJ |
540 | gomp_mutex_unlock (&team->task_lock); |
541 | if (do_wake) | |
542 | gomp_team_barrier_wake (&team->barrier, 1); | |
543 | } | |
544 | } | |
545 | ||
d9a6bd32 JJ |
546 | ialias (GOMP_taskgroup_start) |
547 | ialias (GOMP_taskgroup_end) | |
28567c40 | 548 | ialias (GOMP_taskgroup_reduction_register) |
d9a6bd32 JJ |
549 | |
550 | #define TYPE long | |
551 | #define UTYPE unsigned long | |
552 | #define TYPE_is_long 1 | |
553 | #include "taskloop.c" | |
554 | #undef TYPE | |
555 | #undef UTYPE | |
556 | #undef TYPE_is_long | |
557 | ||
558 | #define TYPE unsigned long long | |
559 | #define UTYPE TYPE | |
560 | #define GOMP_taskloop GOMP_taskloop_ull | |
561 | #include "taskloop.c" | |
562 | #undef TYPE | |
563 | #undef UTYPE | |
564 | #undef GOMP_taskloop | |
565 | ||
e4606348 JJ |
566 | static void inline |
567 | priority_queue_move_task_first (enum priority_queue_type type, | |
568 | struct priority_queue *head, | |
569 | struct gomp_task *task) | |
570 | { | |
571 | #if _LIBGOMP_CHECKING_ | |
572 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
573 | gomp_fatal ("Attempt to move first missing task %p", task); | |
574 | #endif | |
575 | struct priority_list *list; | |
576 | if (priority_queue_multi_p (head)) | |
577 | { | |
578 | list = priority_queue_lookup_priority (head, task->priority); | |
579 | #if _LIBGOMP_CHECKING_ | |
580 | if (!list) | |
581 | gomp_fatal ("Unable to find priority %d", task->priority); | |
582 | #endif | |
583 | } | |
584 | else | |
585 | list = &head->l; | |
586 | priority_list_remove (list, task_to_priority_node (type, task), 0); | |
587 | priority_list_insert (type, list, task, task->priority, | |
588 | PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, | |
589 | task->parent_depends_on); | |
590 | } | |
591 | ||
592 | /* Actual body of GOMP_PLUGIN_target_task_completion that is executed | |
593 | with team->task_lock held, or is executed in the thread that called | |
594 | gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been | |
595 | run before it acquires team->task_lock. */ | |
596 | ||
597 | static void | |
598 | gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) | |
599 | { | |
600 | struct gomp_task *parent = task->parent; | |
601 | if (parent) | |
602 | priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, | |
603 | task); | |
604 | ||
605 | struct gomp_taskgroup *taskgroup = task->taskgroup; | |
606 | if (taskgroup) | |
607 | priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
608 | task); | |
609 | ||
610 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, | |
611 | PRIORITY_INSERT_BEGIN, false, | |
612 | task->parent_depends_on); | |
613 | task->kind = GOMP_TASK_WAITING; | |
614 | if (parent && parent->taskwait) | |
615 | { | |
616 | if (parent->taskwait->in_taskwait) | |
617 | { | |
618 | /* One more task has had its dependencies met. | |
619 | Inform any waiters. */ | |
620 | parent->taskwait->in_taskwait = false; | |
621 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
622 | } | |
623 | else if (parent->taskwait->in_depend_wait) | |
624 | { | |
625 | /* One more task has had its dependencies met. | |
626 | Inform any waiters. */ | |
627 | parent->taskwait->in_depend_wait = false; | |
628 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
629 | } | |
630 | } | |
631 | if (taskgroup && taskgroup->in_taskgroup_wait) | |
632 | { | |
633 | /* One more task has had its dependencies met. | |
634 | Inform any waiters. */ | |
635 | taskgroup->in_taskgroup_wait = false; | |
636 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
637 | } | |
638 | ||
639 | ++team->task_queued_count; | |
640 | gomp_team_barrier_set_task_pending (&team->barrier); | |
641 | /* I'm afraid this can't be done after releasing team->task_lock, | |
642 | as gomp_target_task_completion is run from unrelated thread and | |
643 | therefore in between gomp_mutex_unlock and gomp_team_barrier_wake | |
644 | the team could be gone already. */ | |
645 | if (team->nthreads > team->task_running_count) | |
646 | gomp_team_barrier_wake (&team->barrier, 1); | |
647 | } | |
648 | ||
649 | /* Signal that a target task TTASK has completed the asynchronously | |
650 | running phase and should be requeued as a task to handle the | |
651 | variable unmapping. */ | |
d9a6bd32 JJ |
652 | |
653 | void | |
e4606348 JJ |
654 | GOMP_PLUGIN_target_task_completion (void *data) |
655 | { | |
656 | struct gomp_target_task *ttask = (struct gomp_target_task *) data; | |
657 | struct gomp_task *task = ttask->task; | |
658 | struct gomp_team *team = ttask->team; | |
659 | ||
660 | gomp_mutex_lock (&team->task_lock); | |
661 | if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) | |
662 | { | |
663 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
664 | gomp_mutex_unlock (&team->task_lock); | |
cb11f3cf | 665 | return; |
e4606348 JJ |
666 | } |
667 | ttask->state = GOMP_TARGET_TASK_FINISHED; | |
668 | gomp_target_task_completion (team, task); | |
669 | gomp_mutex_unlock (&team->task_lock); | |
670 | } | |
671 | ||
8e4e4719 JJ |
672 | static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); |
673 | ||
e4606348 JJ |
674 | /* Called for nowait target tasks. */ |
675 | ||
676 | bool | |
d9a6bd32 JJ |
677 | gomp_create_target_task (struct gomp_device_descr *devicep, |
678 | void (*fn) (void *), size_t mapnum, void **hostaddrs, | |
679 | size_t *sizes, unsigned short *kinds, | |
b2b40051 | 680 | unsigned int flags, void **depend, void **args, |
e4606348 | 681 | enum gomp_target_task_state state) |
d9a6bd32 JJ |
682 | { |
683 | struct gomp_thread *thr = gomp_thread (); | |
684 | struct gomp_team *team = thr->ts.team; | |
685 | ||
686 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ | |
28567c40 JJ |
687 | if (__builtin_expect (gomp_cancel_var, 0) && team) |
688 | { | |
689 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
690 | return true; | |
691 | if (thr->task->taskgroup) | |
692 | { | |
693 | if (thr->task->taskgroup->cancelled) | |
694 | return true; | |
695 | if (thr->task->taskgroup->workshare | |
696 | && thr->task->taskgroup->prev | |
697 | && thr->task->taskgroup->prev->cancelled) | |
698 | return true; | |
699 | } | |
700 | } | |
d9a6bd32 JJ |
701 | |
702 | struct gomp_target_task *ttask; | |
703 | struct gomp_task *task; | |
704 | struct gomp_task *parent = thr->task; | |
705 | struct gomp_taskgroup *taskgroup = parent->taskgroup; | |
706 | bool do_wake; | |
707 | size_t depend_size = 0; | |
e4606348 JJ |
708 | uintptr_t depend_cnt = 0; |
709 | size_t tgt_align = 0, tgt_size = 0; | |
d9a6bd32 JJ |
710 | |
711 | if (depend != NULL) | |
e4606348 | 712 | { |
28567c40 | 713 | depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]); |
e4606348 JJ |
714 | depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); |
715 | } | |
716 | if (fn) | |
717 | { | |
718 | /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are | |
719 | firstprivate on the target task. */ | |
720 | size_t i; | |
721 | for (i = 0; i < mapnum; i++) | |
722 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
723 | { | |
724 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
725 | if (tgt_align < align) | |
726 | tgt_align = align; | |
727 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
728 | tgt_size += sizes[i]; | |
729 | } | |
730 | if (tgt_align) | |
731 | tgt_size += tgt_align - 1; | |
732 | else | |
733 | tgt_size = 0; | |
734 | } | |
735 | ||
d9a6bd32 JJ |
736 | task = gomp_malloc (sizeof (*task) + depend_size |
737 | + sizeof (*ttask) | |
738 | + mapnum * (sizeof (void *) + sizeof (size_t) | |
e4606348 JJ |
739 | + sizeof (unsigned short)) |
740 | + tgt_size); | |
d9a6bd32 | 741 | gomp_init_task (task, parent, gomp_icv (false)); |
e4606348 | 742 | task->priority = 0; |
d9a6bd32 JJ |
743 | task->kind = GOMP_TASK_WAITING; |
744 | task->in_tied_task = parent->in_tied_task; | |
745 | task->taskgroup = taskgroup; | |
e4606348 | 746 | ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; |
d9a6bd32 JJ |
747 | ttask->devicep = devicep; |
748 | ttask->fn = fn; | |
749 | ttask->mapnum = mapnum; | |
b2b40051 | 750 | ttask->args = args; |
d9a6bd32 JJ |
751 | memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); |
752 | ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; | |
753 | memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); | |
754 | ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; | |
755 | memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); | |
e4606348 JJ |
756 | if (tgt_align) |
757 | { | |
758 | char *tgt = (char *) &ttask->kinds[mapnum]; | |
759 | size_t i; | |
760 | uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); | |
761 | if (al) | |
762 | tgt += tgt_align - al; | |
763 | tgt_size = 0; | |
764 | for (i = 0; i < mapnum; i++) | |
765 | if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) | |
766 | { | |
767 | size_t align = (size_t) 1 << (kinds[i] >> 8); | |
768 | tgt_size = (tgt_size + align - 1) & ~(align - 1); | |
769 | memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); | |
770 | ttask->hostaddrs[i] = tgt + tgt_size; | |
771 | tgt_size = tgt_size + sizes[i]; | |
772 | } | |
773 | } | |
d9a6bd32 | 774 | ttask->flags = flags; |
e4606348 JJ |
775 | ttask->state = state; |
776 | ttask->task = task; | |
777 | ttask->team = team; | |
778 | task->fn = NULL; | |
d9a6bd32 JJ |
779 | task->fn_data = ttask; |
780 | task->final_task = 0; | |
781 | gomp_mutex_lock (&team->task_lock); | |
782 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ | |
28567c40 | 783 | if (__builtin_expect (gomp_cancel_var, 0)) |
d9a6bd32 | 784 | { |
28567c40 JJ |
785 | if (gomp_team_barrier_cancelled (&team->barrier)) |
786 | { | |
787 | do_cancel: | |
788 | gomp_mutex_unlock (&team->task_lock); | |
789 | gomp_finish_task (task); | |
790 | free (task); | |
791 | return true; | |
792 | } | |
793 | if (taskgroup) | |
794 | { | |
795 | if (taskgroup->cancelled) | |
796 | goto do_cancel; | |
797 | if (taskgroup->workshare | |
798 | && taskgroup->prev | |
799 | && taskgroup->prev->cancelled) | |
800 | goto do_cancel; | |
801 | } | |
d9a6bd32 | 802 | } |
d9a6bd32 JJ |
803 | if (depend_size) |
804 | { | |
805 | gomp_task_handle_depend (task, parent, depend); | |
806 | if (task->num_dependees) | |
807 | { | |
e4606348 JJ |
808 | if (taskgroup) |
809 | taskgroup->num_children++; | |
d9a6bd32 | 810 | gomp_mutex_unlock (&team->task_lock); |
e4606348 | 811 | return true; |
d9a6bd32 JJ |
812 | } |
813 | } | |
e4606348 | 814 | if (state == GOMP_TARGET_TASK_DATA) |
d9a6bd32 | 815 | { |
8e4e4719 | 816 | gomp_task_run_post_handle_depend_hash (task); |
e4606348 JJ |
817 | gomp_mutex_unlock (&team->task_lock); |
818 | gomp_finish_task (task); | |
819 | free (task); | |
820 | return false; | |
d9a6bd32 | 821 | } |
d9a6bd32 | 822 | if (taskgroup) |
e4606348 JJ |
823 | taskgroup->num_children++; |
824 | /* For async offloading, if we don't need to wait for dependencies, | |
825 | run the gomp_target_task_fn right away, essentially schedule the | |
826 | mapping part of the task in the current thread. */ | |
827 | if (devicep != NULL | |
828 | && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) | |
d9a6bd32 | 829 | { |
e4606348 JJ |
830 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
831 | PRIORITY_INSERT_END, | |
832 | /*adjust_parent_depends_on=*/false, | |
833 | task->parent_depends_on); | |
834 | if (taskgroup) | |
835 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
836 | task, 0, PRIORITY_INSERT_END, | |
837 | /*adjust_parent_depends_on=*/false, | |
838 | task->parent_depends_on); | |
839 | task->pnode[PQ_TEAM].next = NULL; | |
840 | task->pnode[PQ_TEAM].prev = NULL; | |
841 | task->kind = GOMP_TASK_TIED; | |
842 | ++team->task_count; | |
843 | gomp_mutex_unlock (&team->task_lock); | |
844 | ||
845 | thr->task = task; | |
846 | gomp_target_task_fn (task->fn_data); | |
847 | thr->task = parent; | |
848 | ||
849 | gomp_mutex_lock (&team->task_lock); | |
850 | task->kind = GOMP_TASK_ASYNC_RUNNING; | |
851 | /* If GOMP_PLUGIN_target_task_completion has run already | |
852 | in between gomp_target_task_fn and the mutex lock, | |
853 | perform the requeuing here. */ | |
854 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
855 | gomp_target_task_completion (team, task); | |
d9a6bd32 | 856 | else |
e4606348 JJ |
857 | ttask->state = GOMP_TARGET_TASK_RUNNING; |
858 | gomp_mutex_unlock (&team->task_lock); | |
859 | return true; | |
d9a6bd32 | 860 | } |
e4606348 JJ |
861 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, |
862 | PRIORITY_INSERT_BEGIN, | |
863 | /*adjust_parent_depends_on=*/false, | |
864 | task->parent_depends_on); | |
865 | if (taskgroup) | |
866 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, | |
867 | PRIORITY_INSERT_BEGIN, | |
868 | /*adjust_parent_depends_on=*/false, | |
869 | task->parent_depends_on); | |
870 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, | |
871 | PRIORITY_INSERT_END, | |
872 | /*adjust_parent_depends_on=*/false, | |
873 | task->parent_depends_on); | |
d9a6bd32 JJ |
874 | ++team->task_count; |
875 | ++team->task_queued_count; | |
876 | gomp_team_barrier_set_task_pending (&team->barrier); | |
877 | do_wake = team->task_running_count + !parent->in_tied_task | |
878 | < team->nthreads; | |
879 | gomp_mutex_unlock (&team->task_lock); | |
880 | if (do_wake) | |
881 | gomp_team_barrier_wake (&team->barrier, 1); | |
e4606348 | 882 | return true; |
d9a6bd32 JJ |
883 | } |
884 | ||
e4606348 JJ |
885 | /* Given a parent_depends_on task in LIST, move it to the front of its |
886 | priority so it is run as soon as possible. | |
d9a6bd32 | 887 | |
e4606348 | 888 | Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. |
d9a6bd32 | 889 | |
e4606348 JJ |
890 | We rearrange the queue such that all parent_depends_on tasks are |
891 | first, and last_parent_depends_on points to the last such task we | |
892 | rearranged. For example, given the following tasks in a queue | |
893 | where PD[123] are the parent_depends_on tasks: | |
d9a6bd32 | 894 | |
e4606348 JJ |
895 | task->children |
896 | | | |
897 | V | |
898 | C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 | |
d9a6bd32 | 899 | |
e4606348 JJ |
900 | We rearrange such that: |
901 | ||
902 | task->children | |
903 | | +--- last_parent_depends_on | |
904 | | | | |
905 | V V | |
906 | PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ | |
907 | ||
908 | static void inline | |
909 | priority_list_upgrade_task (struct priority_list *list, | |
910 | struct priority_node *node) | |
911 | { | |
912 | struct priority_node *last_parent_depends_on | |
913 | = list->last_parent_depends_on; | |
914 | if (last_parent_depends_on) | |
d9a6bd32 | 915 | { |
e4606348 JJ |
916 | node->prev->next = node->next; |
917 | node->next->prev = node->prev; | |
918 | node->prev = last_parent_depends_on; | |
919 | node->next = last_parent_depends_on->next; | |
920 | node->prev->next = node; | |
921 | node->next->prev = node; | |
d9a6bd32 | 922 | } |
e4606348 JJ |
923 | else if (node != list->tasks) |
924 | { | |
925 | node->prev->next = node->next; | |
926 | node->next->prev = node->prev; | |
927 | node->prev = list->tasks->prev; | |
928 | node->next = list->tasks; | |
929 | list->tasks = node; | |
930 | node->prev->next = node; | |
931 | node->next->prev = node; | |
932 | } | |
933 | list->last_parent_depends_on = node; | |
d9a6bd32 JJ |
934 | } |
935 | ||
e4606348 JJ |
936 | /* Given a parent_depends_on TASK in its parent's children_queue, move |
937 | it to the front of its priority so it is run as soon as possible. | |
d9a6bd32 | 938 | |
e4606348 | 939 | PARENT is passed as an optimization. |
d9a6bd32 | 940 | |
e4606348 JJ |
941 | (This function could be defined in priority_queue.c, but we want it |
942 | inlined, and putting it in priority_queue.h is not an option, given | |
943 | that gomp_task has not been properly defined at that point). */ | |
d9a6bd32 | 944 | |
e4606348 JJ |
945 | static void inline |
946 | priority_queue_upgrade_task (struct gomp_task *task, | |
947 | struct gomp_task *parent) | |
d9a6bd32 | 948 | { |
e4606348 JJ |
949 | struct priority_queue *head = &parent->children_queue; |
950 | struct priority_node *node = &task->pnode[PQ_CHILDREN]; | |
951 | #if _LIBGOMP_CHECKING_ | |
952 | if (!task->parent_depends_on) | |
953 | gomp_fatal ("priority_queue_upgrade_task: task must be a " | |
954 | "parent_depends_on task"); | |
955 | if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) | |
956 | gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); | |
957 | #endif | |
958 | if (priority_queue_multi_p (head)) | |
d9a6bd32 | 959 | { |
e4606348 JJ |
960 | struct priority_list *list |
961 | = priority_queue_lookup_priority (head, task->priority); | |
962 | priority_list_upgrade_task (list, node); | |
d9a6bd32 | 963 | } |
e4606348 JJ |
964 | else |
965 | priority_list_upgrade_task (&head->l, node); | |
d9a6bd32 JJ |
966 | } |
967 | ||
e4606348 JJ |
968 | /* Given a CHILD_TASK in LIST that is about to be executed, move it out of |
969 | the way in LIST so that other tasks can be considered for | |
970 | execution. LIST contains tasks of type TYPE. | |
d9a6bd32 | 971 | |
e4606348 JJ |
972 | Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field |
973 | if applicable. */ | |
974 | ||
975 | static void inline | |
976 | priority_list_downgrade_task (enum priority_queue_type type, | |
977 | struct priority_list *list, | |
978 | struct gomp_task *child_task) | |
d9a6bd32 | 979 | { |
e4606348 JJ |
980 | struct priority_node *node = task_to_priority_node (type, child_task); |
981 | if (list->tasks == node) | |
982 | list->tasks = node->next; | |
983 | else if (node->next != list->tasks) | |
984 | { | |
985 | /* The task in NODE is about to become TIED and TIED tasks | |
986 | cannot come before WAITING tasks. If we're about to | |
987 | leave the queue in such an indeterminate state, rewire | |
988 | things appropriately. However, a TIED task at the end is | |
989 | perfectly fine. */ | |
990 | struct gomp_task *next_task = priority_node_to_task (type, node->next); | |
991 | if (next_task->kind == GOMP_TASK_WAITING) | |
992 | { | |
993 | /* Remove from list. */ | |
994 | node->prev->next = node->next; | |
995 | node->next->prev = node->prev; | |
996 | /* Rewire at the end. */ | |
997 | node->next = list->tasks; | |
998 | node->prev = list->tasks->prev; | |
999 | list->tasks->prev->next = node; | |
1000 | list->tasks->prev = node; | |
1001 | } | |
1002 | } | |
1003 | ||
1004 | /* If the current task is the last_parent_depends_on for its | |
1005 | priority, adjust last_parent_depends_on appropriately. */ | |
1006 | if (__builtin_expect (child_task->parent_depends_on, 0) | |
1007 | && list->last_parent_depends_on == node) | |
1008 | { | |
1009 | struct gomp_task *prev_child = priority_node_to_task (type, node->prev); | |
1010 | if (node->prev != node | |
1011 | && prev_child->kind == GOMP_TASK_WAITING | |
1012 | && prev_child->parent_depends_on) | |
1013 | list->last_parent_depends_on = node->prev; | |
1014 | else | |
1015 | { | |
1016 | /* There are no more parent_depends_on entries waiting | |
1017 | to run, clear the list. */ | |
1018 | list->last_parent_depends_on = NULL; | |
1019 | } | |
1020 | } | |
d9a6bd32 | 1021 | } |
e4606348 JJ |
1022 | |
1023 | /* Given a TASK in HEAD that is about to be executed, move it out of | |
1024 | the way so that other tasks can be considered for execution. HEAD | |
1025 | contains tasks of type TYPE. | |
1026 | ||
1027 | Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field | |
1028 | if applicable. | |
1029 | ||
1030 | (This function could be defined in priority_queue.c, but we want it | |
1031 | inlined, and putting it in priority_queue.h is not an option, given | |
1032 | that gomp_task has not been properly defined at that point). */ | |
1033 | ||
1034 | static void inline | |
1035 | priority_queue_downgrade_task (enum priority_queue_type type, | |
1036 | struct priority_queue *head, | |
1037 | struct gomp_task *task) | |
1038 | { | |
1039 | #if _LIBGOMP_CHECKING_ | |
1040 | if (!priority_queue_task_in_queue_p (type, head, task)) | |
1041 | gomp_fatal ("Attempt to downgrade missing task %p", task); | |
d9a6bd32 | 1042 | #endif |
e4606348 JJ |
1043 | if (priority_queue_multi_p (head)) |
1044 | { | |
1045 | struct priority_list *list | |
1046 | = priority_queue_lookup_priority (head, task->priority); | |
1047 | priority_list_downgrade_task (type, list, task); | |
1048 | } | |
1049 | else | |
1050 | priority_list_downgrade_task (type, &head->l, task); | |
1051 | } | |
1052 | ||
1053 | /* Setup CHILD_TASK to execute. This is done by setting the task to | |
1054 | TIED, and updating all relevant queues so that CHILD_TASK is no | |
1055 | longer chosen for scheduling. Also, remove CHILD_TASK from the | |
1056 | overall team task queue entirely. | |
1057 | ||
1058 | Return TRUE if task or its containing taskgroup has been | |
1059 | cancelled. */ | |
d9a6bd32 | 1060 | |
acf0174b JJ |
1061 | static inline bool |
1062 | gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, | |
d9a6bd32 | 1063 | struct gomp_team *team) |
acf0174b | 1064 | { |
e4606348 JJ |
1065 | #if _LIBGOMP_CHECKING_ |
1066 | if (child_task->parent) | |
1067 | priority_queue_verify (PQ_CHILDREN, | |
1068 | &child_task->parent->children_queue, true); | |
1069 | if (child_task->taskgroup) | |
1070 | priority_queue_verify (PQ_TASKGROUP, | |
1071 | &child_task->taskgroup->taskgroup_queue, false); | |
1072 | priority_queue_verify (PQ_TEAM, &team->task_queue, false); | |
d9a6bd32 JJ |
1073 | #endif |
1074 | ||
e4606348 | 1075 | /* Task is about to go tied, move it out of the way. */ |
0494285a | 1076 | if (parent) |
e4606348 JJ |
1077 | priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, |
1078 | child_task); | |
d9a6bd32 | 1079 | |
e4606348 | 1080 | /* Task is about to go tied, move it out of the way. */ |
d9a6bd32 JJ |
1081 | struct gomp_taskgroup *taskgroup = child_task->taskgroup; |
1082 | if (taskgroup) | |
e4606348 JJ |
1083 | priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
1084 | child_task); | |
d9a6bd32 | 1085 | |
e4606348 JJ |
1086 | priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, |
1087 | MEMMODEL_RELAXED); | |
1088 | child_task->pnode[PQ_TEAM].next = NULL; | |
1089 | child_task->pnode[PQ_TEAM].prev = NULL; | |
acf0174b | 1090 | child_task->kind = GOMP_TASK_TIED; |
d9a6bd32 | 1091 | |
acf0174b JJ |
1092 | if (--team->task_queued_count == 0) |
1093 | gomp_team_barrier_clear_task_pending (&team->barrier); | |
28567c40 | 1094 | if (__builtin_expect (gomp_cancel_var, 0) |
acf0174b | 1095 | && !child_task->copy_ctors_done) |
28567c40 JJ |
1096 | { |
1097 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
1098 | return true; | |
1099 | if (taskgroup) | |
1100 | { | |
1101 | if (taskgroup->cancelled) | |
1102 | return true; | |
1103 | if (taskgroup->workshare | |
1104 | && taskgroup->prev | |
1105 | && taskgroup->prev->cancelled) | |
1106 | return true; | |
1107 | } | |
1108 | } | |
acf0174b JJ |
1109 | return false; |
1110 | } | |
1111 | ||
1112 | static void | |
1113 | gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) | |
1114 | { | |
1115 | struct gomp_task *parent = child_task->parent; | |
1116 | size_t i; | |
1117 | ||
1118 | for (i = 0; i < child_task->depend_count; i++) | |
1119 | if (!child_task->depend[i].redundant) | |
1120 | { | |
1121 | if (child_task->depend[i].next) | |
1122 | child_task->depend[i].next->prev = child_task->depend[i].prev; | |
1123 | if (child_task->depend[i].prev) | |
1124 | child_task->depend[i].prev->next = child_task->depend[i].next; | |
1125 | else | |
1126 | { | |
1127 | hash_entry_type *slot | |
1128 | = htab_find_slot (&parent->depend_hash, &child_task->depend[i], | |
1129 | NO_INSERT); | |
1130 | if (*slot != &child_task->depend[i]) | |
1131 | abort (); | |
1132 | if (child_task->depend[i].next) | |
1133 | *slot = child_task->depend[i].next; | |
1134 | else | |
1135 | htab_clear_slot (parent->depend_hash, slot); | |
1136 | } | |
1137 | } | |
1138 | } | |
1139 | ||
e4606348 JJ |
1140 | /* After a CHILD_TASK has been run, adjust the dependency queue for |
1141 | each task that depends on CHILD_TASK, to record the fact that there | |
1142 | is one less dependency to worry about. If a task that depended on | |
1143 | CHILD_TASK now has no dependencies, place it in the various queues | |
1144 | so it gets scheduled to run. | |
d9a6bd32 JJ |
1145 | |
1146 | TEAM is the team to which CHILD_TASK belongs to. */ | |
1147 | ||
acf0174b JJ |
1148 | static size_t |
1149 | gomp_task_run_post_handle_dependers (struct gomp_task *child_task, | |
1150 | struct gomp_team *team) | |
1151 | { | |
1152 | struct gomp_task *parent = child_task->parent; | |
1153 | size_t i, count = child_task->dependers->n_elem, ret = 0; | |
1154 | for (i = 0; i < count; i++) | |
1155 | { | |
1156 | struct gomp_task *task = child_task->dependers->elem[i]; | |
e4606348 JJ |
1157 | |
1158 | /* CHILD_TASK satisfies a dependency for TASK. Keep track of | |
1159 | TASK's remaining dependencies. Once TASK has no other | |
93d90219 | 1160 | dependencies, put it into the various queues so it will get |
e4606348 | 1161 | scheduled for execution. */ |
acf0174b JJ |
1162 | if (--task->num_dependees != 0) |
1163 | continue; | |
1164 | ||
1165 | struct gomp_taskgroup *taskgroup = task->taskgroup; | |
1166 | if (parent) | |
1167 | { | |
e4606348 JJ |
1168 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
1169 | task, task->priority, | |
1170 | PRIORITY_INSERT_BEGIN, | |
1171 | /*adjust_parent_depends_on=*/true, | |
1172 | task->parent_depends_on); | |
0494285a | 1173 | if (parent->taskwait) |
acf0174b | 1174 | { |
0494285a JJ |
1175 | if (parent->taskwait->in_taskwait) |
1176 | { | |
e4606348 JJ |
1177 | /* One more task has had its dependencies met. |
1178 | Inform any waiters. */ | |
0494285a JJ |
1179 | parent->taskwait->in_taskwait = false; |
1180 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1181 | } | |
1182 | else if (parent->taskwait->in_depend_wait) | |
1183 | { | |
e4606348 JJ |
1184 | /* One more task has had its dependencies met. |
1185 | Inform any waiters. */ | |
0494285a JJ |
1186 | parent->taskwait->in_depend_wait = false; |
1187 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1188 | } | |
acf0174b JJ |
1189 | } |
1190 | } | |
1191 | if (taskgroup) | |
1192 | { | |
e4606348 JJ |
1193 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
1194 | task, task->priority, | |
1195 | PRIORITY_INSERT_BEGIN, | |
1196 | /*adjust_parent_depends_on=*/false, | |
1197 | task->parent_depends_on); | |
acf0174b JJ |
1198 | if (taskgroup->in_taskgroup_wait) |
1199 | { | |
e4606348 JJ |
1200 | /* One more task has had its dependencies met. |
1201 | Inform any waiters. */ | |
acf0174b JJ |
1202 | taskgroup->in_taskgroup_wait = false; |
1203 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
1204 | } | |
1205 | } | |
e4606348 JJ |
1206 | priority_queue_insert (PQ_TEAM, &team->task_queue, |
1207 | task, task->priority, | |
1208 | PRIORITY_INSERT_END, | |
1209 | /*adjust_parent_depends_on=*/false, | |
1210 | task->parent_depends_on); | |
acf0174b JJ |
1211 | ++team->task_count; |
1212 | ++team->task_queued_count; | |
1213 | ++ret; | |
1214 | } | |
1215 | free (child_task->dependers); | |
1216 | child_task->dependers = NULL; | |
1217 | if (ret > 1) | |
1218 | gomp_team_barrier_set_task_pending (&team->barrier); | |
1219 | return ret; | |
1220 | } | |
1221 | ||
1222 | static inline size_t | |
1223 | gomp_task_run_post_handle_depend (struct gomp_task *child_task, | |
1224 | struct gomp_team *team) | |
1225 | { | |
1226 | if (child_task->depend_count == 0) | |
1227 | return 0; | |
1228 | ||
1229 | /* If parent is gone already, the hash table is freed and nothing | |
1230 | will use the hash table anymore, no need to remove anything from it. */ | |
1231 | if (child_task->parent != NULL) | |
1232 | gomp_task_run_post_handle_depend_hash (child_task); | |
1233 | ||
1234 | if (child_task->dependers == NULL) | |
1235 | return 0; | |
1236 | ||
1237 | return gomp_task_run_post_handle_dependers (child_task, team); | |
1238 | } | |
1239 | ||
d9a6bd32 JJ |
1240 | /* Remove CHILD_TASK from its parent. */ |
1241 | ||
acf0174b JJ |
1242 | static inline void |
1243 | gomp_task_run_post_remove_parent (struct gomp_task *child_task) | |
1244 | { | |
1245 | struct gomp_task *parent = child_task->parent; | |
1246 | if (parent == NULL) | |
1247 | return; | |
d9a6bd32 JJ |
1248 | |
1249 | /* If this was the last task the parent was depending on, | |
1250 | synchronize with gomp_task_maybe_wait_for_dependencies so it can | |
1251 | clean up and return. */ | |
0494285a JJ |
1252 | if (__builtin_expect (child_task->parent_depends_on, 0) |
1253 | && --parent->taskwait->n_depend == 0 | |
1254 | && parent->taskwait->in_depend_wait) | |
1255 | { | |
1256 | parent->taskwait->in_depend_wait = false; | |
1257 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
1258 | } | |
d9a6bd32 | 1259 | |
e4606348 JJ |
1260 | if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, |
1261 | child_task, MEMMODEL_RELEASE) | |
1262 | && parent->taskwait && parent->taskwait->in_taskwait) | |
acf0174b | 1263 | { |
e4606348 JJ |
1264 | parent->taskwait->in_taskwait = false; |
1265 | gomp_sem_post (&parent->taskwait->taskwait_sem); | |
acf0174b | 1266 | } |
e4606348 JJ |
1267 | child_task->pnode[PQ_CHILDREN].next = NULL; |
1268 | child_task->pnode[PQ_CHILDREN].prev = NULL; | |
acf0174b JJ |
1269 | } |
1270 | ||
d9a6bd32 JJ |
1271 | /* Remove CHILD_TASK from its taskgroup. */ |
1272 | ||
acf0174b JJ |
1273 | static inline void |
1274 | gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) | |
1275 | { | |
1276 | struct gomp_taskgroup *taskgroup = child_task->taskgroup; | |
1277 | if (taskgroup == NULL) | |
1278 | return; | |
e4606348 JJ |
1279 | bool empty = priority_queue_remove (PQ_TASKGROUP, |
1280 | &taskgroup->taskgroup_queue, | |
1281 | child_task, MEMMODEL_RELAXED); | |
1282 | child_task->pnode[PQ_TASKGROUP].next = NULL; | |
1283 | child_task->pnode[PQ_TASKGROUP].prev = NULL; | |
acf0174b JJ |
1284 | if (taskgroup->num_children > 1) |
1285 | --taskgroup->num_children; | |
1286 | else | |
1287 | { | |
1288 | /* We access taskgroup->num_children in GOMP_taskgroup_end | |
1289 | outside of the task lock mutex region, so | |
1290 | need a release barrier here to ensure memory | |
1291 | written by child_task->fn above is flushed | |
1292 | before the NULL is written. */ | |
1293 | __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); | |
1294 | } | |
e4606348 | 1295 | if (empty && taskgroup->in_taskgroup_wait) |
acf0174b | 1296 | { |
e4606348 JJ |
1297 | taskgroup->in_taskgroup_wait = false; |
1298 | gomp_sem_post (&taskgroup->taskgroup_sem); | |
acf0174b JJ |
1299 | } |
1300 | } | |
1301 | ||
a68ab351 JJ |
1302 | void |
1303 | gomp_barrier_handle_tasks (gomp_barrier_state_t state) | |
1304 | { | |
1305 | struct gomp_thread *thr = gomp_thread (); | |
1306 | struct gomp_team *team = thr->ts.team; | |
1307 | struct gomp_task *task = thr->task; | |
1308 | struct gomp_task *child_task = NULL; | |
1309 | struct gomp_task *to_free = NULL; | |
acf0174b | 1310 | int do_wake = 0; |
a68ab351 JJ |
1311 | |
1312 | gomp_mutex_lock (&team->task_lock); | |
1313 | if (gomp_barrier_last_thread (state)) | |
1314 | { | |
1315 | if (team->task_count == 0) | |
1316 | { | |
1317 | gomp_team_barrier_done (&team->barrier, state); | |
1318 | gomp_mutex_unlock (&team->task_lock); | |
1319 | gomp_team_barrier_wake (&team->barrier, 0); | |
1320 | return; | |
1321 | } | |
1322 | gomp_team_barrier_set_waiting_for_tasks (&team->barrier); | |
1323 | } | |
1324 | ||
1325 | while (1) | |
1326 | { | |
acf0174b | 1327 | bool cancelled = false; |
e4606348 | 1328 | if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) |
a68ab351 | 1329 | { |
e4606348 JJ |
1330 | bool ignored; |
1331 | child_task | |
1332 | = priority_queue_next_task (PQ_TEAM, &team->task_queue, | |
1333 | PQ_IGNORED, NULL, | |
1334 | &ignored); | |
acf0174b | 1335 | cancelled = gomp_task_run_pre (child_task, child_task->parent, |
d9a6bd32 | 1336 | team); |
acf0174b JJ |
1337 | if (__builtin_expect (cancelled, 0)) |
1338 | { | |
1339 | if (to_free) | |
1340 | { | |
1341 | gomp_finish_task (to_free); | |
1342 | free (to_free); | |
1343 | to_free = NULL; | |
1344 | } | |
1345 | goto finish_cancelled; | |
1346 | } | |
a68ab351 | 1347 | team->task_running_count++; |
acf0174b | 1348 | child_task->in_tied_task = true; |
a68ab351 JJ |
1349 | } |
1350 | gomp_mutex_unlock (&team->task_lock); | |
acf0174b JJ |
1351 | if (do_wake) |
1352 | { | |
1353 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1354 | do_wake = 0; | |
1355 | } | |
a68ab351 JJ |
1356 | if (to_free) |
1357 | { | |
1358 | gomp_finish_task (to_free); | |
1359 | free (to_free); | |
1360 | to_free = NULL; | |
1361 | } | |
1362 | if (child_task) | |
1363 | { | |
1364 | thr->task = child_task; | |
e4606348 JJ |
1365 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1366 | { | |
1367 | if (gomp_target_task_fn (child_task->fn_data)) | |
1368 | { | |
1369 | thr->task = task; | |
1370 | gomp_mutex_lock (&team->task_lock); | |
1371 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1372 | team->task_running_count--; | |
1373 | struct gomp_target_task *ttask | |
1374 | = (struct gomp_target_task *) child_task->fn_data; | |
1375 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1376 | in between gomp_target_task_fn and the mutex lock, | |
1377 | perform the requeuing here. */ | |
1378 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1379 | gomp_target_task_completion (team, child_task); | |
1380 | else | |
1381 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1382 | child_task = NULL; | |
1383 | continue; | |
1384 | } | |
1385 | } | |
1386 | else | |
1387 | child_task->fn (child_task->fn_data); | |
a68ab351 JJ |
1388 | thr->task = task; |
1389 | } | |
1390 | else | |
1391 | return; | |
1392 | gomp_mutex_lock (&team->task_lock); | |
1393 | if (child_task) | |
1394 | { | |
acf0174b JJ |
1395 | finish_cancelled:; |
1396 | size_t new_tasks | |
1397 | = gomp_task_run_post_handle_depend (child_task, team); | |
1398 | gomp_task_run_post_remove_parent (child_task); | |
e4606348 | 1399 | gomp_clear_parent (&child_task->children_queue); |
acf0174b | 1400 | gomp_task_run_post_remove_taskgroup (child_task); |
a68ab351 JJ |
1401 | to_free = child_task; |
1402 | child_task = NULL; | |
acf0174b JJ |
1403 | if (!cancelled) |
1404 | team->task_running_count--; | |
1405 | if (new_tasks > 1) | |
1406 | { | |
1407 | do_wake = team->nthreads - team->task_running_count; | |
1408 | if (do_wake > new_tasks) | |
1409 | do_wake = new_tasks; | |
1410 | } | |
a68ab351 JJ |
1411 | if (--team->task_count == 0 |
1412 | && gomp_team_barrier_waiting_for_tasks (&team->barrier)) | |
1413 | { | |
1414 | gomp_team_barrier_done (&team->barrier, state); | |
1415 | gomp_mutex_unlock (&team->task_lock); | |
1416 | gomp_team_barrier_wake (&team->barrier, 0); | |
ab6dd406 | 1417 | gomp_mutex_lock (&team->task_lock); |
a68ab351 JJ |
1418 | } |
1419 | } | |
1420 | } | |
1421 | } | |
1422 | ||
d9a6bd32 JJ |
1423 | /* Called when encountering a taskwait directive. |
1424 | ||
1425 | Wait for all children of the current task. */ | |
a68ab351 JJ |
1426 | |
1427 | void | |
1428 | GOMP_taskwait (void) | |
1429 | { | |
1430 | struct gomp_thread *thr = gomp_thread (); | |
1431 | struct gomp_team *team = thr->ts.team; | |
1432 | struct gomp_task *task = thr->task; | |
1433 | struct gomp_task *child_task = NULL; | |
1434 | struct gomp_task *to_free = NULL; | |
0494285a | 1435 | struct gomp_taskwait taskwait; |
acf0174b | 1436 | int do_wake = 0; |
a68ab351 | 1437 | |
bed8d8a6 | 1438 | /* The acquire barrier on load of task->children here synchronizes |
acf0174b | 1439 | with the write of a NULL in gomp_task_run_post_remove_parent. It is |
bed8d8a6 AM |
1440 | not necessary that we synchronize with other non-NULL writes at |
1441 | this point, but we must ensure that all writes to memory by a | |
1442 | child thread task work function are seen before we exit from | |
1443 | GOMP_taskwait. */ | |
9a647288 | 1444 | if (task == NULL |
e4606348 | 1445 | || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) |
a68ab351 | 1446 | return; |
fbf7be80 | 1447 | |
0494285a | 1448 | memset (&taskwait, 0, sizeof (taskwait)); |
e4606348 | 1449 | bool child_q = false; |
a68ab351 JJ |
1450 | gomp_mutex_lock (&team->task_lock); |
1451 | while (1) | |
1452 | { | |
acf0174b | 1453 | bool cancelled = false; |
e4606348 | 1454 | if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) |
a68ab351 | 1455 | { |
0494285a JJ |
1456 | bool destroy_taskwait = task->taskwait != NULL; |
1457 | task->taskwait = NULL; | |
1458 | gomp_mutex_unlock (&team->task_lock); | |
1459 | if (to_free) | |
1460 | { | |
1461 | gomp_finish_task (to_free); | |
1462 | free (to_free); | |
1463 | } | |
1464 | if (destroy_taskwait) | |
1465 | gomp_sem_destroy (&taskwait.taskwait_sem); | |
1466 | return; | |
1467 | } | |
e4606348 JJ |
1468 | struct gomp_task *next_task |
1469 | = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, | |
1470 | PQ_TEAM, &team->task_queue, &child_q); | |
1471 | if (next_task->kind == GOMP_TASK_WAITING) | |
0494285a | 1472 | { |
e4606348 | 1473 | child_task = next_task; |
0494285a | 1474 | cancelled |
d9a6bd32 | 1475 | = gomp_task_run_pre (child_task, task, team); |
0494285a JJ |
1476 | if (__builtin_expect (cancelled, 0)) |
1477 | { | |
1478 | if (to_free) | |
1479 | { | |
1480 | gomp_finish_task (to_free); | |
1481 | free (to_free); | |
1482 | to_free = NULL; | |
1483 | } | |
1484 | goto finish_cancelled; | |
1485 | } | |
1486 | } | |
1487 | else | |
1488 | { | |
e4606348 JJ |
1489 | /* All tasks we are waiting for are either running in other |
1490 | threads, or they are tasks that have not had their | |
1491 | dependencies met (so they're not even in the queue). Wait | |
1492 | for them. */ | |
0494285a JJ |
1493 | if (task->taskwait == NULL) |
1494 | { | |
1495 | taskwait.in_depend_wait = false; | |
1496 | gomp_sem_init (&taskwait.taskwait_sem, 0); | |
1497 | task->taskwait = &taskwait; | |
1498 | } | |
1499 | taskwait.in_taskwait = true; | |
1500 | } | |
1501 | gomp_mutex_unlock (&team->task_lock); | |
1502 | if (do_wake) | |
1503 | { | |
1504 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1505 | do_wake = 0; | |
1506 | } | |
1507 | if (to_free) | |
1508 | { | |
1509 | gomp_finish_task (to_free); | |
1510 | free (to_free); | |
1511 | to_free = NULL; | |
1512 | } | |
1513 | if (child_task) | |
1514 | { | |
1515 | thr->task = child_task; | |
e4606348 JJ |
1516 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1517 | { | |
1518 | if (gomp_target_task_fn (child_task->fn_data)) | |
1519 | { | |
1520 | thr->task = task; | |
1521 | gomp_mutex_lock (&team->task_lock); | |
1522 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1523 | struct gomp_target_task *ttask | |
1524 | = (struct gomp_target_task *) child_task->fn_data; | |
1525 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1526 | in between gomp_target_task_fn and the mutex lock, | |
1527 | perform the requeuing here. */ | |
1528 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1529 | gomp_target_task_completion (team, child_task); | |
1530 | else | |
1531 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1532 | child_task = NULL; | |
1533 | continue; | |
1534 | } | |
1535 | } | |
1536 | else | |
1537 | child_task->fn (child_task->fn_data); | |
0494285a JJ |
1538 | thr->task = task; |
1539 | } | |
1540 | else | |
1541 | gomp_sem_wait (&taskwait.taskwait_sem); | |
1542 | gomp_mutex_lock (&team->task_lock); | |
1543 | if (child_task) | |
1544 | { | |
1545 | finish_cancelled:; | |
1546 | size_t new_tasks | |
1547 | = gomp_task_run_post_handle_depend (child_task, team); | |
d9a6bd32 | 1548 | |
e4606348 | 1549 | if (child_q) |
0494285a | 1550 | { |
e4606348 JJ |
1551 | priority_queue_remove (PQ_CHILDREN, &task->children_queue, |
1552 | child_task, MEMMODEL_RELAXED); | |
1553 | child_task->pnode[PQ_CHILDREN].next = NULL; | |
1554 | child_task->pnode[PQ_CHILDREN].prev = NULL; | |
0494285a | 1555 | } |
d9a6bd32 | 1556 | |
e4606348 JJ |
1557 | gomp_clear_parent (&child_task->children_queue); |
1558 | ||
0494285a | 1559 | gomp_task_run_post_remove_taskgroup (child_task); |
d9a6bd32 | 1560 | |
0494285a JJ |
1561 | to_free = child_task; |
1562 | child_task = NULL; | |
1563 | team->task_count--; | |
1564 | if (new_tasks > 1) | |
1565 | { | |
1566 | do_wake = team->nthreads - team->task_running_count | |
1567 | - !task->in_tied_task; | |
1568 | if (do_wake > new_tasks) | |
1569 | do_wake = new_tasks; | |
1570 | } | |
1571 | } | |
1572 | } | |
1573 | } | |
1574 | ||
28567c40 JJ |
1575 | /* Called when encountering a taskwait directive with depend clause(s). |
1576 | Wait as if it was an mergeable included task construct with empty body. */ | |
1577 | ||
1578 | void | |
1579 | GOMP_taskwait_depend (void **depend) | |
1580 | { | |
1581 | struct gomp_thread *thr = gomp_thread (); | |
1582 | struct gomp_team *team = thr->ts.team; | |
1583 | ||
1584 | /* If parallel or taskgroup has been cancelled, return early. */ | |
1585 | if (__builtin_expect (gomp_cancel_var, 0) && team) | |
1586 | { | |
1587 | if (gomp_team_barrier_cancelled (&team->barrier)) | |
1588 | return; | |
1589 | if (thr->task->taskgroup) | |
1590 | { | |
1591 | if (thr->task->taskgroup->cancelled) | |
1592 | return; | |
1593 | if (thr->task->taskgroup->workshare | |
1594 | && thr->task->taskgroup->prev | |
1595 | && thr->task->taskgroup->prev->cancelled) | |
1596 | return; | |
1597 | } | |
1598 | } | |
1599 | ||
1600 | if (thr->task && thr->task->depend_hash) | |
1601 | gomp_task_maybe_wait_for_dependencies (depend); | |
1602 | } | |
1603 | ||
e4606348 JJ |
1604 | /* An undeferred task is about to run. Wait for all tasks that this |
1605 | undeferred task depends on. | |
1606 | ||
1607 | This is done by first putting all known ready dependencies | |
1608 | (dependencies that have their own dependencies met) at the top of | |
1609 | the scheduling queues. Then we iterate through these imminently | |
1610 | ready tasks (and possibly other high priority tasks), and run them. | |
1611 | If we run out of ready dependencies to execute, we either wait for | |
28567c40 | 1612 | the remaining dependencies to finish, or wait for them to get |
e4606348 | 1613 | scheduled so we can run them. |
0494285a | 1614 | |
d9a6bd32 JJ |
1615 | DEPEND is as in GOMP_task. */ |
1616 | ||
1617 | void | |
0494285a JJ |
1618 | gomp_task_maybe_wait_for_dependencies (void **depend) |
1619 | { | |
1620 | struct gomp_thread *thr = gomp_thread (); | |
1621 | struct gomp_task *task = thr->task; | |
1622 | struct gomp_team *team = thr->ts.team; | |
1623 | struct gomp_task_depend_entry elem, *ent = NULL; | |
1624 | struct gomp_taskwait taskwait; | |
28567c40 | 1625 | size_t orig_ndepend = (uintptr_t) depend[0]; |
0494285a | 1626 | size_t nout = (uintptr_t) depend[1]; |
28567c40 JJ |
1627 | size_t ndepend = orig_ndepend; |
1628 | size_t normal = ndepend; | |
1629 | size_t n = 2; | |
0494285a JJ |
1630 | size_t i; |
1631 | size_t num_awaited = 0; | |
1632 | struct gomp_task *child_task = NULL; | |
1633 | struct gomp_task *to_free = NULL; | |
1634 | int do_wake = 0; | |
1635 | ||
28567c40 JJ |
1636 | if (ndepend == 0) |
1637 | { | |
1638 | ndepend = nout; | |
1639 | nout = (uintptr_t) depend[2] + (uintptr_t) depend[3]; | |
1640 | normal = nout + (uintptr_t) depend[4]; | |
1641 | n = 5; | |
1642 | } | |
0494285a JJ |
1643 | gomp_mutex_lock (&team->task_lock); |
1644 | for (i = 0; i < ndepend; i++) | |
1645 | { | |
28567c40 JJ |
1646 | elem.addr = depend[i + n]; |
1647 | elem.is_in = i >= nout; | |
1648 | if (__builtin_expect (i >= normal, 0)) | |
1649 | { | |
1650 | void **d = (void **) elem.addr; | |
1651 | switch ((uintptr_t) d[1]) | |
1652 | { | |
1653 | case GOMP_DEPEND_IN: | |
1654 | break; | |
1655 | case GOMP_DEPEND_OUT: | |
1656 | case GOMP_DEPEND_INOUT: | |
1657 | case GOMP_DEPEND_MUTEXINOUTSET: | |
1658 | elem.is_in = 0; | |
1659 | break; | |
1660 | default: | |
1661 | gomp_fatal ("unknown omp_depend_t dependence type %d", | |
1662 | (int) (uintptr_t) d[1]); | |
1663 | } | |
1664 | elem.addr = d[0]; | |
1665 | } | |
0494285a JJ |
1666 | ent = htab_find (task->depend_hash, &elem); |
1667 | for (; ent; ent = ent->next) | |
28567c40 | 1668 | if (elem.is_in && ent->is_in) |
0494285a JJ |
1669 | continue; |
1670 | else | |
1671 | { | |
1672 | struct gomp_task *tsk = ent->task; | |
1673 | if (!tsk->parent_depends_on) | |
1674 | { | |
1675 | tsk->parent_depends_on = true; | |
1676 | ++num_awaited; | |
93d90219 | 1677 | /* If dependency TSK itself has no dependencies and is |
e4606348 JJ |
1678 | ready to run, move it up front so that we run it as |
1679 | soon as possible. */ | |
0494285a | 1680 | if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) |
e4606348 | 1681 | priority_queue_upgrade_task (tsk, task); |
0494285a JJ |
1682 | } |
1683 | } | |
1684 | } | |
1685 | if (num_awaited == 0) | |
1686 | { | |
1687 | gomp_mutex_unlock (&team->task_lock); | |
1688 | return; | |
1689 | } | |
1690 | ||
1691 | memset (&taskwait, 0, sizeof (taskwait)); | |
1692 | taskwait.n_depend = num_awaited; | |
0494285a JJ |
1693 | gomp_sem_init (&taskwait.taskwait_sem, 0); |
1694 | task->taskwait = &taskwait; | |
1695 | ||
1696 | while (1) | |
1697 | { | |
1698 | bool cancelled = false; | |
1699 | if (taskwait.n_depend == 0) | |
1700 | { | |
1701 | task->taskwait = NULL; | |
a68ab351 JJ |
1702 | gomp_mutex_unlock (&team->task_lock); |
1703 | if (to_free) | |
1704 | { | |
1705 | gomp_finish_task (to_free); | |
1706 | free (to_free); | |
1707 | } | |
0494285a | 1708 | gomp_sem_destroy (&taskwait.taskwait_sem); |
a68ab351 JJ |
1709 | return; |
1710 | } | |
e4606348 JJ |
1711 | |
1712 | /* Theoretically when we have multiple priorities, we should | |
1713 | chose between the highest priority item in | |
1714 | task->children_queue and team->task_queue here, so we should | |
1715 | use priority_queue_next_task(). However, since we are | |
1716 | running an undeferred task, perhaps that makes all tasks it | |
1717 | depends on undeferred, thus a priority of INF? This would | |
1718 | make it unnecessary to take anything into account here, | |
1719 | but the dependencies. | |
1720 | ||
1721 | On the other hand, if we want to use priority_queue_next_task(), | |
1722 | care should be taken to only use priority_queue_remove() | |
1723 | below if the task was actually removed from the children | |
1724 | queue. */ | |
1725 | bool ignored; | |
1726 | struct gomp_task *next_task | |
1727 | = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, | |
1728 | PQ_IGNORED, NULL, &ignored); | |
1729 | ||
1730 | if (next_task->kind == GOMP_TASK_WAITING) | |
a68ab351 | 1731 | { |
e4606348 | 1732 | child_task = next_task; |
acf0174b | 1733 | cancelled |
d9a6bd32 | 1734 | = gomp_task_run_pre (child_task, task, team); |
acf0174b | 1735 | if (__builtin_expect (cancelled, 0)) |
a68ab351 | 1736 | { |
acf0174b JJ |
1737 | if (to_free) |
1738 | { | |
1739 | gomp_finish_task (to_free); | |
1740 | free (to_free); | |
1741 | to_free = NULL; | |
1742 | } | |
1743 | goto finish_cancelled; | |
a68ab351 | 1744 | } |
a68ab351 JJ |
1745 | } |
1746 | else | |
e4606348 JJ |
1747 | /* All tasks we are waiting for are either running in other |
1748 | threads, or they are tasks that have not had their | |
1749 | dependencies met (so they're not even in the queue). Wait | |
1750 | for them. */ | |
0494285a | 1751 | taskwait.in_depend_wait = true; |
a68ab351 | 1752 | gomp_mutex_unlock (&team->task_lock); |
acf0174b JJ |
1753 | if (do_wake) |
1754 | { | |
1755 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1756 | do_wake = 0; | |
1757 | } | |
a68ab351 JJ |
1758 | if (to_free) |
1759 | { | |
1760 | gomp_finish_task (to_free); | |
1761 | free (to_free); | |
1762 | to_free = NULL; | |
1763 | } | |
1764 | if (child_task) | |
1765 | { | |
1766 | thr->task = child_task; | |
e4606348 JJ |
1767 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1768 | { | |
1769 | if (gomp_target_task_fn (child_task->fn_data)) | |
1770 | { | |
1771 | thr->task = task; | |
1772 | gomp_mutex_lock (&team->task_lock); | |
1773 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1774 | struct gomp_target_task *ttask | |
1775 | = (struct gomp_target_task *) child_task->fn_data; | |
1776 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1777 | in between gomp_target_task_fn and the mutex lock, | |
1778 | perform the requeuing here. */ | |
1779 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1780 | gomp_target_task_completion (team, child_task); | |
1781 | else | |
1782 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1783 | child_task = NULL; | |
1784 | continue; | |
1785 | } | |
1786 | } | |
1787 | else | |
1788 | child_task->fn (child_task->fn_data); | |
a68ab351 JJ |
1789 | thr->task = task; |
1790 | } | |
1791 | else | |
0494285a | 1792 | gomp_sem_wait (&taskwait.taskwait_sem); |
a68ab351 JJ |
1793 | gomp_mutex_lock (&team->task_lock); |
1794 | if (child_task) | |
1795 | { | |
acf0174b JJ |
1796 | finish_cancelled:; |
1797 | size_t new_tasks | |
1798 | = gomp_task_run_post_handle_depend (child_task, team); | |
0494285a JJ |
1799 | if (child_task->parent_depends_on) |
1800 | --taskwait.n_depend; | |
d9a6bd32 | 1801 | |
e4606348 JJ |
1802 | priority_queue_remove (PQ_CHILDREN, &task->children_queue, |
1803 | child_task, MEMMODEL_RELAXED); | |
1804 | child_task->pnode[PQ_CHILDREN].next = NULL; | |
1805 | child_task->pnode[PQ_CHILDREN].prev = NULL; | |
d9a6bd32 | 1806 | |
e4606348 | 1807 | gomp_clear_parent (&child_task->children_queue); |
acf0174b | 1808 | gomp_task_run_post_remove_taskgroup (child_task); |
a68ab351 JJ |
1809 | to_free = child_task; |
1810 | child_task = NULL; | |
1811 | team->task_count--; | |
acf0174b JJ |
1812 | if (new_tasks > 1) |
1813 | { | |
1814 | do_wake = team->nthreads - team->task_running_count | |
1815 | - !task->in_tied_task; | |
1816 | if (do_wake > new_tasks) | |
1817 | do_wake = new_tasks; | |
1818 | } | |
a68ab351 JJ |
1819 | } |
1820 | } | |
1821 | } | |
20906c66 JJ |
1822 | |
1823 | /* Called when encountering a taskyield directive. */ | |
1824 | ||
1825 | void | |
1826 | GOMP_taskyield (void) | |
1827 | { | |
1828 | /* Nothing at the moment. */ | |
1829 | } | |
1830 | ||
28567c40 JJ |
1831 | static inline struct gomp_taskgroup * |
1832 | gomp_taskgroup_init (struct gomp_taskgroup *prev) | |
1833 | { | |
1834 | struct gomp_taskgroup *taskgroup | |
1835 | = gomp_malloc (sizeof (struct gomp_taskgroup)); | |
1836 | taskgroup->prev = prev; | |
1837 | priority_queue_init (&taskgroup->taskgroup_queue); | |
1838 | taskgroup->reductions = prev ? prev->reductions : NULL; | |
1839 | taskgroup->in_taskgroup_wait = false; | |
1840 | taskgroup->cancelled = false; | |
1841 | taskgroup->workshare = false; | |
1842 | taskgroup->num_children = 0; | |
1843 | gomp_sem_init (&taskgroup->taskgroup_sem, 0); | |
1844 | return taskgroup; | |
1845 | } | |
1846 | ||
acf0174b JJ |
1847 | void |
1848 | GOMP_taskgroup_start (void) | |
1849 | { | |
1850 | struct gomp_thread *thr = gomp_thread (); | |
1851 | struct gomp_team *team = thr->ts.team; | |
1852 | struct gomp_task *task = thr->task; | |
acf0174b JJ |
1853 | |
1854 | /* If team is NULL, all tasks are executed as | |
d9a6bd32 | 1855 | GOMP_TASK_UNDEFERRED tasks and thus all children tasks of |
acf0174b JJ |
1856 | taskgroup and their descendant tasks will be finished |
1857 | by the time GOMP_taskgroup_end is called. */ | |
1858 | if (team == NULL) | |
1859 | return; | |
28567c40 | 1860 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); |
acf0174b JJ |
1861 | } |
1862 | ||
1863 | void | |
1864 | GOMP_taskgroup_end (void) | |
1865 | { | |
1866 | struct gomp_thread *thr = gomp_thread (); | |
1867 | struct gomp_team *team = thr->ts.team; | |
1868 | struct gomp_task *task = thr->task; | |
1869 | struct gomp_taskgroup *taskgroup; | |
1870 | struct gomp_task *child_task = NULL; | |
1871 | struct gomp_task *to_free = NULL; | |
1872 | int do_wake = 0; | |
1873 | ||
1874 | if (team == NULL) | |
1875 | return; | |
1876 | taskgroup = task->taskgroup; | |
e4606348 JJ |
1877 | if (__builtin_expect (taskgroup == NULL, 0) |
1878 | && thr->ts.level == 0) | |
1879 | { | |
1880 | /* This can happen if GOMP_taskgroup_start is called when | |
1881 | thr->ts.team == NULL, but inside of the taskgroup there | |
1882 | is #pragma omp target nowait that creates an implicit | |
1883 | team with a single thread. In this case, we want to wait | |
1884 | for all outstanding tasks in this team. */ | |
1885 | gomp_team_barrier_wait (&team->barrier); | |
1886 | return; | |
1887 | } | |
acf0174b JJ |
1888 | |
1889 | /* The acquire barrier on load of taskgroup->num_children here | |
1890 | synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. | |
1891 | It is not necessary that we synchronize with other non-0 writes at | |
1892 | this point, but we must ensure that all writes to memory by a | |
1893 | child thread task work function are seen before we exit from | |
1894 | GOMP_taskgroup_end. */ | |
1895 | if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) | |
1896 | goto finish; | |
1897 | ||
e4606348 | 1898 | bool unused; |
acf0174b JJ |
1899 | gomp_mutex_lock (&team->task_lock); |
1900 | while (1) | |
1901 | { | |
1902 | bool cancelled = false; | |
e4606348 JJ |
1903 | if (priority_queue_empty_p (&taskgroup->taskgroup_queue, |
1904 | MEMMODEL_RELAXED)) | |
acf0174b JJ |
1905 | { |
1906 | if (taskgroup->num_children) | |
acf0174b | 1907 | { |
e4606348 JJ |
1908 | if (priority_queue_empty_p (&task->children_queue, |
1909 | MEMMODEL_RELAXED)) | |
3696163c | 1910 | goto do_wait; |
e4606348 JJ |
1911 | child_task |
1912 | = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, | |
1913 | PQ_TEAM, &team->task_queue, | |
1914 | &unused); | |
1915 | } | |
1916 | else | |
3696163c JJ |
1917 | { |
1918 | gomp_mutex_unlock (&team->task_lock); | |
1919 | if (to_free) | |
1920 | { | |
1921 | gomp_finish_task (to_free); | |
1922 | free (to_free); | |
1923 | } | |
1924 | goto finish; | |
acf0174b | 1925 | } |
acf0174b | 1926 | } |
3696163c | 1927 | else |
e4606348 JJ |
1928 | child_task |
1929 | = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, | |
1930 | PQ_TEAM, &team->task_queue, &unused); | |
3696163c | 1931 | if (child_task->kind == GOMP_TASK_WAITING) |
acf0174b | 1932 | { |
acf0174b | 1933 | cancelled |
d9a6bd32 | 1934 | = gomp_task_run_pre (child_task, child_task->parent, team); |
acf0174b JJ |
1935 | if (__builtin_expect (cancelled, 0)) |
1936 | { | |
1937 | if (to_free) | |
1938 | { | |
1939 | gomp_finish_task (to_free); | |
1940 | free (to_free); | |
1941 | to_free = NULL; | |
1942 | } | |
1943 | goto finish_cancelled; | |
1944 | } | |
1945 | } | |
1946 | else | |
1947 | { | |
3696163c | 1948 | child_task = NULL; |
acf0174b | 1949 | do_wait: |
e4606348 JJ |
1950 | /* All tasks we are waiting for are either running in other |
1951 | threads, or they are tasks that have not had their | |
1952 | dependencies met (so they're not even in the queue). Wait | |
1953 | for them. */ | |
acf0174b JJ |
1954 | taskgroup->in_taskgroup_wait = true; |
1955 | } | |
1956 | gomp_mutex_unlock (&team->task_lock); | |
1957 | if (do_wake) | |
1958 | { | |
1959 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
1960 | do_wake = 0; | |
1961 | } | |
1962 | if (to_free) | |
1963 | { | |
1964 | gomp_finish_task (to_free); | |
1965 | free (to_free); | |
1966 | to_free = NULL; | |
1967 | } | |
1968 | if (child_task) | |
1969 | { | |
1970 | thr->task = child_task; | |
e4606348 JJ |
1971 | if (__builtin_expect (child_task->fn == NULL, 0)) |
1972 | { | |
1973 | if (gomp_target_task_fn (child_task->fn_data)) | |
1974 | { | |
1975 | thr->task = task; | |
1976 | gomp_mutex_lock (&team->task_lock); | |
1977 | child_task->kind = GOMP_TASK_ASYNC_RUNNING; | |
1978 | struct gomp_target_task *ttask | |
1979 | = (struct gomp_target_task *) child_task->fn_data; | |
1980 | /* If GOMP_PLUGIN_target_task_completion has run already | |
1981 | in between gomp_target_task_fn and the mutex lock, | |
1982 | perform the requeuing here. */ | |
1983 | if (ttask->state == GOMP_TARGET_TASK_FINISHED) | |
1984 | gomp_target_task_completion (team, child_task); | |
1985 | else | |
1986 | ttask->state = GOMP_TARGET_TASK_RUNNING; | |
1987 | child_task = NULL; | |
1988 | continue; | |
1989 | } | |
1990 | } | |
1991 | else | |
1992 | child_task->fn (child_task->fn_data); | |
acf0174b JJ |
1993 | thr->task = task; |
1994 | } | |
1995 | else | |
1996 | gomp_sem_wait (&taskgroup->taskgroup_sem); | |
1997 | gomp_mutex_lock (&team->task_lock); | |
1998 | if (child_task) | |
1999 | { | |
2000 | finish_cancelled:; | |
2001 | size_t new_tasks | |
2002 | = gomp_task_run_post_handle_depend (child_task, team); | |
acf0174b | 2003 | gomp_task_run_post_remove_parent (child_task); |
e4606348 | 2004 | gomp_clear_parent (&child_task->children_queue); |
3696163c | 2005 | gomp_task_run_post_remove_taskgroup (child_task); |
acf0174b JJ |
2006 | to_free = child_task; |
2007 | child_task = NULL; | |
2008 | team->task_count--; | |
2009 | if (new_tasks > 1) | |
2010 | { | |
2011 | do_wake = team->nthreads - team->task_running_count | |
2012 | - !task->in_tied_task; | |
2013 | if (do_wake > new_tasks) | |
2014 | do_wake = new_tasks; | |
2015 | } | |
2016 | } | |
2017 | } | |
2018 | ||
2019 | finish: | |
2020 | task->taskgroup = taskgroup->prev; | |
2021 | gomp_sem_destroy (&taskgroup->taskgroup_sem); | |
2022 | free (taskgroup); | |
2023 | } | |
2024 | ||
28567c40 JJ |
2025 | static inline __attribute__((always_inline)) void |
2026 | gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig, | |
2027 | unsigned nthreads) | |
2028 | { | |
2029 | size_t total_cnt = 0; | |
2030 | uintptr_t *d = data; | |
2031 | struct htab *old_htab = NULL, *new_htab; | |
2032 | do | |
2033 | { | |
2034 | if (__builtin_expect (orig != NULL, 0)) | |
2035 | { | |
2036 | /* For worksharing task reductions, memory has been allocated | |
2037 | already by some other thread that encountered the construct | |
2038 | earlier. */ | |
2039 | d[2] = orig[2]; | |
2040 | d[6] = orig[6]; | |
2041 | orig = (uintptr_t *) orig[4]; | |
2042 | } | |
2043 | else | |
2044 | { | |
2045 | size_t sz = d[1] * nthreads; | |
2046 | /* Should use omp_alloc if d[3] is not -1. */ | |
2047 | void *ptr = gomp_aligned_alloc (d[2], sz); | |
2048 | memset (ptr, '\0', sz); | |
2049 | d[2] = (uintptr_t) ptr; | |
2050 | d[6] = d[2] + sz; | |
2051 | } | |
2052 | d[5] = 0; | |
2053 | total_cnt += d[0]; | |
2054 | if (d[4] == 0) | |
2055 | { | |
2056 | d[4] = (uintptr_t) old; | |
2057 | break; | |
2058 | } | |
2059 | else | |
2060 | d = (uintptr_t *) d[4]; | |
2061 | } | |
2062 | while (1); | |
2063 | if (old && old[5]) | |
2064 | { | |
2065 | old_htab = (struct htab *) old[5]; | |
2066 | total_cnt += htab_elements (old_htab); | |
2067 | } | |
2068 | new_htab = htab_create (total_cnt); | |
2069 | if (old_htab) | |
2070 | { | |
2071 | /* Copy old hash table, like in htab_expand. */ | |
2072 | hash_entry_type *p, *olimit; | |
2073 | new_htab->n_elements = htab_elements (old_htab); | |
2074 | olimit = old_htab->entries + old_htab->size; | |
2075 | p = old_htab->entries; | |
2076 | do | |
2077 | { | |
2078 | hash_entry_type x = *p; | |
2079 | if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) | |
2080 | *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x; | |
2081 | p++; | |
2082 | } | |
2083 | while (p < olimit); | |
2084 | } | |
2085 | d = data; | |
2086 | do | |
2087 | { | |
2088 | size_t j; | |
2089 | for (j = 0; j < d[0]; ++j) | |
2090 | { | |
2091 | uintptr_t *p = d + 7 + j * 3; | |
2092 | p[2] = (uintptr_t) d; | |
2093 | /* Ugly hack, hash_entry_type is defined for the task dependencies, | |
2094 | which hash on the first element which is a pointer. We need | |
2095 | to hash also on the first sizeof (uintptr_t) bytes which contain | |
2096 | a pointer. Hide the cast from the compiler. */ | |
2097 | hash_entry_type n; | |
2098 | __asm ("" : "=g" (n) : "0" (p)); | |
2099 | *htab_find_slot (&new_htab, n, INSERT) = n; | |
2100 | } | |
2101 | if (d[4] == (uintptr_t) old) | |
2102 | break; | |
2103 | else | |
2104 | d = (uintptr_t *) d[4]; | |
2105 | } | |
2106 | while (1); | |
2107 | d[5] = (uintptr_t) new_htab; | |
2108 | } | |
2109 | ||
2110 | static void | |
2111 | gomp_create_artificial_team (void) | |
2112 | { | |
2113 | struct gomp_thread *thr = gomp_thread (); | |
2114 | struct gomp_task_icv *icv; | |
2115 | struct gomp_team *team = gomp_new_team (1); | |
2116 | struct gomp_task *task = thr->task; | |
2117 | icv = task ? &task->icv : &gomp_global_icv; | |
2118 | team->prev_ts = thr->ts; | |
2119 | thr->ts.team = team; | |
2120 | thr->ts.team_id = 0; | |
2121 | thr->ts.work_share = &team->work_shares[0]; | |
2122 | thr->ts.last_work_share = NULL; | |
2123 | #ifdef HAVE_SYNC_BUILTINS | |
2124 | thr->ts.single_count = 0; | |
2125 | #endif | |
2126 | thr->ts.static_trip = 0; | |
2127 | thr->task = &team->implicit_task[0]; | |
2128 | gomp_init_task (thr->task, NULL, icv); | |
2129 | if (task) | |
2130 | { | |
2131 | thr->task = task; | |
2132 | gomp_end_task (); | |
2133 | free (task); | |
2134 | thr->task = &team->implicit_task[0]; | |
2135 | } | |
2136 | #ifdef LIBGOMP_USE_PTHREADS | |
2137 | else | |
2138 | pthread_setspecific (gomp_thread_destructor, thr); | |
2139 | #endif | |
2140 | } | |
2141 | ||
2142 | /* The format of data is: | |
2143 | data[0] cnt | |
2144 | data[1] size | |
2145 | data[2] alignment (on output array pointer) | |
2146 | data[3] allocator (-1 if malloc allocator) | |
2147 | data[4] next pointer | |
2148 | data[5] used internally (htab pointer) | |
2149 | data[6] used internally (end of array) | |
2150 | cnt times | |
2151 | ent[0] address | |
2152 | ent[1] offset | |
2153 | ent[2] used internally (pointer to data[0]) | |
2154 | The entries are sorted by increasing offset, so that a binary | |
2155 | search can be performed. Normally, data[8] is 0, exception is | |
2156 | for worksharing construct task reductions in cancellable parallel, | |
2157 | where at offset 0 there should be space for a pointer and an integer | |
2158 | which are used internally. */ | |
2159 | ||
2160 | void | |
2161 | GOMP_taskgroup_reduction_register (uintptr_t *data) | |
2162 | { | |
2163 | struct gomp_thread *thr = gomp_thread (); | |
2164 | struct gomp_team *team = thr->ts.team; | |
2165 | struct gomp_task *task; | |
2166 | unsigned nthreads; | |
2167 | if (__builtin_expect (team == NULL, 0)) | |
2168 | { | |
2169 | /* The task reduction code needs a team and task, so for | |
2170 | orphaned taskgroups just create the implicit team. */ | |
2171 | gomp_create_artificial_team (); | |
2172 | ialias_call (GOMP_taskgroup_start) (); | |
2173 | team = thr->ts.team; | |
2174 | } | |
2175 | nthreads = team->nthreads; | |
2176 | task = thr->task; | |
2177 | gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); | |
2178 | task->taskgroup->reductions = data; | |
2179 | } | |
2180 | ||
2181 | void | |
2182 | GOMP_taskgroup_reduction_unregister (uintptr_t *data) | |
2183 | { | |
2184 | uintptr_t *d = data; | |
2185 | htab_free ((struct htab *) data[5]); | |
2186 | do | |
2187 | { | |
2188 | gomp_aligned_free ((void *) d[2]); | |
2189 | d = (uintptr_t *) d[4]; | |
2190 | } | |
2191 | while (d && !d[5]); | |
2192 | } | |
2193 | ialias (GOMP_taskgroup_reduction_unregister) | |
2194 | ||
2195 | /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the | |
2196 | original list item or address of previously remapped original list | |
2197 | item to address of the private copy, store that to ptrs[i]. | |
2198 | For i < cntorig, additionally set ptrs[cnt+i] to the address of | |
2199 | the original list item. */ | |
2200 | ||
2201 | void | |
2202 | GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs) | |
2203 | { | |
2204 | struct gomp_thread *thr = gomp_thread (); | |
2205 | struct gomp_task *task = thr->task; | |
2206 | unsigned id = thr->ts.team_id; | |
2207 | uintptr_t *data = task->taskgroup->reductions; | |
2208 | uintptr_t *d; | |
2209 | struct htab *reduction_htab = (struct htab *) data[5]; | |
2210 | size_t i; | |
2211 | for (i = 0; i < cnt; ++i) | |
2212 | { | |
2213 | hash_entry_type ent, n; | |
2214 | __asm ("" : "=g" (ent) : "0" (ptrs + i)); | |
2215 | n = htab_find (reduction_htab, ent); | |
2216 | if (n) | |
2217 | { | |
2218 | uintptr_t *p; | |
2219 | __asm ("" : "=g" (p) : "0" (n)); | |
2220 | /* At this point, p[0] should be equal to (uintptr_t) ptrs[i], | |
2221 | p[1] is the offset within the allocated chunk for each | |
2222 | thread, p[2] is the array registered with | |
2223 | GOMP_taskgroup_reduction_register, d[2] is the base of the | |
2224 | allocated memory and d[1] is the size of the allocated chunk | |
2225 | for one thread. */ | |
2226 | d = (uintptr_t *) p[2]; | |
2227 | ptrs[i] = (void *) (d[2] + id * d[1] + p[1]); | |
2228 | if (__builtin_expect (i < cntorig, 0)) | |
2229 | ptrs[cnt + i] = (void *) p[0]; | |
2230 | continue; | |
2231 | } | |
2232 | d = data; | |
2233 | while (d != NULL) | |
2234 | { | |
2235 | if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6]) | |
2236 | break; | |
2237 | d = (uintptr_t *) d[4]; | |
2238 | } | |
2239 | if (d == NULL) | |
2240 | gomp_fatal ("couldn't find matching task_reduction or reduction with " | |
2241 | "task modifier for %p", ptrs[i]); | |
2242 | uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1]; | |
2243 | ptrs[i] = (void *) (d[2] + id * d[1] + off); | |
2244 | if (__builtin_expect (i < cntorig, 0)) | |
2245 | { | |
2246 | size_t lo = 0, hi = d[0] - 1; | |
2247 | while (lo <= hi) | |
2248 | { | |
2249 | size_t m = (lo + hi) / 2; | |
2250 | if (d[7 + 3 * m + 1] < off) | |
2251 | lo = m + 1; | |
2252 | else if (d[7 + 3 * m + 1] == off) | |
2253 | { | |
2254 | ptrs[cnt + i] = (void *) d[7 + 3 * m]; | |
2255 | break; | |
2256 | } | |
2257 | else | |
2258 | hi = m - 1; | |
2259 | } | |
2260 | if (lo > hi) | |
2261 | gomp_fatal ("couldn't find matching task_reduction or reduction " | |
2262 | "with task modifier for %p", ptrs[i]); | |
2263 | } | |
2264 | } | |
2265 | } | |
2266 | ||
2267 | struct gomp_taskgroup * | |
2268 | gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) | |
2269 | { | |
2270 | struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); | |
2271 | gomp_reduction_register (data, NULL, NULL, nthreads); | |
2272 | taskgroup->reductions = data; | |
2273 | return taskgroup; | |
2274 | } | |
2275 | ||
2276 | void | |
2277 | gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) | |
2278 | { | |
2279 | struct gomp_thread *thr = gomp_thread (); | |
2280 | struct gomp_team *team = thr->ts.team; | |
2281 | struct gomp_task *task = thr->task; | |
2282 | unsigned nthreads = team->nthreads; | |
2283 | gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); | |
2284 | task->taskgroup->reductions = data; | |
2285 | } | |
2286 | ||
2287 | void | |
2288 | gomp_workshare_taskgroup_start (void) | |
2289 | { | |
2290 | struct gomp_thread *thr = gomp_thread (); | |
2291 | struct gomp_team *team = thr->ts.team; | |
2292 | struct gomp_task *task; | |
2293 | ||
2294 | if (team == NULL) | |
2295 | { | |
2296 | gomp_create_artificial_team (); | |
2297 | team = thr->ts.team; | |
2298 | } | |
2299 | task = thr->task; | |
2300 | task->taskgroup = gomp_taskgroup_init (task->taskgroup); | |
2301 | task->taskgroup->workshare = true; | |
2302 | } | |
2303 | ||
2304 | void | |
2305 | GOMP_workshare_task_reduction_unregister (bool cancelled) | |
2306 | { | |
2307 | struct gomp_thread *thr = gomp_thread (); | |
2308 | struct gomp_task *task = thr->task; | |
2309 | struct gomp_team *team = thr->ts.team; | |
2310 | uintptr_t *data = task->taskgroup->reductions; | |
2311 | ialias_call (GOMP_taskgroup_end) (); | |
2312 | if (thr->ts.team_id == 0) | |
2313 | ialias_call (GOMP_taskgroup_reduction_unregister) (data); | |
2314 | else | |
2315 | htab_free ((struct htab *) data[5]); | |
2316 | ||
2317 | if (!cancelled) | |
2318 | gomp_team_barrier_wait (&team->barrier); | |
2319 | } | |
2320 | ||
20906c66 JJ |
2321 | int |
2322 | omp_in_final (void) | |
2323 | { | |
2324 | struct gomp_thread *thr = gomp_thread (); | |
2325 | return thr->task && thr->task->final_task; | |
2326 | } | |
2327 | ||
2328 | ialias (omp_in_final) |