]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | /* Copyright (C) 2015-2023 Free Software Foundation, Inc. |
d9a6bd32 JJ |
2 | Contributed by Jakub Jelinek <jakub@redhat.com>. |
3 | ||
4 | This file is part of the GNU Offloading and Multi Processing Library | |
5 | (libgomp). | |
6 | ||
7 | Libgomp is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | /* This file handles the taskloop construct. It is included twice, once | |
27 | for the long and once for unsigned long long variant. */ | |
28 | ||
29 | /* Called when encountering an explicit task directive. If IF_CLAUSE is | |
30 | false, then we must not delay in executing the task. If UNTIED is true, | |
31 | then the task may be executed by any member of the team. */ | |
32 | ||
33 | void | |
34 | GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), | |
35 | long arg_size, long arg_align, unsigned flags, | |
36 | unsigned long num_tasks, int priority, | |
37 | TYPE start, TYPE end, TYPE step) | |
38 | { | |
39 | struct gomp_thread *thr = gomp_thread (); | |
40 | struct gomp_team *team = thr->ts.team; | |
41 | ||
42 | #ifdef HAVE_BROKEN_POSIX_SEMAPHORES | |
43 | /* If pthread_mutex_* is used for omp_*lock*, then each task must be | |
44 | tied to one thread all the time. This means UNTIED tasks must be | |
45 | tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN | |
46 | might be running on different thread than FN. */ | |
47 | if (cpyfn) | |
48 | flags &= ~GOMP_TASK_FLAG_IF; | |
49 | flags &= ~GOMP_TASK_FLAG_UNTIED; | |
50 | #endif | |
51 | ||
52 | /* If parallel or taskgroup has been cancelled, don't start new tasks. */ | |
53 | if (team && gomp_team_barrier_cancelled (&team->barrier)) | |
98acbb31 JJ |
54 | { |
55 | early_return: | |
56 | if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION)) | |
57 | == GOMP_TASK_FLAG_REDUCTION) | |
58 | { | |
59 | struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; | |
60 | uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; | |
61 | /* Tell callers GOMP_taskgroup_reduction_register has not been | |
62 | called. */ | |
63 | ptr[2] = 0; | |
64 | } | |
65 | return; | |
66 | } | |
d9a6bd32 JJ |
67 | |
68 | #ifdef TYPE_is_long | |
69 | TYPE s = step; | |
70 | if (step > 0) | |
71 | { | |
72 | if (start >= end) | |
98acbb31 | 73 | goto early_return; |
d9a6bd32 JJ |
74 | s--; |
75 | } | |
76 | else | |
77 | { | |
78 | if (start <= end) | |
98acbb31 | 79 | goto early_return; |
d9a6bd32 JJ |
80 | s++; |
81 | } | |
82 | UTYPE n = (end - start + s) / step; | |
83 | #else | |
84 | UTYPE n; | |
85 | if (flags & GOMP_TASK_FLAG_UP) | |
86 | { | |
87 | if (start >= end) | |
98acbb31 | 88 | goto early_return; |
d9a6bd32 JJ |
89 | n = (end - start + step - 1) / step; |
90 | } | |
91 | else | |
92 | { | |
93 | if (start <= end) | |
98acbb31 | 94 | goto early_return; |
d9a6bd32 JJ |
95 | n = (start - end - step - 1) / -step; |
96 | } | |
97 | #endif | |
98 | ||
99 | TYPE task_step = step; | |
3bc75533 | 100 | TYPE nfirst_task_step = step; |
d9a6bd32 JJ |
101 | unsigned long nfirst = n; |
102 | if (flags & GOMP_TASK_FLAG_GRAINSIZE) | |
103 | { | |
104 | unsigned long grainsize = num_tasks; | |
105 | #ifdef TYPE_is_long | |
106 | num_tasks = n / grainsize; | |
107 | #else | |
108 | UTYPE ndiv = n / grainsize; | |
109 | num_tasks = ndiv; | |
110 | if (num_tasks != ndiv) | |
111 | num_tasks = ~0UL; | |
112 | #endif | |
3bc75533 JJ |
113 | if ((flags & GOMP_TASK_FLAG_STRICT) |
114 | && num_tasks != ~0ULL) | |
115 | { | |
116 | UTYPE mod = n % grainsize; | |
117 | task_step = (TYPE) grainsize * step; | |
118 | if (mod) | |
119 | { | |
120 | num_tasks++; | |
121 | nfirst_task_step = (TYPE) mod * step; | |
122 | if (num_tasks == 1) | |
123 | task_step = nfirst_task_step; | |
124 | else | |
125 | nfirst = num_tasks - 2; | |
126 | } | |
127 | } | |
128 | else if (num_tasks <= 1) | |
d9a6bd32 JJ |
129 | { |
130 | num_tasks = 1; | |
131 | task_step = end - start; | |
132 | } | |
133 | else if (num_tasks >= grainsize | |
134 | #ifndef TYPE_is_long | |
135 | && num_tasks != ~0UL | |
136 | #endif | |
137 | ) | |
138 | { | |
139 | UTYPE mul = num_tasks * grainsize; | |
140 | task_step = (TYPE) grainsize * step; | |
141 | if (mul != n) | |
142 | { | |
3bc75533 | 143 | nfirst_task_step = task_step; |
d9a6bd32 JJ |
144 | task_step += step; |
145 | nfirst = n - mul - 1; | |
146 | } | |
147 | } | |
148 | else | |
149 | { | |
150 | UTYPE div = n / num_tasks; | |
151 | UTYPE mod = n % num_tasks; | |
152 | task_step = (TYPE) div * step; | |
153 | if (mod) | |
154 | { | |
3bc75533 | 155 | nfirst_task_step = task_step; |
d9a6bd32 JJ |
156 | task_step += step; |
157 | nfirst = mod - 1; | |
158 | } | |
159 | } | |
160 | } | |
161 | else | |
162 | { | |
163 | if (num_tasks == 0) | |
164 | num_tasks = team ? team->nthreads : 1; | |
165 | if (num_tasks >= n) | |
166 | num_tasks = n; | |
167 | else | |
168 | { | |
169 | UTYPE div = n / num_tasks; | |
170 | UTYPE mod = n % num_tasks; | |
171 | task_step = (TYPE) div * step; | |
172 | if (mod) | |
173 | { | |
3bc75533 | 174 | nfirst_task_step = task_step; |
d9a6bd32 JJ |
175 | task_step += step; |
176 | nfirst = mod - 1; | |
177 | } | |
178 | } | |
179 | } | |
180 | ||
181 | if (flags & GOMP_TASK_FLAG_NOGROUP) | |
182 | { | |
28567c40 JJ |
183 | if (__builtin_expect (gomp_cancel_var, 0) |
184 | && thr->task | |
185 | && thr->task->taskgroup) | |
186 | { | |
187 | if (thr->task->taskgroup->cancelled) | |
188 | return; | |
189 | if (thr->task->taskgroup->workshare | |
190 | && thr->task->taskgroup->prev | |
191 | && thr->task->taskgroup->prev->cancelled) | |
192 | return; | |
193 | } | |
d9a6bd32 JJ |
194 | } |
195 | else | |
28567c40 JJ |
196 | { |
197 | ialias_call (GOMP_taskgroup_start) (); | |
198 | if (flags & GOMP_TASK_FLAG_REDUCTION) | |
199 | { | |
200 | struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; | |
201 | uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; | |
202 | ialias_call (GOMP_taskgroup_reduction_register) (ptr); | |
203 | } | |
204 | } | |
d9a6bd32 | 205 | |
e4606348 JJ |
206 | if (priority > gomp_max_task_priority_var) |
207 | priority = gomp_max_task_priority_var; | |
d9a6bd32 JJ |
208 | |
209 | if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL | |
210 | || (thr->task && thr->task->final_task) | |
211 | || team->task_count + num_tasks > 64 * team->nthreads) | |
212 | { | |
213 | unsigned long i; | |
214 | if (__builtin_expect (cpyfn != NULL, 0)) | |
215 | { | |
216 | struct gomp_task task[num_tasks]; | |
217 | struct gomp_task *parent = thr->task; | |
218 | arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); | |
219 | char buf[num_tasks * arg_size + arg_align - 1]; | |
220 | char *arg = (char *) (((uintptr_t) buf + arg_align - 1) | |
221 | & ~(uintptr_t) (arg_align - 1)); | |
222 | char *orig_arg = arg; | |
223 | for (i = 0; i < num_tasks; i++) | |
224 | { | |
225 | gomp_init_task (&task[i], parent, gomp_icv (false)); | |
e4606348 | 226 | task[i].priority = priority; |
d9a6bd32 JJ |
227 | task[i].kind = GOMP_TASK_UNDEFERRED; |
228 | task[i].final_task = (thr->task && thr->task->final_task) | |
229 | || (flags & GOMP_TASK_FLAG_FINAL); | |
230 | if (thr->task) | |
231 | { | |
232 | task[i].in_tied_task = thr->task->in_tied_task; | |
233 | task[i].taskgroup = thr->task->taskgroup; | |
234 | } | |
235 | thr->task = &task[i]; | |
236 | cpyfn (arg, data); | |
237 | arg += arg_size; | |
238 | } | |
239 | arg = orig_arg; | |
240 | for (i = 0; i < num_tasks; i++) | |
241 | { | |
242 | thr->task = &task[i]; | |
243 | ((TYPE *)arg)[0] = start; | |
244 | start += task_step; | |
245 | ((TYPE *)arg)[1] = start; | |
246 | if (i == nfirst) | |
3bc75533 | 247 | task_step = nfirst_task_step; |
d9a6bd32 JJ |
248 | fn (arg); |
249 | arg += arg_size; | |
e4606348 JJ |
250 | if (!priority_queue_empty_p (&task[i].children_queue, |
251 | MEMMODEL_RELAXED)) | |
d9a6bd32 JJ |
252 | { |
253 | gomp_mutex_lock (&team->task_lock); | |
e4606348 | 254 | gomp_clear_parent (&task[i].children_queue); |
d9a6bd32 JJ |
255 | gomp_mutex_unlock (&team->task_lock); |
256 | } | |
257 | gomp_end_task (); | |
258 | } | |
259 | } | |
260 | else | |
261 | for (i = 0; i < num_tasks; i++) | |
262 | { | |
263 | struct gomp_task task; | |
264 | ||
265 | gomp_init_task (&task, thr->task, gomp_icv (false)); | |
e4606348 | 266 | task.priority = priority; |
d9a6bd32 JJ |
267 | task.kind = GOMP_TASK_UNDEFERRED; |
268 | task.final_task = (thr->task && thr->task->final_task) | |
269 | || (flags & GOMP_TASK_FLAG_FINAL); | |
270 | if (thr->task) | |
271 | { | |
272 | task.in_tied_task = thr->task->in_tied_task; | |
273 | task.taskgroup = thr->task->taskgroup; | |
274 | } | |
275 | thr->task = &task; | |
276 | ((TYPE *)data)[0] = start; | |
277 | start += task_step; | |
278 | ((TYPE *)data)[1] = start; | |
279 | if (i == nfirst) | |
3bc75533 | 280 | task_step = nfirst_task_step; |
d9a6bd32 | 281 | fn (data); |
e4606348 JJ |
282 | if (!priority_queue_empty_p (&task.children_queue, |
283 | MEMMODEL_RELAXED)) | |
d9a6bd32 JJ |
284 | { |
285 | gomp_mutex_lock (&team->task_lock); | |
e4606348 | 286 | gomp_clear_parent (&task.children_queue); |
d9a6bd32 JJ |
287 | gomp_mutex_unlock (&team->task_lock); |
288 | } | |
289 | gomp_end_task (); | |
290 | } | |
291 | } | |
292 | else | |
293 | { | |
294 | struct gomp_task *tasks[num_tasks]; | |
295 | struct gomp_task *parent = thr->task; | |
296 | struct gomp_taskgroup *taskgroup = parent->taskgroup; | |
297 | char *arg; | |
298 | int do_wake; | |
299 | unsigned long i; | |
300 | ||
301 | for (i = 0; i < num_tasks; i++) | |
302 | { | |
303 | struct gomp_task *task | |
304 | = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); | |
305 | tasks[i] = task; | |
306 | arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) | |
307 | & ~(uintptr_t) (arg_align - 1)); | |
308 | gomp_init_task (task, parent, gomp_icv (false)); | |
e4606348 | 309 | task->priority = priority; |
d9a6bd32 JJ |
310 | task->kind = GOMP_TASK_UNDEFERRED; |
311 | task->in_tied_task = parent->in_tied_task; | |
312 | task->taskgroup = taskgroup; | |
313 | thr->task = task; | |
314 | if (cpyfn) | |
315 | { | |
316 | cpyfn (arg, data); | |
317 | task->copy_ctors_done = true; | |
318 | } | |
319 | else | |
320 | memcpy (arg, data, arg_size); | |
321 | ((TYPE *)arg)[0] = start; | |
322 | start += task_step; | |
323 | ((TYPE *)arg)[1] = start; | |
324 | if (i == nfirst) | |
3bc75533 | 325 | task_step = nfirst_task_step; |
d9a6bd32 JJ |
326 | thr->task = parent; |
327 | task->kind = GOMP_TASK_WAITING; | |
328 | task->fn = fn; | |
329 | task->fn_data = arg; | |
330 | task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; | |
331 | } | |
332 | gomp_mutex_lock (&team->task_lock); | |
333 | /* If parallel or taskgroup has been cancelled, don't start new | |
334 | tasks. */ | |
28567c40 JJ |
335 | if (__builtin_expect (gomp_cancel_var, 0) |
336 | && cpyfn == NULL) | |
d9a6bd32 | 337 | { |
28567c40 JJ |
338 | if (gomp_team_barrier_cancelled (&team->barrier)) |
339 | { | |
340 | do_cancel: | |
341 | gomp_mutex_unlock (&team->task_lock); | |
342 | for (i = 0; i < num_tasks; i++) | |
343 | { | |
344 | gomp_finish_task (tasks[i]); | |
345 | free (tasks[i]); | |
346 | } | |
347 | if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) | |
348 | ialias_call (GOMP_taskgroup_end) (); | |
349 | return; | |
350 | } | |
351 | if (taskgroup) | |
d9a6bd32 | 352 | { |
28567c40 JJ |
353 | if (taskgroup->cancelled) |
354 | goto do_cancel; | |
355 | if (taskgroup->workshare | |
356 | && taskgroup->prev | |
357 | && taskgroup->prev->cancelled) | |
358 | goto do_cancel; | |
d9a6bd32 | 359 | } |
d9a6bd32 JJ |
360 | } |
361 | if (taskgroup) | |
362 | taskgroup->num_children += num_tasks; | |
363 | for (i = 0; i < num_tasks; i++) | |
364 | { | |
365 | struct gomp_task *task = tasks[i]; | |
e4606348 JJ |
366 | priority_queue_insert (PQ_CHILDREN, &parent->children_queue, |
367 | task, priority, | |
368 | PRIORITY_INSERT_BEGIN, | |
369 | /*last_parent_depends_on=*/false, | |
370 | task->parent_depends_on); | |
d9a6bd32 | 371 | if (taskgroup) |
e4606348 JJ |
372 | priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, |
373 | task, priority, PRIORITY_INSERT_BEGIN, | |
374 | /*last_parent_depends_on=*/false, | |
375 | task->parent_depends_on); | |
376 | priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, | |
377 | PRIORITY_INSERT_END, | |
378 | /*last_parent_depends_on=*/false, | |
379 | task->parent_depends_on); | |
d9a6bd32 JJ |
380 | ++team->task_count; |
381 | ++team->task_queued_count; | |
382 | } | |
383 | gomp_team_barrier_set_task_pending (&team->barrier); | |
384 | if (team->task_running_count + !parent->in_tied_task | |
385 | < team->nthreads) | |
386 | { | |
387 | do_wake = team->nthreads - team->task_running_count | |
388 | - !parent->in_tied_task; | |
389 | if ((unsigned long) do_wake > num_tasks) | |
390 | do_wake = num_tasks; | |
391 | } | |
392 | else | |
393 | do_wake = 0; | |
394 | gomp_mutex_unlock (&team->task_lock); | |
395 | if (do_wake) | |
396 | gomp_team_barrier_wake (&team->barrier, do_wake); | |
397 | } | |
398 | if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) | |
399 | ialias_call (GOMP_taskgroup_end) (); | |
400 | } |