]>
Commit | Line | Data |
---|---|---|
f1717362 | 1 | /* Copyright (C) 2005-2016 Free Software Foundation, Inc. |
1e8e9920 | 2 | Contributed by Richard Henderson <rth@redhat.com>. |
3 | ||
c35c9a62 | 4 | This file is part of the GNU Offloading and Multi Processing Library |
5 | (libgomp). | |
1e8e9920 | 6 | |
7 | Libgomp is free software; you can redistribute it and/or modify it | |
6bc9506f | 8 | under the terms of the GNU General Public License as published by |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
1e8e9920 | 11 | |
12 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
6bc9506f | 14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
1e8e9920 | 15 | more details. |
16 | ||
6bc9506f | 17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
1e8e9920 | 25 | |
26 | /* This file handles the LOOP (FOR/DO) construct. */ | |
27 | ||
fd6481cf | 28 | #include <limits.h> |
1e8e9920 | 29 | #include <stdlib.h> |
fd6481cf | 30 | #include "libgomp.h" |
1e8e9920 | 31 | |
32 | ||
/* Initialize the work share WS from the given loop bounds and schedule.
   START, END, INCR describe the iteration space; SCHED and CHUNK_SIZE
   come from the schedule clause (or the run-sched ICV).  For GFS_DYNAMIC
   the chunk size is pre-scaled by INCR and, when atomic builtins are
   available, WS->mode is set nonzero iff the dynamic iterator can use
   overflow-free arithmetic (see the cheap overflow checks below).  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      /* Pre-multiply so the dynamic iterator can add ->chunk_size
	 directly without an extra multiply per grab.  */
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection.  If nthreads or the chunk size
	       uses the upper half of long's bits, fall back to the
	       checked path (mode 0).  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}
80 | ||
/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  /* Reset the per-thread trip count used by the static iterator.  */
  thr->ts.static_trip = 0;
  /* First thread to arrive creates and initializes the work share.  */
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  /* gomp_iter_static_next returns 0 when a block was assigned, hence
     the negation.  */
  return !gomp_iter_static_next (istart, iend);
}
113 | ||
/* The current dynamic implementation is always monotonic.  The
   entrypoints without nonmonotonic in them have to be always monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* First thread to arrive creates and initializes the work share.  */
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  /* Without atomic builtins, serialize iterator updates with the
     work-share lock.  */
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
143 | ||
/* Similarly as for dynamic, though the question is how can the chunk sizes
   be decreased without a central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* First thread to arrive creates and initializes the work share.  */
  if (gomp_work_share_start (false))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  /* Without atomic builtins, serialize iterator updates with the
     work-share lock.  */
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
171 | ||
172 | bool | |
173 | GOMP_loop_runtime_start (long start, long end, long incr, | |
174 | long *istart, long *iend) | |
175 | { | |
fd6481cf | 176 | struct gomp_task_icv *icv = gomp_icv (false); |
177 | switch (icv->run_sched_var) | |
1e8e9920 | 178 | { |
179 | case GFS_STATIC: | |
43895be5 | 180 | return gomp_loop_static_start (start, end, incr, |
181 | icv->run_sched_chunk_size, | |
1e8e9920 | 182 | istart, iend); |
183 | case GFS_DYNAMIC: | |
43895be5 | 184 | return gomp_loop_dynamic_start (start, end, incr, |
185 | icv->run_sched_chunk_size, | |
1e8e9920 | 186 | istart, iend); |
187 | case GFS_GUIDED: | |
43895be5 | 188 | return gomp_loop_guided_start (start, end, incr, |
189 | icv->run_sched_chunk_size, | |
1e8e9920 | 190 | istart, iend); |
fd6481cf | 191 | case GFS_AUTO: |
192 | /* For now map to schedule(static), later on we could play with feedback | |
193 | driven choice. */ | |
194 | return gomp_loop_static_start (start, end, incr, 0, istart, iend); | |
1e8e9920 | 195 | default: |
196 | abort (); | |
197 | } | |
198 | } | |
199 | ||
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  /* Pass true so the work share is set up for ORDERED sections.  */
  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}
220 | ||
static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      /* Take the lock before releasing the other threads via
	 gomp_work_share_init_done, so the first chunk below is claimed
	 while still holding it.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    /* Presumably registers this thread's first chunk in the ordered
       queue — see ordered.c for the exact semantics.  */
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
245 | ||
static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (true))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      /* Take the lock before releasing the other threads via
	 gomp_work_share_init_done, so the first chunk below is claimed
	 while still holding it.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
270 | ||
271 | bool | |
272 | GOMP_loop_ordered_runtime_start (long start, long end, long incr, | |
273 | long *istart, long *iend) | |
274 | { | |
fd6481cf | 275 | struct gomp_task_icv *icv = gomp_icv (false); |
276 | switch (icv->run_sched_var) | |
1e8e9920 | 277 | { |
278 | case GFS_STATIC: | |
279 | return gomp_loop_ordered_static_start (start, end, incr, | |
43895be5 | 280 | icv->run_sched_chunk_size, |
1e8e9920 | 281 | istart, iend); |
282 | case GFS_DYNAMIC: | |
283 | return gomp_loop_ordered_dynamic_start (start, end, incr, | |
43895be5 | 284 | icv->run_sched_chunk_size, |
1e8e9920 | 285 | istart, iend); |
286 | case GFS_GUIDED: | |
287 | return gomp_loop_ordered_guided_start (start, end, incr, | |
43895be5 | 288 | icv->run_sched_chunk_size, |
1e8e9920 | 289 | istart, iend); |
fd6481cf | 290 | case GFS_AUTO: |
291 | /* For now map to schedule(static), later on we could play with feedback | |
292 | driven choice. */ | |
293 | return gomp_loop_ordered_static_start (start, end, incr, | |
294 | 0, istart, iend); | |
1e8e9920 | 295 | default: |
296 | abort (); | |
297 | } | |
298 | } | |
299 | ||
/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
   and other COUNTS array elements tell the library number of iterations
   in the ordered inner loops.  */

static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (false))
    {
      /* Doacross loops are normalized to [0, counts[0]) step 1.  */
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}
323 | ||
static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      /* Doacross loops are normalized to [0, counts[0]) step 1.  */
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
349 | ||
static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      /* Doacross loops are normalized to [0, counts[0]) step 1.  */
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
375 | ||
376 | bool | |
377 | GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, | |
378 | long *istart, long *iend) | |
379 | { | |
380 | struct gomp_task_icv *icv = gomp_icv (false); | |
381 | switch (icv->run_sched_var) | |
382 | { | |
383 | case GFS_STATIC: | |
384 | return gomp_loop_doacross_static_start (ncounts, counts, | |
385 | icv->run_sched_chunk_size, | |
386 | istart, iend); | |
387 | case GFS_DYNAMIC: | |
388 | return gomp_loop_doacross_dynamic_start (ncounts, counts, | |
389 | icv->run_sched_chunk_size, | |
390 | istart, iend); | |
391 | case GFS_GUIDED: | |
392 | return gomp_loop_doacross_guided_start (ncounts, counts, | |
393 | icv->run_sched_chunk_size, | |
394 | istart, iend); | |
395 | case GFS_AUTO: | |
396 | /* For now map to schedule(static), later on we could play with feedback | |
397 | driven choice. */ | |
398 | return gomp_loop_doacross_static_start (ncounts, counts, | |
399 | 0, istart, iend); | |
400 | default: | |
401 | abort (); | |
402 | } | |
403 | } | |
404 | ||
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  /* gomp_iter_static_next returns 0 when a block was assigned.  */
  return !gomp_iter_static_next (istart, iend);
}
420 | ||
/* Grab the next dynamic chunk; lock-free when atomic builtins exist,
   otherwise under the work-share lock.  */

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
437 | ||
/* Grab the next guided chunk; lock-free when atomic builtins exist,
   otherwise under the work-share lock.  */

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
454 | ||
455 | bool | |
456 | GOMP_loop_runtime_next (long *istart, long *iend) | |
457 | { | |
458 | struct gomp_thread *thr = gomp_thread (); | |
459 | ||
460 | switch (thr->ts.work_share->sched) | |
461 | { | |
462 | case GFS_STATIC: | |
fd6481cf | 463 | case GFS_AUTO: |
1e8e9920 | 464 | return gomp_loop_static_next (istart, iend); |
465 | case GFS_DYNAMIC: | |
466 | return gomp_loop_dynamic_next (istart, iend); | |
467 | case GFS_GUIDED: | |
468 | return gomp_loop_guided_next (istart, iend); | |
469 | default: | |
470 | abort (); | |
471 | } | |
472 | } | |
473 | ||
/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  /* Wait until it is this thread's turn in the ordered sequence before
     moving to the next block.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  /* NOTE(review): gomp_iter_static_next appears to return 0 when a block
     was assigned and a negative value once this thread is completely
     done — only the negative case skips the ordered bookkeeping; confirm
     against iter.c.  */
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}
496 | ||
static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Wait until it is this thread's turn in the ordered sequence before
     claiming the next block.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    /* No more work: tell the ordered machinery this thread is done.  */
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
514 | ||
static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Wait until it is this thread's turn in the ordered sequence before
     claiming the next block.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    /* No more work: tell the ordered machinery this thread is done.  */
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
532 | ||
533 | bool | |
534 | GOMP_loop_ordered_runtime_next (long *istart, long *iend) | |
535 | { | |
536 | struct gomp_thread *thr = gomp_thread (); | |
537 | ||
538 | switch (thr->ts.work_share->sched) | |
539 | { | |
540 | case GFS_STATIC: | |
fd6481cf | 541 | case GFS_AUTO: |
1e8e9920 | 542 | return gomp_loop_ordered_static_next (istart, iend); |
543 | case GFS_DYNAMIC: | |
544 | return gomp_loop_ordered_dynamic_next (istart, iend); | |
545 | case GFS_GUIDED: | |
546 | return gomp_loop_ordered_guided_next (istart, iend); | |
547 | default: | |
548 | abort (); | |
549 | } | |
550 | } | |
551 | ||
/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
			  long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  /* Initialize the first work share before the team starts, so the
     member threads never race to create it.  */
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team);
}
568 | ||
/* Combined parallel + static loop start (pre-4.0 entry point, no flags).  */
void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}
577 | ||
/* Combined parallel + dynamic loop start (pre-4.0 entry point, no flags).  */
void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}
586 | ||
/* Combined parallel + guided loop start (pre-4.0 entry point, no flags).  */
void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}
595 | ||
/* Combined parallel + schedule(runtime) loop start; schedule and chunk
   size come from the run-sched ICV.  */
void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_chunk_size, 0);
}
605 | ||
/* Redirect to the internal alias of GOMP_parallel_end so the calls below
   are not subject to symbol interposition.  */
ialias_redirect (GOMP_parallel_end)

/* Combined parallel + static loop (OpenMP 4.0 style: runs FN in the
   team, then joins via GOMP_parallel_end).  */
void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
618 | ||
/* Combined parallel + dynamic loop (OpenMP 4.0 style).  */
void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
629 | ||
/* Combined parallel + guided loop (OpenMP 4.0 style).  */
void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
640 | ||
/* The nonmonotonic combined entry points behave identically to the
   monotonic ones for now (see the comment above
   gomp_loop_dynamic_start), so alias them when the toolchain supports
   attribute alias, otherwise emit thin wrappers.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
	__attribute__((alias ("GOMP_parallel_loop_guided")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
					 unsigned num_threads, long start,
					 long end, long incr, long chunk_size,
					 unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
					unsigned num_threads, long start,
					long end, long incr, long chunk_size,
					unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif
671 | ||
/* Combined parallel + schedule(runtime) loop (OpenMP 4.0 style);
   schedule and chunk size come from the run-sched ICV.  */
void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var, icv->run_sched_chunk_size,
			    flags);
  fn (data);
  GOMP_parallel_end ();
}
684 | ||
/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}
694 | ||
/* Barrier-synchronizing loop end for cancellable regions; returns true
   if cancellation was observed (see gomp_work_share_end_cancel).  */
bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}
700 | ||
/* Loop end for nowait loops: releases the work share without a barrier.  */
void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}
706 | ||
707 | ||
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
/* Strong aliases: the exported GOMP_* symbols bind directly to the
   static implementations above.  The nonmonotonic entry points alias
   the monotonic implementations (see the comment above
   gomp_loop_dynamic_start).  */
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
	__attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
	__attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
/* No attribute-alias support: provide out-of-line wrappers exporting the
   same names.  */
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
				      long chunk_size, long *istart,
				      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
				     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
					   istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif