]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgomp/loop.c
libgomp: Update documentation for indirect calls in target regions
[thirdparty/gcc.git] / libgomp / loop.c
1 /* Copyright (C) 2005-2024 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the LOOP (FOR/DO) construct. */
27
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include "libgomp.h"
32
33
/* NOTE(review): ialias and ialias_redirect are macros defined outside this
   file (presumably in libgomp.h).  They appear to set up the internal
   aliases that the ialias_call (GOMP_loop_runtime_next) uses and the
   direct GOMP_taskgroup_reduction_register calls below depend on --
   confirm against the macro definitions.  */
34 ialias (GOMP_loop_runtime_next)
35 ialias_redirect (GOMP_taskgroup_reduction_register)
36
37 /* Initialize the given work share construct from the given arguments. */
38
39 static inline void
40 gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
41 enum gomp_schedule_type sched, long chunk_size)
42 {
43 ws->sched = sched;
44 ws->chunk_size = chunk_size;
45 /* Canonicalize loops that have zero iterations to ->next == ->end. */
46 ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
47 ? start : end;
48 ws->incr = incr;
49 ws->next = start;
50 if (sched == GFS_DYNAMIC)
51 {
52 ws->chunk_size *= incr;
53
54 #ifdef HAVE_SYNC_BUILTINS
55 {
56 /* For dynamic scheduling prepare things to make each iteration
57 faster. */
58 struct gomp_thread *thr = gomp_thread ();
59 struct gomp_team *team = thr->ts.team;
60 long nthreads = team ? team->nthreads : 1;
61
62 if (__builtin_expect (incr > 0, 1))
63 {
64 /* Cheap overflow protection. */
65 if (__builtin_expect ((nthreads | ws->chunk_size)
66 >= 1UL << (sizeof (long)
67 * __CHAR_BIT__ / 2 - 1), 0))
68 ws->mode = 0;
69 else
70 ws->mode = ws->end < (LONG_MAX
71 - (nthreads + 1) * ws->chunk_size);
72 }
73 /* Cheap overflow protection. */
74 else if (__builtin_expect ((nthreads | -ws->chunk_size)
75 >= 1UL << (sizeof (long)
76 * __CHAR_BIT__ / 2 - 1), 0))
77 ws->mode = 0;
78 else
79 ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
80 }
81 #endif
82 }
83 }
84
85 /* The *_start routines are called when first encountering a loop construct
86 that is not bound directly to a parallel construct. The first thread
87 that arrives will create the work-share construct; subsequent threads
88 will see the construct exists and allocate work from it.
89
90 START, END, INCR are the bounds of the loop; due to the restrictions of
91 OpenMP, these values must be the same in every thread. This is not
92 verified (nor is it entirely verifiable, since START is not necessarily
93 retained intact in the work-share data structure). CHUNK_SIZE is the
94 scheduling parameter; again this must be identical in all threads.
95
96 Returns true if there's any work for this thread to perform. If so,
97 *ISTART and *IEND are filled with the bounds of the iteration block
98 allocated to this thread. Returns false if all work was assigned to
99 other threads prior to this thread's arrival. */
100
101 static bool
102 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
103 long *istart, long *iend)
104 {
105 struct gomp_thread *thr = gomp_thread ();
106
107 thr->ts.static_trip = 0;
108 if (gomp_work_share_start (0))
109 {
110 gomp_loop_init (thr->ts.work_share, start, end, incr,
111 GFS_STATIC, chunk_size);
112 gomp_work_share_init_done ();
113 }
114
115 return !gomp_iter_static_next (istart, iend);
116 }
117
118 /* The current dynamic implementation is always monotonic. The
119 entrypoints without nonmonotonic in them have to be always monotonic,
120 but the nonmonotonic ones could be changed to use work-stealing for
121 improved scalability. */
122
123 static bool
124 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
125 long *istart, long *iend)
126 {
127 struct gomp_thread *thr = gomp_thread ();
128 bool ret;
129
130 if (gomp_work_share_start (0))
131 {
132 gomp_loop_init (thr->ts.work_share, start, end, incr,
133 GFS_DYNAMIC, chunk_size);
134 gomp_work_share_init_done ();
135 }
136
137 #ifdef HAVE_SYNC_BUILTINS
138 ret = gomp_iter_dynamic_next (istart, iend);
139 #else
140 gomp_mutex_lock (&thr->ts.work_share->lock);
141 ret = gomp_iter_dynamic_next_locked (istart, iend);
142 gomp_mutex_unlock (&thr->ts.work_share->lock);
143 #endif
144
145 return ret;
146 }
147
148 /* Similarly as for dynamic, though the question is how can the chunk sizes
149 be decreased without a central locking or atomics. */
150
151 static bool
152 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
153 long *istart, long *iend)
154 {
155 struct gomp_thread *thr = gomp_thread ();
156 bool ret;
157
158 if (gomp_work_share_start (0))
159 {
160 gomp_loop_init (thr->ts.work_share, start, end, incr,
161 GFS_GUIDED, chunk_size);
162 gomp_work_share_init_done ();
163 }
164
165 #ifdef HAVE_SYNC_BUILTINS
166 ret = gomp_iter_guided_next (istart, iend);
167 #else
168 gomp_mutex_lock (&thr->ts.work_share->lock);
169 ret = gomp_iter_guided_next_locked (istart, iend);
170 gomp_mutex_unlock (&thr->ts.work_share->lock);
171 #endif
172
173 return ret;
174 }
175
176 bool
177 GOMP_loop_runtime_start (long start, long end, long incr,
178 long *istart, long *iend)
179 {
180 struct gomp_task_icv *icv = gomp_icv (false);
181 switch (icv->run_sched_var & ~GFS_MONOTONIC)
182 {
183 case GFS_STATIC:
184 return gomp_loop_static_start (start, end, incr,
185 icv->run_sched_chunk_size,
186 istart, iend);
187 case GFS_DYNAMIC:
188 return gomp_loop_dynamic_start (start, end, incr,
189 icv->run_sched_chunk_size,
190 istart, iend);
191 case GFS_GUIDED:
192 return gomp_loop_guided_start (start, end, incr,
193 icv->run_sched_chunk_size,
194 istart, iend);
195 case GFS_AUTO:
196 /* For now map to schedule(static), later on we could play with feedback
197 driven choice. */
198 return gomp_loop_static_start (start, end, incr, 0, istart, iend);
199 default:
200 abort ();
201 }
202 }
203
204 static long
205 gomp_adjust_sched (long sched, long *chunk_size)
206 {
207 sched &= ~GFS_MONOTONIC;
208 switch (sched)
209 {
210 case GFS_STATIC:
211 case GFS_DYNAMIC:
212 case GFS_GUIDED:
213 return sched;
214 /* GFS_RUNTIME is used for runtime schedule without monotonic
215 or nonmonotonic modifiers on the clause.
216 GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
217 modifier. */
218 case GFS_RUNTIME:
219 /* GFS_AUTO is used for runtime schedule with nonmonotonic
220 modifier. */
221 case GFS_AUTO:
222 {
223 struct gomp_task_icv *icv = gomp_icv (false);
224 sched = icv->run_sched_var & ~GFS_MONOTONIC;
225 switch (sched)
226 {
227 case GFS_STATIC:
228 case GFS_DYNAMIC:
229 case GFS_GUIDED:
230 *chunk_size = icv->run_sched_chunk_size;
231 break;
232 case GFS_AUTO:
233 sched = GFS_STATIC;
234 *chunk_size = 0;
235 break;
236 default:
237 abort ();
238 }
239 return sched;
240 }
241 default:
242 abort ();
243 }
244 }
245
246 bool
247 GOMP_loop_start (long start, long end, long incr, long sched,
248 long chunk_size, long *istart, long *iend,
249 uintptr_t *reductions, void **mem)
250 {
251 struct gomp_thread *thr = gomp_thread ();
252
253 thr->ts.static_trip = 0;
254 if (reductions)
255 gomp_workshare_taskgroup_start ();
256 if (gomp_work_share_start (0))
257 {
258 sched = gomp_adjust_sched (sched, &chunk_size);
259 gomp_loop_init (thr->ts.work_share, start, end, incr,
260 sched, chunk_size);
261 if (reductions)
262 {
263 GOMP_taskgroup_reduction_register (reductions);
264 thr->task->taskgroup->workshare = true;
265 thr->ts.work_share->task_reductions = reductions;
266 }
267 if (mem)
268 {
269 uintptr_t size = (uintptr_t) *mem;
270 #define INLINE_ORDERED_TEAM_IDS_OFF \
271 ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
272 + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
273 if (sizeof (struct gomp_work_share)
274 <= INLINE_ORDERED_TEAM_IDS_OFF
275 || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
276 || size > (sizeof (struct gomp_work_share)
277 - INLINE_ORDERED_TEAM_IDS_OFF))
278 *mem
279 = (void *) (thr->ts.work_share->ordered_team_ids
280 = gomp_malloc_cleared (size));
281 else
282 *mem = memset (((char *) thr->ts.work_share)
283 + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
284 }
285 gomp_work_share_init_done ();
286 }
287 else
288 {
289 if (reductions)
290 {
291 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
292 gomp_workshare_task_reduction_register (reductions,
293 first_reductions);
294 }
295 if (mem)
296 {
297 if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
298 & (__alignof__ (long long) - 1)) == 0)
299 *mem = (void *) thr->ts.work_share->ordered_team_ids;
300 else
301 {
302 uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
303 p += __alignof__ (long long) - 1;
304 p &= ~(__alignof__ (long long) - 1);
305 *mem = (void *) p;
306 }
307 }
308 }
309
310 if (!istart)
311 return true;
312 return ialias_call (GOMP_loop_runtime_next) (istart, iend);
313 }
314
315 /* The *_ordered_*_start routines are similar. The only difference is that
316 this work-share construct is initialized to expect an ORDERED section. */
317
318 static bool
319 gomp_loop_ordered_static_start (long start, long end, long incr,
320 long chunk_size, long *istart, long *iend)
321 {
322 struct gomp_thread *thr = gomp_thread ();
323
324 thr->ts.static_trip = 0;
325 if (gomp_work_share_start (1))
326 {
327 gomp_loop_init (thr->ts.work_share, start, end, incr,
328 GFS_STATIC, chunk_size);
329 gomp_ordered_static_init ();
330 gomp_work_share_init_done ();
331 }
332
333 return !gomp_iter_static_next (istart, iend);
334 }
335
336 static bool
337 gomp_loop_ordered_dynamic_start (long start, long end, long incr,
338 long chunk_size, long *istart, long *iend)
339 {
340 struct gomp_thread *thr = gomp_thread ();
341 bool ret;
342
343 if (gomp_work_share_start (1))
344 {
345 gomp_loop_init (thr->ts.work_share, start, end, incr,
346 GFS_DYNAMIC, chunk_size);
347 gomp_mutex_lock (&thr->ts.work_share->lock);
348 gomp_work_share_init_done ();
349 }
350 else
351 gomp_mutex_lock (&thr->ts.work_share->lock);
352
353 ret = gomp_iter_dynamic_next_locked (istart, iend);
354 if (ret)
355 gomp_ordered_first ();
356 gomp_mutex_unlock (&thr->ts.work_share->lock);
357
358 return ret;
359 }
360
361 static bool
362 gomp_loop_ordered_guided_start (long start, long end, long incr,
363 long chunk_size, long *istart, long *iend)
364 {
365 struct gomp_thread *thr = gomp_thread ();
366 bool ret;
367
368 if (gomp_work_share_start (1))
369 {
370 gomp_loop_init (thr->ts.work_share, start, end, incr,
371 GFS_GUIDED, chunk_size);
372 gomp_mutex_lock (&thr->ts.work_share->lock);
373 gomp_work_share_init_done ();
374 }
375 else
376 gomp_mutex_lock (&thr->ts.work_share->lock);
377
378 ret = gomp_iter_guided_next_locked (istart, iend);
379 if (ret)
380 gomp_ordered_first ();
381 gomp_mutex_unlock (&thr->ts.work_share->lock);
382
383 return ret;
384 }
385
386 bool
387 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
388 long *istart, long *iend)
389 {
390 struct gomp_task_icv *icv = gomp_icv (false);
391 switch (icv->run_sched_var & ~GFS_MONOTONIC)
392 {
393 case GFS_STATIC:
394 return gomp_loop_ordered_static_start (start, end, incr,
395 icv->run_sched_chunk_size,
396 istart, iend);
397 case GFS_DYNAMIC:
398 return gomp_loop_ordered_dynamic_start (start, end, incr,
399 icv->run_sched_chunk_size,
400 istart, iend);
401 case GFS_GUIDED:
402 return gomp_loop_ordered_guided_start (start, end, incr,
403 icv->run_sched_chunk_size,
404 istart, iend);
405 case GFS_AUTO:
406 /* For now map to schedule(static), later on we could play with feedback
407 driven choice. */
408 return gomp_loop_ordered_static_start (start, end, incr,
409 0, istart, iend);
410 default:
411 abort ();
412 }
413 }
414
415 bool
416 GOMP_loop_ordered_start (long start, long end, long incr, long sched,
417 long chunk_size, long *istart, long *iend,
418 uintptr_t *reductions, void **mem)
419 {
420 struct gomp_thread *thr = gomp_thread ();
421 size_t ordered = 1;
422 bool ret;
423
424 thr->ts.static_trip = 0;
425 if (reductions)
426 gomp_workshare_taskgroup_start ();
427 if (mem)
428 ordered += (uintptr_t) *mem;
429 if (gomp_work_share_start (ordered))
430 {
431 sched = gomp_adjust_sched (sched, &chunk_size);
432 gomp_loop_init (thr->ts.work_share, start, end, incr,
433 sched, chunk_size);
434 if (reductions)
435 {
436 GOMP_taskgroup_reduction_register (reductions);
437 thr->task->taskgroup->workshare = true;
438 thr->ts.work_share->task_reductions = reductions;
439 }
440 if (sched == GFS_STATIC)
441 gomp_ordered_static_init ();
442 else
443 gomp_mutex_lock (&thr->ts.work_share->lock);
444 gomp_work_share_init_done ();
445 }
446 else
447 {
448 if (reductions)
449 {
450 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
451 gomp_workshare_task_reduction_register (reductions,
452 first_reductions);
453 }
454 sched = thr->ts.work_share->sched;
455 if (sched != GFS_STATIC)
456 gomp_mutex_lock (&thr->ts.work_share->lock);
457 }
458
459 if (mem)
460 {
461 uintptr_t p
462 = (uintptr_t) (thr->ts.work_share->ordered_team_ids
463 + (thr->ts.team ? thr->ts.team->nthreads : 1));
464 p += __alignof__ (long long) - 1;
465 p &= ~(__alignof__ (long long) - 1);
466 *mem = (void *) p;
467 }
468
469 switch (sched)
470 {
471 case GFS_STATIC:
472 case GFS_AUTO:
473 return !gomp_iter_static_next (istart, iend);
474 case GFS_DYNAMIC:
475 ret = gomp_iter_dynamic_next_locked (istart, iend);
476 break;
477 case GFS_GUIDED:
478 ret = gomp_iter_guided_next_locked (istart, iend);
479 break;
480 default:
481 abort ();
482 }
483
484 if (ret)
485 gomp_ordered_first ();
486 gomp_mutex_unlock (&thr->ts.work_share->lock);
487 return ret;
488 }
489
490 /* The *_doacross_*_start routines are similar. The only difference is that
491 this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
492 section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
493 and other COUNTS array elements tell the library number of iterations
494 in the ordered inner loops. */
495
496 static bool
497 gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
498 long chunk_size, long *istart, long *iend)
499 {
500 struct gomp_thread *thr = gomp_thread ();
501
502 thr->ts.static_trip = 0;
503 if (gomp_work_share_start (0))
504 {
505 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
506 GFS_STATIC, chunk_size);
507 gomp_doacross_init (ncounts, counts, chunk_size, 0);
508 gomp_work_share_init_done ();
509 }
510
511 return !gomp_iter_static_next (istart, iend);
512 }
513
514 static bool
515 gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
516 long chunk_size, long *istart, long *iend)
517 {
518 struct gomp_thread *thr = gomp_thread ();
519 bool ret;
520
521 if (gomp_work_share_start (0))
522 {
523 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
524 GFS_DYNAMIC, chunk_size);
525 gomp_doacross_init (ncounts, counts, chunk_size, 0);
526 gomp_work_share_init_done ();
527 }
528
529 #ifdef HAVE_SYNC_BUILTINS
530 ret = gomp_iter_dynamic_next (istart, iend);
531 #else
532 gomp_mutex_lock (&thr->ts.work_share->lock);
533 ret = gomp_iter_dynamic_next_locked (istart, iend);
534 gomp_mutex_unlock (&thr->ts.work_share->lock);
535 #endif
536
537 return ret;
538 }
539
540 static bool
541 gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
542 long chunk_size, long *istart, long *iend)
543 {
544 struct gomp_thread *thr = gomp_thread ();
545 bool ret;
546
547 if (gomp_work_share_start (0))
548 {
549 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
550 GFS_GUIDED, chunk_size);
551 gomp_doacross_init (ncounts, counts, chunk_size, 0);
552 gomp_work_share_init_done ();
553 }
554
555 #ifdef HAVE_SYNC_BUILTINS
556 ret = gomp_iter_guided_next (istart, iend);
557 #else
558 gomp_mutex_lock (&thr->ts.work_share->lock);
559 ret = gomp_iter_guided_next_locked (istart, iend);
560 gomp_mutex_unlock (&thr->ts.work_share->lock);
561 #endif
562
563 return ret;
564 }
565
566 bool
567 GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
568 long *istart, long *iend)
569 {
570 struct gomp_task_icv *icv = gomp_icv (false);
571 switch (icv->run_sched_var & ~GFS_MONOTONIC)
572 {
573 case GFS_STATIC:
574 return gomp_loop_doacross_static_start (ncounts, counts,
575 icv->run_sched_chunk_size,
576 istart, iend);
577 case GFS_DYNAMIC:
578 return gomp_loop_doacross_dynamic_start (ncounts, counts,
579 icv->run_sched_chunk_size,
580 istart, iend);
581 case GFS_GUIDED:
582 return gomp_loop_doacross_guided_start (ncounts, counts,
583 icv->run_sched_chunk_size,
584 istart, iend);
585 case GFS_AUTO:
586 /* For now map to schedule(static), later on we could play with feedback
587 driven choice. */
588 return gomp_loop_doacross_static_start (ncounts, counts,
589 0, istart, iend);
590 default:
591 abort ();
592 }
593 }
594
595 bool
596 GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
597 long chunk_size, long *istart, long *iend,
598 uintptr_t *reductions, void **mem)
599 {
600 struct gomp_thread *thr = gomp_thread ();
601
602 thr->ts.static_trip = 0;
603 if (reductions)
604 gomp_workshare_taskgroup_start ();
605 if (gomp_work_share_start (0))
606 {
607 size_t extra = 0;
608 if (mem)
609 extra = (uintptr_t) *mem;
610 sched = gomp_adjust_sched (sched, &chunk_size);
611 gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
612 sched, chunk_size);
613 gomp_doacross_init (ncounts, counts, chunk_size, extra);
614 if (reductions)
615 {
616 GOMP_taskgroup_reduction_register (reductions);
617 thr->task->taskgroup->workshare = true;
618 thr->ts.work_share->task_reductions = reductions;
619 }
620 gomp_work_share_init_done ();
621 }
622 else
623 {
624 if (reductions)
625 {
626 uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
627 gomp_workshare_task_reduction_register (reductions,
628 first_reductions);
629 }
630 sched = thr->ts.work_share->sched;
631 }
632
633 if (mem)
634 *mem = thr->ts.work_share->doacross->extra;
635
636 return ialias_call (GOMP_loop_runtime_next) (istart, iend);
637 }
638
639 /* The *_next routines are called when the thread completes processing of
640 the iteration block currently assigned to it. If the work-share
641 construct is bound directly to a parallel construct, then the iteration
642 bounds may have been set up before the parallel. In which case, this
643 may be the first iteration for the thread.
644
645 Returns true if there is work remaining to be performed; *ISTART and
646 *IEND are filled with a new iteration block. Returns false if all work
647 has been assigned. */
648
/* Fetch the next block of a statically scheduled loop.  A zero result
   from gomp_iter_static_next is reported as true.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return gomp_iter_static_next (istart, iend) == 0;
}
654
655 static bool
656 gomp_loop_dynamic_next (long *istart, long *iend)
657 {
658 bool ret;
659
660 #ifdef HAVE_SYNC_BUILTINS
661 ret = gomp_iter_dynamic_next (istart, iend);
662 #else
663 struct gomp_thread *thr = gomp_thread ();
664 gomp_mutex_lock (&thr->ts.work_share->lock);
665 ret = gomp_iter_dynamic_next_locked (istart, iend);
666 gomp_mutex_unlock (&thr->ts.work_share->lock);
667 #endif
668
669 return ret;
670 }
671
672 static bool
673 gomp_loop_guided_next (long *istart, long *iend)
674 {
675 bool ret;
676
677 #ifdef HAVE_SYNC_BUILTINS
678 ret = gomp_iter_guided_next (istart, iend);
679 #else
680 struct gomp_thread *thr = gomp_thread ();
681 gomp_mutex_lock (&thr->ts.work_share->lock);
682 ret = gomp_iter_guided_next_locked (istart, iend);
683 gomp_mutex_unlock (&thr->ts.work_share->lock);
684 #endif
685
686 return ret;
687 }
688
689 bool
690 GOMP_loop_runtime_next (long *istart, long *iend)
691 {
692 struct gomp_thread *thr = gomp_thread ();
693
694 switch (thr->ts.work_share->sched)
695 {
696 case GFS_STATIC:
697 case GFS_AUTO:
698 return gomp_loop_static_next (istart, iend);
699 case GFS_DYNAMIC:
700 return gomp_loop_dynamic_next (istart, iend);
701 case GFS_GUIDED:
702 return gomp_loop_guided_next (istart, iend);
703 default:
704 abort ();
705 }
706 }
707
708 /* The *_ordered_*_next routines are called when the thread completes
709 processing of the iteration block currently assigned to it.
710
711 Returns true if there is work remaining to be performed; *ISTART and
712 *IEND are filled with a new iteration block. Returns false if all work
713 has been assigned. */
714
715 static bool
716 gomp_loop_ordered_static_next (long *istart, long *iend)
717 {
718 struct gomp_thread *thr = gomp_thread ();
719 int test;
720
721 gomp_ordered_sync ();
722 gomp_mutex_lock (&thr->ts.work_share->lock);
723 test = gomp_iter_static_next (istart, iend);
724 if (test >= 0)
725 gomp_ordered_static_next ();
726 gomp_mutex_unlock (&thr->ts.work_share->lock);
727
728 return test == 0;
729 }
730
731 static bool
732 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
733 {
734 struct gomp_thread *thr = gomp_thread ();
735 bool ret;
736
737 gomp_ordered_sync ();
738 gomp_mutex_lock (&thr->ts.work_share->lock);
739 ret = gomp_iter_dynamic_next_locked (istart, iend);
740 if (ret)
741 gomp_ordered_next ();
742 else
743 gomp_ordered_last ();
744 gomp_mutex_unlock (&thr->ts.work_share->lock);
745
746 return ret;
747 }
748
749 static bool
750 gomp_loop_ordered_guided_next (long *istart, long *iend)
751 {
752 struct gomp_thread *thr = gomp_thread ();
753 bool ret;
754
755 gomp_ordered_sync ();
756 gomp_mutex_lock (&thr->ts.work_share->lock);
757 ret = gomp_iter_guided_next_locked (istart, iend);
758 if (ret)
759 gomp_ordered_next ();
760 else
761 gomp_ordered_last ();
762 gomp_mutex_unlock (&thr->ts.work_share->lock);
763
764 return ret;
765 }
766
767 bool
768 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
769 {
770 struct gomp_thread *thr = gomp_thread ();
771
772 switch (thr->ts.work_share->sched)
773 {
774 case GFS_STATIC:
775 case GFS_AUTO:
776 return gomp_loop_ordered_static_next (istart, iend);
777 case GFS_DYNAMIC:
778 return gomp_loop_ordered_dynamic_next (istart, iend);
779 case GFS_GUIDED:
780 return gomp_loop_ordered_guided_next (istart, iend);
781 default:
782 abort ();
783 }
784 }
785
786 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
787 to avoid one synchronization once we get into the loop. */
788
789 static void
790 gomp_parallel_loop_start (void (*fn) (void *), void *data,
791 unsigned num_threads, long start, long end,
792 long incr, enum gomp_schedule_type sched,
793 long chunk_size, unsigned int flags)
794 {
795 struct gomp_team *team;
796
797 num_threads = gomp_resolve_num_threads (num_threads, 0);
798 team = gomp_new_team (num_threads);
799 gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
800 gomp_team_start (fn, data, num_threads, flags, team, NULL);
801 }
802
803 void
804 GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
805 unsigned num_threads, long start, long end,
806 long incr, long chunk_size)
807 {
808 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
809 GFS_STATIC, chunk_size, 0);
810 }
811
812 void
813 GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
814 unsigned num_threads, long start, long end,
815 long incr, long chunk_size)
816 {
817 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
818 GFS_DYNAMIC, chunk_size, 0);
819 }
820
821 void
822 GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
823 unsigned num_threads, long start, long end,
824 long incr, long chunk_size)
825 {
826 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
827 GFS_GUIDED, chunk_size, 0);
828 }
829
830 void
831 GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
832 unsigned num_threads, long start, long end,
833 long incr)
834 {
835 struct gomp_task_icv *icv = gomp_icv (false);
836 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
837 icv->run_sched_var & ~GFS_MONOTONIC,
838 icv->run_sched_chunk_size, 0);
839 }
840
/* NOTE(review): ialias_redirect is a macro defined outside this file; it
   presumably routes the GOMP_parallel_end calls below through an internal
   alias -- confirm against its definition.  */
841 ialias_redirect (GOMP_parallel_end)
842
843 void
844 GOMP_parallel_loop_static (void (*fn) (void *), void *data,
845 unsigned num_threads, long start, long end,
846 long incr, long chunk_size, unsigned flags)
847 {
848 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
849 GFS_STATIC, chunk_size, flags);
850 fn (data);
851 GOMP_parallel_end ();
852 }
853
854 void
855 GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
856 unsigned num_threads, long start, long end,
857 long incr, long chunk_size, unsigned flags)
858 {
859 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
860 GFS_DYNAMIC, chunk_size, flags);
861 fn (data);
862 GOMP_parallel_end ();
863 }
864
865 void
866 GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
867 unsigned num_threads, long start, long end,
868 long incr, long chunk_size, unsigned flags)
869 {
870 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
871 GFS_GUIDED, chunk_size, flags);
872 fn (data);
873 GOMP_parallel_end ();
874 }
875
876 void
877 GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
878 unsigned num_threads, long start, long end,
879 long incr, unsigned flags)
880 {
881 struct gomp_task_icv *icv = gomp_icv (false);
882 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
883 icv->run_sched_var & ~GFS_MONOTONIC,
884 icv->run_sched_chunk_size, flags);
885 fn (data);
886 GOMP_parallel_end ();
887 }
888
889 #ifdef HAVE_ATTRIBUTE_ALIAS
890 extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
891 __attribute__((alias ("GOMP_parallel_loop_dynamic")));
892 extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
893 __attribute__((alias ("GOMP_parallel_loop_guided")));
894 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
895 __attribute__((alias ("GOMP_parallel_loop_runtime")));
896 extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
897 __attribute__((alias ("GOMP_parallel_loop_runtime")));
898 #else
899 void
900 GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
901 unsigned num_threads, long start,
902 long end, long incr, long chunk_size,
903 unsigned flags)
904 {
905 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
906 GFS_DYNAMIC, chunk_size, flags);
907 fn (data);
908 GOMP_parallel_end ();
909 }
910
911 void
912 GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
913 unsigned num_threads, long start,
914 long end, long incr, long chunk_size,
915 unsigned flags)
916 {
917 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
918 GFS_GUIDED, chunk_size, flags);
919 fn (data);
920 GOMP_parallel_end ();
921 }
922
923 void
924 GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
925 unsigned num_threads, long start,
926 long end, long incr, unsigned flags)
927 {
928 struct gomp_task_icv *icv = gomp_icv (false);
929 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
930 icv->run_sched_var & ~GFS_MONOTONIC,
931 icv->run_sched_chunk_size, flags);
932 fn (data);
933 GOMP_parallel_end ();
934 }
935
936 void
937 GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
938 unsigned num_threads, long start,
939 long end, long incr,
940 unsigned flags)
941 {
942 struct gomp_task_icv *icv = gomp_icv (false);
943 gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
944 icv->run_sched_var & ~GFS_MONOTONIC,
945 icv->run_sched_chunk_size, flags);
946 fn (data);
947 GOMP_parallel_end ();
948 }
949 #endif
950
951 /* The GOMP_loop_end* routines are called after the thread is told that
952 all loop iterations are complete. The first two versions synchronize
953 all threads; the nowait version does not. */
954
/* End the loop work share via gomp_work_share_end; this is one of the
   variants that synchronizes all threads.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}
960
/* End the loop work share via gomp_work_share_end_cancel and hand its
   result back to the caller.  */

bool
GOMP_loop_end_cancel (void)
{
  bool result = gomp_work_share_end_cancel ();

  return result;
}
966
/* End the loop work share via gomp_work_share_end_nowait; this variant
   does not synchronize the threads.  */

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}
972
973
974 /* We use static functions above so that we're sure that the "runtime"
975 function can defer to the proper routine without interposition. We
976 export the static function with a strong alias when possible, or with
977 a wrapper function otherwise. */
978
979 #ifdef HAVE_ATTRIBUTE_ALIAS
980 extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
981 __attribute__((alias ("gomp_loop_static_start")));
982 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
983 __attribute__((alias ("gomp_loop_dynamic_start")));
984 extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
985 __attribute__((alias ("gomp_loop_guided_start")));
986 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
987 __attribute__((alias ("gomp_loop_dynamic_start")));
988 extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
989 __attribute__((alias ("gomp_loop_guided_start")));
990 extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
991 __attribute__((alias ("GOMP_loop_runtime_start")));
992 extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
993 __attribute__((alias ("GOMP_loop_runtime_start")));
994
995 extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
996 __attribute__((alias ("gomp_loop_ordered_static_start")));
997 extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
998 __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
999 extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
1000 __attribute__((alias ("gomp_loop_ordered_guided_start")));
1001
1002 extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
1003 __attribute__((alias ("gomp_loop_doacross_static_start")));
1004 extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
1005 __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
1006 extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
1007 __attribute__((alias ("gomp_loop_doacross_guided_start")));
1008
1009 extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
1010 __attribute__((alias ("gomp_loop_static_next")));
1011 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
1012 __attribute__((alias ("gomp_loop_dynamic_next")));
1013 extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
1014 __attribute__((alias ("gomp_loop_guided_next")));
1015 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
1016 __attribute__((alias ("gomp_loop_dynamic_next")));
1017 extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
1018 __attribute__((alias ("gomp_loop_guided_next")));
1019 extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
1020 __attribute__((alias ("GOMP_loop_runtime_next")));
1021 extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
1022 __attribute__((alias ("GOMP_loop_runtime_next")));
1023
/* Ordered loop "next" entry points.  Each GOMP_loop_ordered_*_next name is
   declared as an alias of the gomp_loop_ordered_*_next function named in
   its alias attribute, and takes that function's type via __typeof.  */
1024 extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
1025 __attribute__((alias ("gomp_loop_ordered_static_next")));
1026 extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
1027 __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
1028 extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
1029 __attribute__((alias ("gomp_loop_ordered_guided_next")));
1030 #else
/* GOMP_loop_static_start entry point: passes every argument through to
   gomp_loop_static_start unchanged and returns whatever it returns.  */

bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
  return ret;
}
1037
/* GOMP_loop_dynamic_start entry point: passes every argument through to
   gomp_loop_dynamic_start unchanged and returns whatever it returns.  */

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
  return ret;
}
1044
/* GOMP_loop_guided_start entry point: passes every argument through to
   gomp_loop_guided_start unchanged and returns whatever it returns.  */

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
  return ret;
}
1051
/* GOMP_loop_nonmonotonic_dynamic_start entry point.  It has no
   implementation of its own: the call is handed to
   gomp_loop_dynamic_start, the same function GOMP_loop_dynamic_start
   forwards to, and that function's result is returned.  */

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  bool ret;

  ret = gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
  return ret;
}
1059
/* GOMP_loop_nonmonotonic_guided_start entry point.  It has no
   implementation of its own: the call is handed to
   gomp_loop_guided_start, the same function GOMP_loop_guided_start
   forwards to, and that function's result is returned.  */

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart,
                                     long *iend)
{
  bool ret;

  ret = gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
  return ret;
}
1066
/* GOMP_loop_nonmonotonic_runtime_start entry point: hands its arguments
   to GOMP_loop_runtime_start unchanged and returns that call's result.  */

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  bool ret;

  ret = GOMP_loop_runtime_start (start, end, incr, istart, iend);
  return ret;
}
1073
/* GOMP_loop_maybe_nonmonotonic_runtime_start entry point: hands its
   arguments to GOMP_loop_runtime_start unchanged and returns that call's
   result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  bool ret;

  ret = GOMP_loop_runtime_start (start, end, incr, istart, iend);
  return ret;
}
1080
/* GOMP_loop_ordered_static_start entry point: passes every argument
   through to gomp_loop_ordered_static_start unchanged and returns
   whatever it returns.  */

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                        istart, iend);
  return ret;
}
1088
/* GOMP_loop_ordered_dynamic_start entry point: passes every argument
   through to gomp_loop_ordered_dynamic_start unchanged and returns
   whatever it returns.  */

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                         istart, iend);
  return ret;
}
1096
/* GOMP_loop_ordered_guided_start entry point: passes every argument
   through to gomp_loop_ordered_guided_start unchanged and returns
   whatever it returns.  */

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                        istart, iend);
  return ret;
}
1104
/* GOMP_loop_doacross_static_start entry point: passes NCOUNTS, COUNTS,
   CHUNK_SIZE, ISTART and IEND through to gomp_loop_doacross_static_start
   unchanged and returns whatever it returns.  */

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                         istart, iend);
  return ret;
}
1112
/* GOMP_loop_doacross_dynamic_start entry point: passes NCOUNTS, COUNTS,
   CHUNK_SIZE, ISTART and IEND through to gomp_loop_doacross_dynamic_start
   unchanged and returns whatever it returns.  */

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                          istart, iend);
  return ret;
}
1120
/* GOMP_loop_doacross_guided_start entry point: passes NCOUNTS, COUNTS,
   CHUNK_SIZE, ISTART and IEND through to gomp_loop_doacross_guided_start
   unchanged and returns whatever it returns.  */

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  bool ret;

  ret = gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                         istart, iend);
  return ret;
}
1128
/* GOMP_loop_static_next entry point: passes ISTART and IEND through to
   gomp_loop_static_next and returns its result.  */

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  bool ret = gomp_loop_static_next (istart, iend);

  return ret;
}
1134
/* GOMP_loop_dynamic_next entry point: passes ISTART and IEND through to
   gomp_loop_dynamic_next and returns its result.  */

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_dynamic_next (istart, iend);

  return ret;
}
1140
/* GOMP_loop_guided_next entry point: passes ISTART and IEND through to
   gomp_loop_guided_next and returns its result.  */

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_guided_next (istart, iend);

  return ret;
}
1146
/* GOMP_loop_nonmonotonic_dynamic_next entry point.  The call is handed to
   gomp_loop_dynamic_next, the same function GOMP_loop_dynamic_next
   forwards to, and that function's result is returned.  */

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_dynamic_next (istart, iend);

  return ret;
}
1152
/* GOMP_loop_nonmonotonic_guided_next entry point.  The call is handed to
   gomp_loop_guided_next, the same function GOMP_loop_guided_next forwards
   to, and that function's result is returned.  */

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_guided_next (istart, iend);

  return ret;
}
1158
/* GOMP_loop_nonmonotonic_runtime_next entry point: hands ISTART and IEND
   to GOMP_loop_runtime_next and returns that call's result.  */

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool ret = GOMP_loop_runtime_next (istart, iend);

  return ret;
}
1164
/* GOMP_loop_maybe_nonmonotonic_runtime_next entry point: hands ISTART and
   IEND to GOMP_loop_runtime_next and returns that call's result.  */

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  bool ret = GOMP_loop_runtime_next (istart, iend);

  return ret;
}
1170
/* GOMP_loop_ordered_static_next entry point: passes ISTART and IEND
   through to gomp_loop_ordered_static_next and returns its result.  */

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_static_next (istart, iend);

  return ret;
}
1176
/* GOMP_loop_ordered_dynamic_next entry point: passes ISTART and IEND
   through to gomp_loop_ordered_dynamic_next and returns its result.  */

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_dynamic_next (istart, iend);

  return ret;
}
1182
/* GOMP_loop_ordered_guided_next entry point: passes ISTART and IEND
   through to gomp_loop_ordered_guided_next and returns its result.  */

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  bool ret = gomp_loop_ordered_guided_next (istart, iend);

  return ret;
}
1188 #endif