/* Copyright (C) 2005-2025 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"


ialias (GOMP_loop_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

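/* Example (illustrative sketch only, not part of this library): for a
   standalone "#pragma omp for schedule(static)" over i = 0 .. n-1, a
   compiler could lower the construct roughly as

     long istart, iend;
     if (GOMP_loop_static_start (0, n, 1, 0, &istart, &iend))
       do
         for (long i = istart; i < iend; i++)
           body (i);
       while (GOMP_loop_static_next (&istart, &iend));
     GOMP_loop_end ();

   where "n" and "body" are hypothetical names standing in for the loop
   bound and loop body.  */
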
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The entry
   points without "nonmonotonic" in their name must always be monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* Similar to dynamic; the open question is how the chunk sizes could be
   decreased without central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
                                      icv->run_sched_chunk_size,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with a
         feedback-driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

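/* Example (illustrative only): with schedule(runtime), the effective
   schedule comes from the run-sched-var ICV, which is typically set via
   the OMP_SCHEDULE environment variable; e.g.

     OMP_SCHEDULE="guided,7"

   would make the dispatch above take the GFS_GUIDED case with a chunk
   size of 7.  */
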
static long
gomp_adjust_sched (long sched, long *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

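/* Example (illustrative summary of the encoding the comments above
   describe):

     schedule(runtime)                -> GFS_RUNTIME
     schedule(monotonic: runtime)     -> GFS_RUNTIME | GFS_MONOTONIC
     schedule(nonmonotonic: runtime)  -> GFS_AUTO

   In all of these cases the effective schedule and chunk size are then
   taken from the run-sched-var ICV, with auto mapped to static.  */
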
bool
GOMP_loop_start (long start, long end, long incr, long sched,
                 long chunk_size, long *istart, long *iend,
                 uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (sizeof (struct gomp_work_share)
              <= INLINE_ORDERED_TEAM_IDS_OFF
              || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
              || size > (sizeof (struct gomp_work_share)
                         - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

  if (!istart)
    return true;
  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

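/* Example (illustrative sketch only): a compiler could lower
   "#pragma omp for ordered schedule(dynamic)" roughly as

     long istart, iend;
     if (GOMP_loop_ordered_dynamic_start (0, n, 1, 1, &istart, &iend))
       do
         for (long i = istart; i < iend; i++)
           {
             before_ordered (i);
             GOMP_ordered_start ();
             ordered_body (i);    (runs in iteration order)
             GOMP_ordered_end ();
           }
       while (GOMP_loop_ordered_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   "n", "before_ordered" and "ordered_body" are hypothetical names.  */
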
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with a
         feedback-driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ordered_start (long start, long end, long incr, long sched,
                         long chunk_size, long *istart, long *iend,
                         uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   while the other COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */

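/* Example (illustrative sketch only): for a doacross loop nest such as
   "#pragma omp for ordered(2)" over i = 0 .. n-1 and j = 0 .. m-1, the
   library is given ncounts == 2 and counts == { n, m }, and cross-iteration
   dependences could be expressed roughly as

     long istart, iend;
     if (GOMP_loop_doacross_dynamic_start (2, counts, 1, &istart, &iend))
       do
         for (long i = istart; i < iend; i++)
           for (long j = 0; j < m; j++)
             {
               if (i > 0)
                 GOMP_doacross_wait (i - 1, j);   (depend(sink: i-1,j))
               body (i, j);
               long src[2] = { i, j };
               GOMP_doacross_post (src);          (depend(source))
             }
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   "counts", "m" and "body" are hypothetical names.  */
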
static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
                                  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
                                               icv->run_sched_chunk_size,
                                               istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could play with a
         feedback-driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
                          long chunk_size, long *istart, long *iend,
                          uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      sched, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this
   may be the first iteration block for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

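/* Example (illustrative sketch only): when the loop is bound directly to a
   parallel construct, gomp_parallel_loop_start below pre-initializes the
   work share, so the outlined function a compiler might emit skips the
   *_start call entirely and begins with a *_next call:

     static void
     subfunction (void *data)
     {
       long istart, iend;
       while (GOMP_loop_static_next (&istart, &iend))
         for (long i = istart; i < iend; i++)
           body (i);
       GOMP_loop_end_nowait ();
     }

   "subfunction" and "body" are hypothetical names.  */
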
static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
}

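/* Example (illustrative sketch only): with the pre-initialized work share,
   "#pragma omp parallel for schedule(static)" can be lowered to a single
   library call plus the outlined function sketched above the *_next
   routines:

     GOMP_parallel_loop_static (subfunction, NULL, 0, 0, n, 1, 0, 0);

   Each thread, including the caller, runs "subfunction", which pulls
   iteration blocks with GOMP_loop_static_next and finishes with
   GOMP_loop_end_nowait; the barrier is provided by the end of the
   parallel region.  "subfunction" and "n" are hypothetical names.  */
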
void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
        __attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
        __attribute__((alias ("GOMP_parallel_loop_guided")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, long chunk_size,
                                         unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
                                        unsigned num_threads, long start,
                                        long end, long incr, long chunk_size,
                                        unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                               unsigned num_threads, long start,
                                               long end, long incr,
                                               unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}

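/* Example (illustrative only): a loop without a "nowait" clause ends with
   GOMP_loop_end (or GOMP_loop_end_cancel when cancellation is possible),
   providing the implicit barrier OpenMP requires at the end of a
   worksharing construct; with "nowait" the compiler emits
   GOMP_loop_end_nowait instead and threads proceed immediately.  */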

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
        __attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
        __attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                           istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif