/* Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"


ialias (GOMP_loop_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
                enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
            ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
        /* For dynamic scheduling prepare things to make each iteration
           faster.  */
        struct gomp_thread *thr = gomp_thread ();
        struct gomp_team *team = thr->ts.team;
        long nthreads = team ? team->nthreads : 1;

        if (__builtin_expect (incr > 0, 1))
          {
            /* Cheap overflow protection.  */
            if (__builtin_expect ((nthreads | ws->chunk_size)
                                  >= 1UL << (sizeof (long)
                                             * __CHAR_BIT__ / 2 - 1), 0))
              ws->mode = 0;
            else
              ws->mode = ws->end < (LONG_MAX
                                    - (nthreads + 1) * ws->chunk_size);
          }
        /* Cheap overflow protection.  */
        else if (__builtin_expect ((nthreads | -ws->chunk_size)
                                   >= 1UL << (sizeof (long)
                                              * __CHAR_BIT__ / 2 - 1), 0))
          ws->mode = 0;
        else
          ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

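/* Illustrative sketch, added for exposition: one plausible way a compiler
   could lower

     #pragma omp for schedule(dynamic, 16) nowait
     for (i = 0; i < n; i++)
       body (i);

   onto the start/next/end protocol described above ("body", "_s", "_e"
   are hypothetical names, and the expansion GCC actually emits may
   differ in detail):

     long _s, _e;
     if (GOMP_loop_dynamic_start (0, n, 1, 16, &_s, &_e))
       do
         {
           long _i;
           for (_i = _s; _i < _e; _i++)
             body (_i);
         }
       while (GOMP_loop_dynamic_next (&_s, &_e));
     GOMP_loop_end_nowait ();  */
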
static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The
   entrypoints without nonmonotonic in them have to be always monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* Similarly as for dynamic, though the question is how can the chunk sizes
   be decreased without a central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
                         long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
                                      icv->run_sched_chunk_size,
                                      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
                                     icv->run_sched_chunk_size,
                                     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, long *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
        struct gomp_task_icv *icv = gomp_icv (false);
        sched = icv->run_sched_var & ~GFS_MONOTONIC;
        switch (sched)
          {
          case GFS_STATIC:
          case GFS_DYNAMIC:
          case GFS_GUIDED:
            *chunk_size = icv->run_sched_chunk_size;
            break;
          case GFS_AUTO:
            sched = GFS_STATIC;
            *chunk_size = 0;
            break;
          default:
            abort ();
          }
        return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_start (long start, long end, long incr, long sched,
                 long chunk_size, long *istart, long *iend,
                 uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (size > (sizeof (struct gomp_work_share)
                      - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

  if (!istart)
    return true;
  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

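/* Illustrative sketch, added for exposition: a loop with an ORDERED clause,
   e.g.

     #pragma omp for ordered schedule(dynamic)
     for (i = 0; i < n; i++)
       {
         compute (i);
       #pragma omp ordered
         emit (i);
       }

   could pair the *_ordered_* entry points below with GOMP_ordered_start
   and GOMP_ordered_end around the ordered region ("compute" and "emit"
   are hypothetical names; the code GCC actually emits may differ):

     long _s, _e;
     if (GOMP_loop_ordered_dynamic_start (0, n, 1, 1, &_s, &_e))
       do
         for (long _i = _s; _i < _e; _i++)
           {
             compute (_i);
             GOMP_ordered_start ();
             emit (_i);
             GOMP_ordered_end ();
           }
       while (GOMP_loop_ordered_dynamic_next (&_s, &_e));
     GOMP_loop_end ();  */
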
static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
                                 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
                                             icv->run_sched_chunk_size,
                                             istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
                                             0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ordered_start (long start, long end, long incr, long sched,
                         long chunk_size, long *istart, long *iend,
                         uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
                      sched, chunk_size);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (sched == GFS_STATIC)
        gomp_ordered_static_init ();
      else
        gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
        gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
   and other COUNTS array elements tell the library number of iterations
   in the ordered inner loops.  */

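/* Illustrative sketch, added for exposition: for a doacross nest such as

     #pragma omp for ordered(2)
     for (i = 0; i < m; i++)
       for (j = 0; j < n; j++)
         {
         #pragma omp ordered depend(sink: i - 1, j)
           work (i, j);
         #pragma omp ordered depend(source)
         }

   the compiler is expected to pass NCOUNTS == 2, with COUNTS[0] the
   normalized iteration count of the workshared i loop and COUNTS[1] the
   extent of the inner j loop, per the description above.  The sink/source
   dependences themselves are then handled by GOMP_doacross_post and
   GOMP_doacross_wait rather than by the routines in this file.  "work" is
   a hypothetical name and the exact lowering is up to the compiler.  */
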
static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
                                  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
                                               icv->run_sched_chunk_size,
                                               istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
                                              icv->run_sched_chunk_size,
                                              istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
         driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
                                              0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
                          long chunk_size, long *istart, long *iend,
                          uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
        extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                      sched, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, extra);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */

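/* Illustrative sketch, added for exposition: a combined construct such as

     #pragma omp parallel for schedule(guided, 8)
     for (i = 0; i < n; i++)
       body (i);

   can be outlined into a function that goes straight to the *_next entry
   point, because the work share was already initialized by
   gomp_parallel_loop_start ("_outlined" and "body" are hypothetical names;
   the expansion GCC actually emits may differ):

     static void
     _outlined (void *data)
     {
       long _s, _e;
       while (GOMP_loop_guided_next (&_s, &_e))
         for (long _i = _s; _i < _e; _i++)
           body (_i);
       GOMP_loop_end_nowait ();
     }

     GOMP_parallel_loop_guided (_outlined, &data, 0, 0, n, 1, 8, 0);  */
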
static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
                          unsigned num_threads, long start, long end,
                          long incr, enum gomp_schedule_type sched,
                          long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
                                 unsigned num_threads, long start, long end,
                                 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
                                  unsigned num_threads, long start, long end,
                                  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
                           unsigned num_threads, long start, long end,
                           long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
                            unsigned num_threads, long start, long end,
                            long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
        __attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
        __attribute__((alias ("GOMP_parallel_loop_guided")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
        __attribute__((alias ("GOMP_parallel_loop_runtime")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, long chunk_size,
                                         unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
                                        unsigned num_threads, long start,
                                        long end, long incr, long chunk_size,
                                        unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                         unsigned num_threads, long start,
                                         long end, long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
                                               unsigned num_threads, long start,
                                               long end, long incr,
                                               unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
                            icv->run_sched_var & ~GFS_MONOTONIC,
                            icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
        __attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
        __attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
        __attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
        __attribute__((alias ("GOMP_loop_runtime_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
        __attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
        __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
        __attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
        __attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
        __attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
        __attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
        __attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
        __attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
        __attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
        __attribute__((alias ("GOMP_loop_runtime_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
        __attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
        __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
        __attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
                                      long chunk_size, long *istart,
                                      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
                                     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
                                      long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
                                            long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                         istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
                                  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
                                           istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
                                 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
                                          istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif