/* libgomp/loop_ull.c (from the GCC source tree).  */
1 /* Copyright (C) 2005-2021 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the LOOP (FOR/DO) construct. */
27
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include "libgomp.h"
32
33 ialias (GOMP_loop_ull_runtime_next)
34 ialias_redirect (GOMP_taskgroup_reduction_register)
35
36 typedef unsigned long long gomp_ull;
37
38 /* Initialize the given work share construct from the given arguments. */
39
static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  /* ws->mode: bit 1 is set below for a downward (!UP) loop; bit 0 may be
     set for dynamic scheduling when the bounds are known to be far enough
     from the gomp_ull range limits that chunk claiming cannot wrap
     (presumably enabling a cheaper single-atomic path in the dynamic
     iterators — confirm against gomp_iter_ull_dynamic_next).  */
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      /* Pre-scale the chunk size by the increment so that advancing by
	 one chunk is a single addition.  */
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  Both NTHREADS and the scaled
	       chunk size must fit in half the bits of gomp_ull so that
	       (nthreads + 1) * chunk cannot overflow.  Note that
	       __LONG_LONG_MAX__ * 2ULL + 1 is ULLONG_MAX.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  Mirror image of the above for a
	   downward loop, where the scaled chunk size is negative.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  /* Record the iteration direction for the iterators.  */
  if (!up)
    ws->mode |= 2;
}
86
87 /* The *_start routines are called when first encountering a loop construct
88 that is not bound directly to a parallel construct. The first thread
89 that arrives will create the work-share construct; subsequent threads
90 will see the construct exists and allocate work from it.
91
92 START, END, INCR are the bounds of the loop; due to the restrictions of
93 OpenMP, these values must be the same in every thread. This is not
94 verified (nor is it entirely verifiable, since START is not necessarily
95 retained intact in the work-share data structure). CHUNK_SIZE is the
96 scheduling parameter; again this must be identical in all threads.
97
98 Returns true if there's any work for this thread to perform. If so,
99 *ISTART and *IEND are filled with the bounds of the iteration block
100 allocated to this thread. Returns false if all work was assigned to
101 other threads prior to this thread's arrival. */
102
static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  /* Reset this thread's static-schedule position for the new loop.  */
  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      /* First thread to arrive: initialize the work share, then publish
	 it to the other threads.  */
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  /* A zero return from gomp_iter_ull_static_next means a block was
     assigned to this thread; report that as true.  */
  return !gomp_iter_ull_static_next (istart, iend);
}
120
static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      /* First thread to arrive initializes the work share.  */
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

  /* With 64-bit sync builtins the iterator claims a chunk lock-free;
     otherwise fall back to the locked variant under the work-share
     mutex.  */
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
146
static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      /* First thread to arrive initializes the work share.  */
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

  /* Lock-free chunk claim when 64-bit sync builtins are available,
     otherwise take the work-share lock.  */
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
172
173 bool
174 GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
175 gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
176 {
177 struct gomp_task_icv *icv = gomp_icv (false);
178 switch (icv->run_sched_var & ~GFS_MONOTONIC)
179 {
180 case GFS_STATIC:
181 return gomp_loop_ull_static_start (up, start, end, incr,
182 icv->run_sched_chunk_size,
183 istart, iend);
184 case GFS_DYNAMIC:
185 return gomp_loop_ull_dynamic_start (up, start, end, incr,
186 icv->run_sched_chunk_size,
187 istart, iend);
188 case GFS_GUIDED:
189 return gomp_loop_ull_guided_start (up, start, end, incr,
190 icv->run_sched_chunk_size,
191 istart, iend);
192 case GFS_AUTO:
193 /* For now map to schedule(static), later on we could play with feedback
194 driven choice. */
195 return gomp_loop_ull_static_start (up, start, end, incr,
196 0, istart, iend);
197 default:
198 abort ();
199 }
200 }
201
/* Resolve SCHED, as passed to GOMP_loop_ull_start and friends, to a
   concrete GFS_STATIC/GFS_DYNAMIC/GFS_GUIDED value.  When the schedule
   is decided at run time, *CHUNK_SIZE is overwritten from the run-sched
   ICV (or set to 0 when auto maps to static).  */
static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    /* schedule(auto) currently maps to static with no chunk.  */
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}
243
/* Combined start entry point: resolves a possibly-runtime SCHED,
   optionally registers task REDUCTIONS, and can hand back scratch space
   through *MEM (on entry *MEM holds the requested size in bytes).
   Returns true with *ISTART/*IEND filled if this thread was assigned an
   initial iteration block.  */
bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
		     gomp_ull incr, long sched, gomp_ull chunk_size,
		     gomp_ull *istart, gomp_ull *iend,
		     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      /* First thread to arrive initializes the work share.  */
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
	  /* Offset of the inline ordered-team-ids area, rounded up to
	     long long alignment.  */
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
	  /* If the requested size doesn't fit in the inline area of the
	     work share, heap-allocate zeroed memory for it; otherwise
	     zero and reuse the inline area.  */
	  if (size > (sizeof (struct gomp_work_share)
		      - INLINE_ORDERED_TEAM_IDS_OFF))
	    *mem
	      = (void *) (thr->ts.work_share->ordered_team_ids
			  = gomp_malloc_cleared (size));
	  else
	    *mem = memset (((char *) thr->ts.work_share)
			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
	}
      gomp_work_share_init_done ();
    }
  else
    {
      /* Late arrivals: chain onto the reductions registered by the
	 first thread and recompute the scratch pointer.  */
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	{
	  /* Return the same (long long aligned) address the first
	     thread handed out.  */
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
	  else
	    {
	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
	      p += __alignof__ (long long) - 1;
	      p &= ~(__alignof__ (long long) - 1);
	      *mem = (void *) p;
	    }
	}
    }

  /* Claim the first block using whatever schedule was recorded.  */
  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}
308
309 /* The *_ordered_*_start routines are similar. The only difference is that
310 this work-share construct is initialized to expect an ORDERED section. */
311
static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  /* Pass 1 so the work share is set up to track an ORDERED section.  */
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      /* Initialize ordered bookkeeping before publishing the share.  */
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  /* Zero from gomp_iter_ull_static_next means this thread got a block.  */
  return !gomp_iter_ull_static_next (istart, iend);
}
330
static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      /* Take the lock BEFORE publishing the work share, so the first
	 chunk claim plus gomp_ordered_first below happen atomically with
	 respect to the other threads.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
356
static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      /* Lock before publishing so the first chunk claim and
	 gomp_ordered_first are atomic w.r.t. the other threads.  */
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
382
383 bool
384 GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
385 gomp_ull incr, gomp_ull *istart,
386 gomp_ull *iend)
387 {
388 struct gomp_task_icv *icv = gomp_icv (false);
389 switch (icv->run_sched_var & ~GFS_MONOTONIC)
390 {
391 case GFS_STATIC:
392 return gomp_loop_ull_ordered_static_start (up, start, end, incr,
393 icv->run_sched_chunk_size,
394 istart, iend);
395 case GFS_DYNAMIC:
396 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
397 icv->run_sched_chunk_size,
398 istart, iend);
399 case GFS_GUIDED:
400 return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
401 icv->run_sched_chunk_size,
402 istart, iend);
403 case GFS_AUTO:
404 /* For now map to schedule(static), later on we could play with feedback
405 driven choice. */
406 return gomp_loop_ull_ordered_static_start (up, start, end, incr,
407 0, istart, iend);
408 default:
409 abort ();
410 }
411 }
412
/* Combined ordered start entry point: resolves a possibly-runtime SCHED,
   optionally registers task REDUCTIONS, and can hand back scratch space
   through *MEM (on entry *MEM holds the requested size in bytes, which
   is folded into the ordered allocation).  Returns true with
   *ISTART/*IEND filled if this thread got an initial block.  */
bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, long sched, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend,
			     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  /* Request extra bytes past the ordered bookkeeping when the caller
     asked for scratch space (consumed by gomp_work_share_start).  */
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      /* For non-static schedules, acquire the work-share lock before
	 publishing the share; it is released only after the first chunk
	 is claimed at the bottom of this function.  */
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      /* Late arrivals use the schedule chosen by the first thread.  */
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      /* Hand back a long-long-aligned pointer just past the per-thread
	 ordered team ids array.  */
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      /* Static path never took the lock above; return directly.  */
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}
488
489 /* The *_doacross_*_start routines are similar. The only difference is that
490 this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
491 section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
492 and other COUNTS array elements tell the library number of iterations
493 in the ordered inner loops. */
494
static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      /* The worksharing loop itself always runs 0 .. COUNTS[0] - 1 by 1;
	 the remaining COUNTS describe the ordered inner loops.  */
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}
513
static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      /* Loop always runs 0 .. COUNTS[0] - 1 by 1; COUNTS[1..] describe
	 the ordered inner loops.  */
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  /* Lock-free chunk claim when sync builtins are usable; locked
     fallback otherwise.  */
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
540
static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      /* Loop always runs 0 .. COUNTS[0] - 1 by 1; COUNTS[1..] describe
	 the ordered inner loops.  */
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  /* Lock-free chunk claim when sync builtins are usable; locked
     fallback otherwise.  */
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
567
568 bool
569 GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
570 gomp_ull *istart, gomp_ull *iend)
571 {
572 struct gomp_task_icv *icv = gomp_icv (false);
573 switch (icv->run_sched_var & ~GFS_MONOTONIC)
574 {
575 case GFS_STATIC:
576 return gomp_loop_ull_doacross_static_start (ncounts, counts,
577 icv->run_sched_chunk_size,
578 istart, iend);
579 case GFS_DYNAMIC:
580 return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
581 icv->run_sched_chunk_size,
582 istart, iend);
583 case GFS_GUIDED:
584 return gomp_loop_ull_doacross_guided_start (ncounts, counts,
585 icv->run_sched_chunk_size,
586 istart, iend);
587 case GFS_AUTO:
588 /* For now map to schedule(static), later on we could play with feedback
589 driven choice. */
590 return gomp_loop_ull_doacross_static_start (ncounts, counts,
591 0, istart, iend);
592 default:
593 abort ();
594 }
595 }
596
/* Combined doacross start entry point: resolves a possibly-runtime
   SCHED, optionally registers task REDUCTIONS, and exchanges scratch
   space through *MEM (requested size in, pointer out).  */
bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
			      long sched, gomp_ull chunk_size,
			      gomp_ull *istart, gomp_ull *iend,
			      uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      /* The worksharing loop itself always runs 0 .. COUNTS[0] - 1 by 1;
	 the remaining COUNTS describe the ordered inner loops.  */
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  /* Return the extra space reserved by gomp_doacross_ull_init.  */
  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}
641
642 /* The *_next routines are called when the thread completes processing of
643 the iteration block currently assigned to it. If the work-share
644 construct is bound directly to a parallel construct, then the iteration
645 bounds may have been set up before the parallel. In which case, this
646 may be the first iteration for the thread.
647
648 Returns true if there is work remaining to be performed; *ISTART and
649 *IEND are filled with a new iteration block. Returns false if all work
650 has been assigned. */
651
/* Fetch this thread's next static block; true iff one was assigned
   (gomp_iter_ull_static_next returns zero on success).  */
static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}
657
/* Claim the next dynamic chunk: lock-free when 64-bit sync builtins are
   available, under the work-share lock otherwise.  */
static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
674
/* Claim the next guided chunk: lock-free when 64-bit sync builtins are
   available, under the work-share lock otherwise.  */
static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
691
692 bool
693 GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
694 {
695 struct gomp_thread *thr = gomp_thread ();
696
697 switch (thr->ts.work_share->sched)
698 {
699 case GFS_STATIC:
700 case GFS_AUTO:
701 return gomp_loop_ull_static_next (istart, iend);
702 case GFS_DYNAMIC:
703 return gomp_loop_ull_dynamic_next (istart, iend);
704 case GFS_GUIDED:
705 return gomp_loop_ull_guided_next (istart, iend);
706 default:
707 abort ();
708 }
709 }
710
711 /* The *_ordered_*_next routines are called when the thread completes
712 processing of the iteration block currently assigned to it.
713
714 Returns true if there is work remaining to be performed; *ISTART and
715 *IEND are filled with a new iteration block. Returns false if all work
716 has been assigned. */
717
static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  /* Synchronize with the ordered machinery before moving on.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  /* Zero from gomp_iter_ull_static_next means a new block was
     assigned.  */
  return test == 0;
}
733
static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Synchronize with the ordered machinery before claiming a chunk.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  /* Advance or retire this thread's ordered position while the
     work-share lock is still held.  */
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
751
static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  /* Synchronize with the ordered machinery before claiming a chunk.  */
  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  /* Advance or retire this thread's ordered position while the
     work-share lock is still held.  */
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}
769
770 bool
771 GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
772 {
773 struct gomp_thread *thr = gomp_thread ();
774
775 switch (thr->ts.work_share->sched)
776 {
777 case GFS_STATIC:
778 case GFS_AUTO:
779 return gomp_loop_ull_ordered_static_next (istart, iend);
780 case GFS_DYNAMIC:
781 return gomp_loop_ull_ordered_dynamic_next (istart, iend);
782 case GFS_GUIDED:
783 return gomp_loop_ull_ordered_guided_next (istart, iend);
784 default:
785 abort ();
786 }
787 }
788
789 /* We use static functions above so that we're sure that the "runtime"
790 function can defer to the proper routine without interposition. We
791 export the static function with a strong alias when possible, or with
792 a wrapper function otherwise. */
793
794 #ifdef HAVE_ATTRIBUTE_ALIAS
795 extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
796 __attribute__((alias ("gomp_loop_ull_static_start")));
797 extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
798 __attribute__((alias ("gomp_loop_ull_dynamic_start")));
799 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
800 __attribute__((alias ("gomp_loop_ull_guided_start")));
801 extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
802 __attribute__((alias ("gomp_loop_ull_dynamic_start")));
803 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
804 __attribute__((alias ("gomp_loop_ull_guided_start")));
805 extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
806 __attribute__((alias ("GOMP_loop_ull_runtime_start")));
807 extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
808 __attribute__((alias ("GOMP_loop_ull_runtime_start")));
809
810 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
811 __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
812 extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
813 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
814 extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
815 __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
816
817 extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
818 __attribute__((alias ("gomp_loop_ull_doacross_static_start")));
819 extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
820 __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
821 extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
822 __attribute__((alias ("gomp_loop_ull_doacross_guided_start")));
823
824 extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
825 __attribute__((alias ("gomp_loop_ull_static_next")));
826 extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
827 __attribute__((alias ("gomp_loop_ull_dynamic_next")));
828 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
829 __attribute__((alias ("gomp_loop_ull_guided_next")));
830 extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
831 __attribute__((alias ("gomp_loop_ull_dynamic_next")));
832 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
833 __attribute__((alias ("gomp_loop_ull_guided_next")));
834 extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
835 __attribute__((alias ("GOMP_loop_ull_runtime_next")));
836 extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
837 __attribute__((alias ("GOMP_loop_ull_runtime_next")));
838
839 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
840 __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
841 extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
842 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
843 extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
844 __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
845 #else
/* No attribute-alias support: export each *_start entry point as a real
   wrapper forwarding to the corresponding static function (or to the
   runtime dispatcher for the (maybe_)nonmonotonic runtime variants).  */
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

/* The nonmonotonic variants share the plain implementations.  */
bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull chunk_size,
					  gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
					 gomp_ull incr, gomp_ull chunk_size,
					 gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
						gomp_ull end, gomp_ull incr,
						gomp_ull *istart,
						gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}
908
/* Ordered and doacross start wrappers (no attribute-alias support).  */
bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
					       istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
					      istart, iend);
}
962
/* *_next wrappers (no attribute-alias support); the nonmonotonic
   variants forward to the plain implementations.  */
bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
					       gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
1023 #endif