]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
openmp: Differentiate between order(concurrent) and order(reproducible:concurrent)
authorJakub Jelinek <jakub@redhat.com>
Fri, 1 Oct 2021 08:45:48 +0000 (10:45 +0200)
committerJakub Jelinek <jakub@redhat.com>
Fri, 1 Oct 2021 08:45:48 +0000 (10:45 +0200)
While OpenMP 5.1 implies order(concurrent) is the same thing as
order(reproducible:concurrent), this is going to change in OpenMP 5.2, where
essentially order(concurrent) means nothing is stated on whether it is
reproducible or unconstrained (and is determined by other means, e.g. for/do
with schedule static or runtime with static being selected is implicitly
reproducible, distribute with dist_schedule static is implicitly reproducible,
loop is implicitly reproducible) and when the modifier is specified explicitly,
it overrides the implicit behavior either way.
And, when order(reproducible:concurrent) is used with e.g. schedule(dynamic)
or some other schedule that is by definition not reproducible, it is
the implementation's duty to ensure it is reproducible, either by remembering how
it scheduled some loop and then replaying the same schedule when seeing loops
with the same directive/schedule/number of iterations, or by overriding the
schedule to some reproducible one.

This patch doesn't implement the 5.2 wording just yet, but in the FEs it
differentiates between the 3 states - no explicit modifier, explicit reproducible
or explicit unconstrained, so that the middle-end can easily switch any time.
Instead it follows the 5.1 wording where both order(concurrent) (implicit or
explicit) and order(reproducible:concurrent) imply reproducibility.
And, it implements the easier method: when for/do should be reproducible, it
just chooses static schedule.  order(concurrent) implies no OpenMP APIs in the
loop body nor threadprivate vars, so the exact scheduling isn't (easily at least)
observable.

2021-10-01  Jakub Jelinek  <jakub@redhat.com>

gcc/
* tree.h (OMP_CLAUSE_ORDER_REPRODUCIBLE): Define.
* tree-pretty-print.c (dump_omp_clause) <case OMP_CLAUSE_ORDER>: Print
reproducible: for OMP_CLAUSE_ORDER_REPRODUCIBLE.
* omp-general.c (omp_extract_for_data): If OMP_CLAUSE_ORDER is seen
without OMP_CLAUSE_ORDER_UNCONSTRAINED, overwrite sched_kind to
OMP_CLAUSE_SCHEDULE_STATIC.
gcc/c-family/
* c-omp.c (c_omp_split_clauses): Also copy
OMP_CLAUSE_ORDER_REPRODUCIBLE.
gcc/c/
* c-parser.c (c_parser_omp_clause_order): Set
OMP_CLAUSE_ORDER_REPRODUCIBLE for explicit reproducible: modifier.
gcc/cp/
* parser.c (cp_parser_omp_clause_order): Set
OMP_CLAUSE_ORDER_REPRODUCIBLE for explicit reproducible: modifier.
gcc/fortran/
* gfortran.h (gfc_omp_clauses): Add order_reproducible bitfield.
* dump-parse-tree.c (show_omp_clauses): Print REPRODUCIBLE: for it.
* openmp.c (gfc_match_omp_clauses): Set order_reproducible for
explicit reproducible: modifier.
* trans-openmp.c (gfc_trans_omp_clauses): Set
OMP_CLAUSE_ORDER_REPRODUCIBLE for order_reproducible.
(gfc_split_omp_clauses): Also copy order_reproducible.
gcc/testsuite/
* gfortran.dg/gomp/order-5.f90: Adjust scan-tree-dump-times regexps.
libgomp/
* testsuite/libgomp.c-c++-common/order-reproducible-1.c: New test.
* testsuite/libgomp.c-c++-common/order-reproducible-2.c: New test.

13 files changed:
gcc/c-family/c-omp.c
gcc/c/c-parser.c
gcc/cp/parser.c
gcc/fortran/dump-parse-tree.c
gcc/fortran/gfortran.h
gcc/fortran/openmp.c
gcc/fortran/trans-openmp.c
gcc/omp-general.c
gcc/testsuite/gfortran.dg/gomp/order-5.f90
gcc/tree-pretty-print.c
gcc/tree.h
libgomp/testsuite/libgomp.c-c++-common/order-reproducible-1.c [new file with mode: 0644]
libgomp/testsuite/libgomp.c-c++-common/order-reproducible-2.c [new file with mode: 0644]

index 1f07a0a454b2a4ebcb0a2a20946105ed3f6413ea..2849fdabc3d642bf9d4e84fcec136ac7c8ea7ba3 100644 (file)
@@ -2128,6 +2128,8 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
                                    OMP_CLAUSE_ORDER);
              OMP_CLAUSE_ORDER_UNCONSTRAINED (c)
                = OMP_CLAUSE_ORDER_UNCONSTRAINED (clauses);
+             OMP_CLAUSE_ORDER_REPRODUCIBLE (c)
+               = OMP_CLAUSE_ORDER_REPRODUCIBLE (clauses);
              OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_DISTRIBUTE];
              cclauses[C_OMP_CLAUSE_SPLIT_DISTRIBUTE] = c;
            }
@@ -2139,6 +2141,8 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
                                        OMP_CLAUSE_ORDER);
                  OMP_CLAUSE_ORDER_UNCONSTRAINED (c)
                    = OMP_CLAUSE_ORDER_UNCONSTRAINED (clauses);
+                 OMP_CLAUSE_ORDER_REPRODUCIBLE (c)
+                   = OMP_CLAUSE_ORDER_REPRODUCIBLE (clauses);
                  OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_FOR];
                  cclauses[C_OMP_CLAUSE_SPLIT_FOR] = c;
                  s = C_OMP_CLAUSE_SPLIT_SIMD;
index 21ff25d3b2c1622a99fa78e3a4e979073c95c9fc..a66f43f6dc2f1074a3a004c63486bfb3f29c821a 100644 (file)
@@ -14626,6 +14626,7 @@ c_parser_omp_clause_order (c_parser *parser, tree list)
   tree c;
   const char *p;
   bool unconstrained = false;
+  bool reproducible = false;
 
   matching_parens parens;
   if (!parens.require_open (parser))
@@ -14636,7 +14637,9 @@ c_parser_omp_clause_order (c_parser *parser, tree list)
       p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
       if (strcmp (p, "unconstrained") == 0)
        unconstrained = true;
-      else if (strcmp (p, "reproducible") != 0)
+      else if (strcmp (p, "reproducible") == 0)
+       reproducible = true;
+      else
        {
          c_parser_error (parser, "expected %<reproducible%> or "
                                  "%<unconstrained%>");
@@ -14661,6 +14664,7 @@ c_parser_omp_clause_order (c_parser *parser, tree list)
   check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order");
   c = build_omp_clause (loc, OMP_CLAUSE_ORDER);
   OMP_CLAUSE_ORDER_UNCONSTRAINED (c) = unconstrained;
+  OMP_CLAUSE_ORDER_REPRODUCIBLE (c) = reproducible;
   OMP_CLAUSE_CHAIN (c) = list;
   return c;
 
index 04f5a24cc03c6a492afc747f9f77ef5a705f944a..c6f1a9796c5be9f6de1b3a9aa45eb5d56623d8e3 100644 (file)
@@ -37735,6 +37735,7 @@ cp_parser_omp_clause_order (cp_parser *parser, tree list, location_t location)
   tree c, id;
   const char *p;
   bool unconstrained = false;
+  bool reproducible = false;
 
   matching_parens parens;
   if (!parens.require_open (parser))
@@ -37747,7 +37748,9 @@ cp_parser_omp_clause_order (cp_parser *parser, tree list, location_t location)
       p = IDENTIFIER_POINTER (id);
       if (strcmp (p, "unconstrained") == 0)
        unconstrained = true;
-      else if (strcmp (p, "reproducible") != 0)
+      else if (strcmp (p, "reproducible") == 0)
+       reproducible = true;
+      else
        {
          cp_parser_error (parser, "expected %<reproducible%> or "
                                   "%<unconstrained%>");
@@ -37778,6 +37781,7 @@ cp_parser_omp_clause_order (cp_parser *parser, tree list, location_t location)
   check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order", location);
   c = build_omp_clause (location, OMP_CLAUSE_ORDER);
   OMP_CLAUSE_ORDER_UNCONSTRAINED (c) = unconstrained;
+  OMP_CLAUSE_ORDER_REPRODUCIBLE (c) = reproducible;
   OMP_CLAUSE_CHAIN (c) = list;
   return c;
 
index 28eb09e261d77ba1c536296db9581743abd80554..64e04c043f673792e8b8e0472542cb5d99ee2d22 100644 (file)
@@ -1634,6 +1634,8 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses)
       fputs (" ORDER(", dumpfile);
       if (omp_clauses->order_unconstrained)
        fputs ("UNCONSTRAINED:", dumpfile);
+      else if (omp_clauses->order_reproducible)
+       fputs ("REPRODUCIBLE:", dumpfile);
       fputs ("CONCURRENT)", dumpfile);
     }
   if (omp_clauses->ordered)
index 7ef835b211a1e81564f7996a90df0c85badb4302..c25d1cca3a844ea06d4a75c2ae4283b07d54da6a 100644 (file)
@@ -1491,8 +1491,8 @@ typedef struct gfc_omp_clauses
   unsigned inbranch:1, notinbranch:1, nogroup:1;
   unsigned sched_simd:1, sched_monotonic:1, sched_nonmonotonic:1;
   unsigned simd:1, threads:1, depend_source:1, destroy:1, order_concurrent:1;
-  unsigned order_unconstrained:1, capture:1, grainsize_strict:1;
-  unsigned num_tasks_strict:1;
+  unsigned order_unconstrained:1, order_reproducible:1, capture:1;
+  unsigned grainsize_strict:1, num_tasks_strict:1;
   ENUM_BITFIELD (gfc_omp_sched_kind) sched_kind:3;
   ENUM_BITFIELD (gfc_omp_device_type) device_type:2;
   ENUM_BITFIELD (gfc_omp_memorder) memorder:3;
index 9ee52d6b0eaa0ea805bff0f38a8b291a5d2cfbde..6a4ca2868f8f20c2345c270428dfbabc34b25569 100644 (file)
@@ -2374,8 +2374,9 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
            {
              if (m == MATCH_ERROR)
                goto error;
-             if (gfc_match (" reproducible : concurrent )") == MATCH_YES
-                 || gfc_match (" concurrent )") == MATCH_YES)
+             if (gfc_match (" reproducible : concurrent )") == MATCH_YES)
+               c->order_reproducible = true;
+             else if (gfc_match (" concurrent )") == MATCH_YES)
                ;
              else if (gfc_match (" unconstrained : concurrent )") == MATCH_YES)
                c->order_unconstrained = true;
index 4ca2c3f9e7fcc878dc70ab7d4972c301e41d40df..d234d1b070fbd635cef123f39d4b2dfbdaf3d11f 100644 (file)
@@ -3804,6 +3804,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
     {
       c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_ORDER);
       OMP_CLAUSE_ORDER_UNCONSTRAINED (c) = clauses->order_unconstrained;
+      OMP_CLAUSE_ORDER_REPRODUCIBLE (c) = clauses->order_reproducible;
       omp_clauses = gfc_trans_add_clause (c, omp_clauses);
     }
 
@@ -5895,6 +5896,8 @@ gfc_split_omp_clauses (gfc_code *code,
            = code->ext.omp_clauses->order_concurrent;
          clausesa[GFC_OMP_SPLIT_DISTRIBUTE].order_unconstrained
            = code->ext.omp_clauses->order_unconstrained;
+         clausesa[GFC_OMP_SPLIT_DISTRIBUTE].order_reproducible
+           = code->ext.omp_clauses->order_reproducible;
        }
       if (mask & GFC_OMP_MASK_PARALLEL)
        {
@@ -5951,6 +5954,8 @@ gfc_split_omp_clauses (gfc_code *code,
            = code->ext.omp_clauses->order_concurrent;
          clausesa[GFC_OMP_SPLIT_DO].order_unconstrained
            = code->ext.omp_clauses->order_unconstrained;
+         clausesa[GFC_OMP_SPLIT_DO].order_reproducible
+           = code->ext.omp_clauses->order_reproducible;
        }
       if (mask & GFC_OMP_MASK_SIMD)
        {
@@ -5969,6 +5974,8 @@ gfc_split_omp_clauses (gfc_code *code,
            = code->ext.omp_clauses->order_concurrent;
          clausesa[GFC_OMP_SPLIT_SIMD].order_unconstrained
            = code->ext.omp_clauses->order_unconstrained;
+         clausesa[GFC_OMP_SPLIT_SIMD].order_reproducible
+           = code->ext.omp_clauses->order_reproducible;
          /* And this is copied to all.  */
          clausesa[GFC_OMP_SPLIT_SIMD].if_expr
            = code->ext.omp_clauses->if_expr;
index cc6aecb1d6695600bbb9384e6f49ad5cba256fe6..1e4c0b2553154c7123f6824bf9ec3aee252a5658 100644 (file)
@@ -193,6 +193,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
                    == GF_OMP_FOR_KIND_DISTRIBUTE;
   bool taskloop = gimple_omp_for_kind (for_stmt)
                  == GF_OMP_FOR_KIND_TASKLOOP;
+  bool order_reproducible = false;
   tree iterv, countv;
 
   fd->for_stmt = for_stmt;
@@ -277,10 +278,25 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
            && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
          fd->have_nonctrl_scantemp = true;
        break;
+      case OMP_CLAUSE_ORDER:
+       /* FIXME: For OpenMP 5.2 this should change to
+          if (OMP_CLAUSE_ORDER_REPRODUCIBLE (t))
+          (with the exception of loop construct but that lowers to
+          no schedule/dist_schedule clauses currently).  */
+       if (!OMP_CLAUSE_ORDER_UNCONSTRAINED (t))
+         order_reproducible = true;
       default:
        break;
       }
 
+  /* For order(reproducible:concurrent) schedule ({dynamic,guided,runtime})
+     we have either the option to expensively remember at runtime how we've
+     distributed work from first loop and reuse that in following loops with
+     the same number of iterations and schedule, or just force static schedule.
+     OpenMP API calls etc. aren't allowed in order(concurrent) bodies so
+     users can't observe it easily anyway.  */
+  if (order_reproducible)
+    fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   if (fd->collapse > 1 || fd->tiling)
     fd->loops = loops;
   else
index 4d9e33642afb569be801af6f3f45fc10a5ca40b4..0dddb968cb4ef2ced48e1803d503b705fa572c0a 100644 (file)
@@ -116,14 +116,14 @@ subroutine f4 (a)
   end do
 end
 
-! { dg-final { scan-tree-dump-times "#pragma omp distribute order\\(concurrent\\)" 6 "original"} }
+! { dg-final { scan-tree-dump-times "#pragma omp distribute order\\(reproducible:concurrent\\)" 6 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp distribute order\\(unconstrained:concurrent\\)" 6 "original"} }
-! { dg-final { scan-tree-dump-times "#pragma omp for nowait order\\(concurrent\\)" 6 "original"} }
+! { dg-final { scan-tree-dump-times "#pragma omp for nowait order\\(reproducible:concurrent\\)" 6 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp for nowait order\\(unconstrained:concurrent\\)" 6 "original"} }
-! { dg-final { scan-tree-dump-times "#pragma omp for order\\(concurrent\\)" 2 "original"} }
+! { dg-final { scan-tree-dump-times "#pragma omp for order\\(reproducible:concurrent\\)" 2 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp for order\\(unconstrained:concurrent\\)" 2 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp parallel" 12 "original"} }
-! { dg-final { scan-tree-dump-times "#pragma omp simd linear\\(i:1\\) order\\(concurrent\\)" 6 "original"} }
+! { dg-final { scan-tree-dump-times "#pragma omp simd linear\\(i:1\\) order\\(reproducible:concurrent\\)" 6 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp simd linear\\(i:1\\) order\\(unconstrained:concurrent\\)" 6 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp taskloop" 2 "original"} }
 ! { dg-final { scan-tree-dump-times "#pragma omp teams" 8 "original"} }
index 35e567cbfaf53a51a166f88b2a15885602a8340e..0b5bdd78f064cd28189f5d2148856562af013c09 100644 (file)
@@ -1165,6 +1165,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
       pp_string (pp, "order(");
       if (OMP_CLAUSE_ORDER_UNCONSTRAINED (clause))
        pp_string (pp, "unconstrained:");
+      else if (OMP_CLAUSE_ORDER_REPRODUCIBLE (clause))
+       pp_string (pp, "reproducible:");
       pp_string (pp, "concurrent)");
       break;
 
index 7d1257b7a750293de494c0c409a199f63b41f3fa..7542d97ce12154cf08b023a2b31a0756c5709aa9 100644 (file)
@@ -1718,6 +1718,9 @@ class auto_suppress_location_wrappers
 /* True for unconstrained modifier on order(concurrent) clause.  */
 #define OMP_CLAUSE_ORDER_UNCONSTRAINED(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_ORDER)->base.public_flag)
+/* True for reproducible modifier on order(concurrent) clause.  */
+#define OMP_CLAUSE_ORDER_REPRODUCIBLE(NODE) \
+  TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_ORDER))
 
 #define OMP_CLAUSE_REDUCTION_CODE(NODE)        \
   (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
diff --git a/libgomp/testsuite/libgomp.c-c++-common/order-reproducible-1.c b/libgomp/testsuite/libgomp.c-c++-common/order-reproducible-1.c
new file mode 100644 (file)
index 0000000..7e6968f
--- /dev/null
@@ -0,0 +1,63 @@
+#include <unistd.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  int a[128];
+  #pragma omp teams num_teams(5)
+  {
+    #pragma omp loop bind(teams)
+    for (int i = 0; i < 128; i++)
+      {
+       a[i] = i;
+       if (i == 0)
+         usleep (20);
+       else if (i == 17)
+         usleep (40);
+      }
+    #pragma omp loop bind(teams)
+    for (int i = 0; i < 128; i++)
+      a[i] += i;
+  }
+  for (int i = 0; i < 128; i++)
+    if (a[i] != 2 * i)
+      abort ();
+  #pragma omp teams num_teams(5)
+  {
+    #pragma omp loop bind(teams) order(concurrent)
+    for (int i = 0; i < 128; i++)
+      {
+       a[i] *= 2;
+       if (i == 1)
+         usleep (20);
+       else if (i == 13)
+         usleep (40);
+      }
+    #pragma omp loop bind(teams) order(concurrent)
+    for (int i = 0; i < 128; i++)
+      a[i] += i;
+  }
+  for (int i = 0; i < 128; i++)
+    if (a[i] != 5 * i)
+      abort ();
+  #pragma omp teams num_teams(5)
+  {
+    #pragma omp loop bind(teams) order(reproducible:concurrent)
+    for (int i = 0; i < 128; i++)
+      {
+       a[i] *= 2;
+       if (i == 2)
+         usleep (20);
+       else if (i == 105)
+         usleep (40);
+      }
+    #pragma omp loop bind(teams) order(reproducible:concurrent)
+    for (int i = 0; i < 128; i++)
+      a[i] += i;
+  }
+  for (int i = 0; i < 128; i++)
+    if (a[i] != 11 * i)
+      abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/order-reproducible-2.c b/libgomp/testsuite/libgomp.c-c++-common/order-reproducible-2.c
new file mode 100644 (file)
index 0000000..c8ba658
--- /dev/null
@@ -0,0 +1,28 @@
+#include <unistd.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  int a[128];
+  #pragma omp parallel num_threads(8)
+  {
+    #pragma omp barrier
+    #pragma omp for nowait schedule (dynamic, 2) order(reproducible:concurrent)
+    for (int i = 0; i < 128; i++)
+      {
+       a[i] = i;
+       if (i == 0)
+         usleep (20);
+       else if (i == 17)
+         usleep (40);
+      }
+    #pragma omp for nowait schedule (dynamic, 2) order(reproducible:concurrent)
+    for (int i = 0; i < 128; i++)
+      a[i] += i;
+  }
+  for (int i = 0; i < 128; i++)
+    if (a[i] != 2 * i)
+      abort ();
+  return 0;
+}