builtin-types.def (BT_FN_VOID_BOOL, [...]): New.

author Jakub Jelinek <jakub@redhat.com>

Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)

committer Jakub Jelinek <jakub@gcc.gnu.org>

Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)
author Jakub Jelinek <jakub@redhat.com>
Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)
diff --git a/gcc/ChangeLog.gomp b/gcc/ChangeLog.gomp

index 7940067c0a40cb647d383b0453fccdde3bae9eaf..66fd79a4c4ff07de14f47c08397ebd86a998d9d0 100644 (file)
--- a/gcc/ChangeLog.gomp
+++ b/gcc/ChangeLog.gomp
@@ -1,3 +1,53 @@
+2018-11-07  Jakub Jelinek  <jakub@redhat.com>
+
+       * builtin-types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+       BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+       BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+       BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+       BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+       * omp-builtins.def (BUILT_IN_GOMP_LOOP_START,
+       BUILT_IN_GOMP_LOOP_ORDERED_START, BUILT_IN_GOMP_LOOP_DOACROSS_START,
+       BUILT_IN_GOMP_LOOP_ULL_START, BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+       BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START, BUILT_IN_GOMP_SECTIONS2_START,
+       BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER): New.
+       * omp-general.h (struct omp_for_data): Add have_reductemp member.
+       * omp-general.c (omp_extract_for_data): Initialize it.
+       * omp-low.c (build_outer_var_ref): Ignore taskgroup outer contexts.
+       Fix up the condition when lookup_decl should be used.
+       (scan_sharing_clauses): Call install_var_local for reductions with
+       task modifier even in worksharing contexts.
+       (lower_rec_input_clauses): Don't lookup_decl reductemp in worksharing
+       contexts.  Handle reductions with task modifier in worksharing
+       contexts.  Ignore _reductemp_ clause in worksharing contexts.
+       (lower_reduction_clauses): Ignore reduction clause with task modifiers
+       even in worksharing contexts.
+       (lower_send_clauses): Likewise.
+       (maybe_add_implicit_barrier_cancel): Add OMP_RETURN argument, don't
+       rely that it is the last stmt in body so far.  Ignore outer taskgroup
+       contexts.
+       (omp_task_reductions_find_first): Move earlier.
+       (lower_omp_task_reductions): Add forward declaration.  Handle
+       OMP_FOR and OMP_SECTIONS, add support for parallel cancellation.
+       (lower_omp_sections): Handle reduction clauses with taskgroup
+       modifiers.  Adjust maybe_add_implicit_barrier_cancel caller.
+       (lower_omp_single): Adjust maybe_add_implicit_barrier_cancel caller.
+       (lower_omp_for): Likewise.  Handle reduction clauses with taskgroup
+       modifiers.
+       * omp-expand.c (omp_adjust_chunk_size): Don't adjust anything if
+       chunk_size is zero.
+       (determine_parallel_type): Don't combine parallel with worksharing
+       which has _reductemp_ clause.
+       (expand_omp_for_generic): Add SCHED_ARG argument.  Handle expansion
+       of worksharing loops with task reductions.
+       (expand_omp_for_static_nochunk): Handle expansion of worksharing
+       loops with task reductions.
+       (expand_omp_for_static_chunk): Likewise.
+       (expand_omp_for): Adjust expand_omp_for_generic caller, use
+       GOMP_loop{,_ull}{,_ordered,_doacross}_start builtins if there are
+       task reductions.
+       (expand_omp_sections): Handle expansion of sections with task
+       reductions.
+
  2018-10-25  Jakub Jelinek  <jakub@redhat.com>
  
         * omp-builtins.def (BUILT_IN_GOMP_LOOP_NONMONOTONIC_RUNTIME_START,
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def

index 6e8448486a1a1b99552e2e7c6d8fb08d2628829f..92c0b0bb779ed513ba68084771d02b1c1aec6607 100644 (file)
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -251,6 +251,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT_CONST_STRING, BT_INT, BT_CONST_STRING)
  DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
  DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VALIST_REF, BT_VOID, BT_VALIST_REF)
  DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
  DEF_FUNCTION_TYPE_1 (BT_FN_FLOAT_CONST_STRING, BT_FLOAT, BT_CONST_STRING)
  DEF_FUNCTION_TYPE_1 (BT_FN_DOUBLE_CONST_STRING, BT_DOUBLE, BT_CONST_STRING)
  DEF_FUNCTION_TYPE_1 (BT_FN_LONGDOUBLE_CONST_STRING,
@@ -621,6 +622,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_UINT32_PTR,
                      BT_VOID, BT_UINT32, BT_UINT32, BT_PTR)
  DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE,
                      BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
                      BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
@@ -731,6 +733,12 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
  DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
                      BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                    BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+                    BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                    BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+                    BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
@@ -739,6 +747,14 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
                      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
                      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                    BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+                    BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+                     BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+                     BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
                       BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/ChangeLog.gomp b/gcc/fortran/ChangeLog.gomp

index 999509f3af9f6a98d082557c5b66e1afa8441854..5355ef1f35df2ac3735bb86ae0893c7e81b9b978 100644 (file)
--- a/gcc/fortran/ChangeLog.gomp
+++ b/gcc/fortran/ChangeLog.gomp
@@ -1,3 +1,12 @@
+2018-11-07  Jakub Jelinek  <jakub@redhat.com>
+
+       * types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+       BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+       BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+       BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+       BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+       BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+
  2018-10-23  Jakub Jelinek  <jakub@redhat.com>
  
         * types.def (BT_FN_UINT_OMPFN_PTR_UINT_UINT): New.
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def

index ba12ede7d938cd1b69c1693c3e397591d5a309ae..7ba23bac34c027b5b7a4b62e35f86ebd36896980 100644 (file)
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -86,6 +86,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT)
  DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
  DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
  DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
  DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
  
  DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
@@ -147,6 +148,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT
  DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
  DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE,
                      BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
                       BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
@@ -221,14 +223,28 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
  DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
                      BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                    BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+                    BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                    BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+                    BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
                      BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
                      BT_BOOL, BT_UINT, BT_PTR, BT_INT)
  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
-                     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-                     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+                    BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+                    BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                    BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+                    BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                     BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+                     BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+                     BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
  
  DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
                       BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def

index 5698af7a6a213e7a709aafee06bc966659c1ef91..70051635fa0ac4b71af4ee659a976e3a5c337d88 100644 (file)
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -164,6 +164,18 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DOACROSS_RUNTIME_START,
                   "GOMP_loop_doacross_runtime_start",
                   BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
                   ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_START,
+                 "GOMP_loop_start",
+                 BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ORDERED_START,
+                 "GOMP_loop_ordered_start",
+                 BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DOACROSS_START,
+                 "GOMP_loop_doacross_start",
+                 BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_STATIC_NEXT, "GOMP_loop_static_next",
                   BT_FN_BOOL_LONGPTR_LONGPTR, ATTR_NOTHROW_LEAF_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, "GOMP_loop_dynamic_next",
@@ -260,6 +272,18 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START,
                   "GOMP_loop_ull_doacross_runtime_start",
                   BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
                   ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_START,
+                 "GOMP_loop_ull_start",
+                 BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+                 "GOMP_loop_ull_ordered_start",
+                 BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START,
+                 "GOMP_loop_ull_doacross_start",
+                 BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+                 ATTR_NOTHROW_LEAF_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT,
                   "GOMP_loop_ull_static_next",
                   BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LEAF_LIST)
@@ -365,6 +389,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKLOOP_ULL, "GOMP_taskloop_ull",
                   ATTR_NOTHROW_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_START, "GOMP_sections_start",
                   BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS2_START, "GOMP_sections2_start",
+                 BT_FN_UINT_UINT_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_NEXT, "GOMP_sections_next",
                   BT_FN_UINT, ATTR_NOTHROW_LEAF_LIST)
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_SECTIONS,
@@ -415,5 +441,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER,
  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_REDUCTION_REMAP,
                   "GOMP_task_reduction_remap",
                   BT_FN_VOID_SIZE_SIZE_PTR, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER,
+                 "GOMP_workshare_task_reduction_unregister",
+                 BT_FN_VOID_BOOL, ATTR_NOTHROW_LEAF_LIST)
  DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare",
                    BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c

index 11790682857fd545edd898cd4722eb066882a7e6..ccb94ba9de4ac3db71a214dee285083039e9c410 100644 (file)
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -204,7 +204,7 @@ workshare_safe_to_combine_p (basic_block ws_entry_bb)
  static tree
  omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
  {
-  if (!simd_schedule)
+  if (!simd_schedule || integer_zerop (chunk_size))
      return chunk_size;
  
    poly_uint64 vf = omp_max_vf ();
@@ -345,13 +345,14 @@ determine_parallel_type (struct omp_region *region)
           if (c == NULL
               || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
                   == OMP_CLAUSE_SCHEDULE_STATIC)
-             || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
-           {
-             region->is_combined_parallel = false;
-             region->inner->is_combined_parallel = false;
-             return;
-           }
+             || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
+             || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
+           return;
         }
+      else if (region->inner->type == GIMPLE_OMP_SECTIONS
+              && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
+                                  OMP_CLAUSE__REDUCTEMP_))
+       return;
  
        region->is_combined_parallel = true;
        region->inner->is_combined_parallel = true;
@@ -2618,6 +2619,7 @@ expand_omp_for_generic (struct omp_region *region,
                         struct omp_for_data *fd,
                         enum built_in_function start_fn,
                         enum built_in_function next_fn,
+                       tree sched_arg,
                         gimple *inner_stmt)
  {
    tree type, istart0, iend0, iend;
@@ -2665,6 +2667,30 @@ expand_omp_for_generic (struct omp_region *region,
        && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
                           OMP_CLAUSE_LASTPRIVATE))
      ordered_lastprivate = false;
+  tree reductions = NULL_TREE;
+  tree mem = NULL_TREE;
+  if (sched_arg)
+    {
+      if (fd->have_reductemp)
+       {
+         tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
+                                   OMP_CLAUSE__REDUCTEMP_);
+         reductions = OMP_CLAUSE_DECL (c);
+         gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+         gimple *g = SSA_NAME_DEF_STMT (reductions);
+         reductions = gimple_assign_rhs1 (g);
+         OMP_CLAUSE_DECL (c) = reductions;
+         entry_bb = gimple_bb (g);
+         edge e = split_block (entry_bb, g);
+         if (region->entry == entry_bb)
+           region->entry = e->dest;
+         gsi = gsi_last_bb (entry_bb);
+       }
+      else
+       reductions = null_pointer_node;
+      /* For now.  */
+      mem = null_pointer_node;
+    }
    if (fd->collapse > 1 || fd->ordered)
      {
        int first_zero_iter1 = -1, first_zero_iter2 = -1;
@@ -2851,7 +2877,18 @@ expand_omp_for_generic (struct omp_region *region,
             {
               t = fold_convert (fd->iter_type, fd->chunk_size);
               t = omp_adjust_chunk_size (t, fd->simd_schedule);
-             if (fd->ordered)
+             if (sched_arg)
+               {
+                 if (fd->ordered)
+                   t = build_call_expr (builtin_decl_explicit (start_fn),
+                                        8, t0, t1, sched_arg, t, t3, t4,
+                                        reductions, mem);
+                 else
+                   t = build_call_expr (builtin_decl_explicit (start_fn),
+                                        9, t0, t1, t2, sched_arg, t, t3, t4,
+                                        reductions, mem);
+               }
+             else if (fd->ordered)
                 t = build_call_expr (builtin_decl_explicit (start_fn),
                                      5, t0, t1, t, t3, t4);
               else
@@ -2884,7 +2921,11 @@ expand_omp_for_generic (struct omp_region *region,
               tree bfn_decl = builtin_decl_explicit (start_fn);
               t = fold_convert (fd->iter_type, fd->chunk_size);
               t = omp_adjust_chunk_size (t, fd->simd_schedule);
-             t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
+             if (sched_arg)
+               t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
+                                    t, t3, t4, reductions, mem);
+             else
+               t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
             }
           else
             t = build_call_expr (builtin_decl_explicit (start_fn),
@@ -2903,6 +2944,17 @@ expand_omp_for_generic (struct omp_region *region,
        gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
                          GSI_SAME_STMT);
      }
+  if (fd->have_reductemp)
+    {
+      gimple *g = gsi_stmt (gsi);
+      gsi_remove (&gsi, true);
+      release_ssa_name (gimple_assign_lhs (g));
+
+      entry_bb = region->entry;
+      gsi = gsi_last_nondebug_bb (entry_bb);
+
+      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+    }
    gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
  
    /* Remove the GIMPLE_OMP_FOR statement.  */
@@ -3201,9 +3253,6 @@ expand_omp_for_generic (struct omp_region *region,
    else
      t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
    gcall *call_stmt = gimple_build_call (t, 0);
-  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
-    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
-  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
    if (fd->ordered)
      {
        tree arr = counts[fd->ordered];
@@ -3212,6 +3261,17 @@ expand_omp_for_generic (struct omp_region *region,
        gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
                         GSI_SAME_STMT);
      }
+  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+    {
+      gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
+      if (fd->have_reductemp)
+       {
+         gimple *g = gimple_build_assign (reductions, NOP_EXPR,
+                                          gimple_call_lhs (call_stmt));
+         gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+       }
+    }
+  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
    gsi_remove (&gsi, true);
  
    /* Connect the new blocks.  */
@@ -3394,6 +3454,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
    bool broken_loop = region->cont == NULL;
    tree *counts = NULL;
    tree n1, n2, step;
+  tree reductions = NULL_TREE;
  
    itype = type = TREE_TYPE (fd->loop.v);
    if (POINTER_TYPE_P (type))
@@ -3477,6 +3538,29 @@ expand_omp_for_static_nochunk (struct omp_region *region,
        gsi = gsi_last_bb (entry_bb);
      }
  
+  if (fd->have_reductemp)
+    {
+      tree t1 = build_int_cst (long_integer_type_node, 0);
+      tree t2 = build_int_cst (long_integer_type_node, 1);
+      tree t3 = build_int_cstu (long_integer_type_node,
+                               (HOST_WIDE_INT_1U << 31) + 1);
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+      reductions = OMP_CLAUSE_DECL (clauses);
+      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+      gimple *g = SSA_NAME_DEF_STMT (reductions);
+      reductions = gimple_assign_rhs1 (g);
+      OMP_CLAUSE_DECL (clauses) = reductions;
+      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+      tree t
+       = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
+                          9, t1, t2, t2, t3, t1, null_pointer_node,
+                          null_pointer_node, reductions, null_pointer_node);
+      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                               true, GSI_SAME_STMT);
+      gsi_remove (&gsi2, true);
+      release_ssa_name (gimple_assign_lhs (g));
+    }
    switch (gimple_omp_for_kind (fd->for_stmt))
      {
      case GF_OMP_FOR_KIND_FOR:
@@ -3747,7 +3831,25 @@ expand_omp_for_static_nochunk (struct omp_region *region,
    if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
      {
        t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
+      if (fd->have_reductemp)
+       {
+         tree fn;
+         if (t)
+           fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+         else
+           fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+         gcall *g = gimple_build_call (fn, 0);
+         if (t)
+           {
+             gimple_call_set_lhs (g, t);
+             gsi_insert_after (&gsi, gimple_build_assign (reductions,
+                                                          NOP_EXPR, t),
+                               GSI_SAME_STMT);
+           }
+         gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+       }
+      else
+       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
      }
    gsi_remove (&gsi, true);
  
@@ -3884,6 +3986,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
    bool broken_loop = region->cont == NULL;
    tree *counts = NULL;
    tree n1, n2, step;
+  tree reductions = NULL_TREE;
  
    itype = type = TREE_TYPE (fd->loop.v);
    if (POINTER_TYPE_P (type))
@@ -3971,6 +4074,29 @@ expand_omp_for_static_chunk (struct omp_region *region,
        gsi = gsi_last_bb (entry_bb);
      }
  
+  if (fd->have_reductemp)
+    {
+      tree t1 = build_int_cst (long_integer_type_node, 0);
+      tree t2 = build_int_cst (long_integer_type_node, 1);
+      tree t3 = build_int_cstu (long_integer_type_node,
+                               (HOST_WIDE_INT_1U << 31) + 1);
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+      reductions = OMP_CLAUSE_DECL (clauses);
+      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+      gimple *g = SSA_NAME_DEF_STMT (reductions);
+      reductions = gimple_assign_rhs1 (g);
+      OMP_CLAUSE_DECL (clauses) = reductions;
+      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+      tree t
+       = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
+                          9, t1, t2, t2, t3, t1, null_pointer_node,
+                          null_pointer_node, reductions, null_pointer_node);
+      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                               true, GSI_SAME_STMT);
+      gsi_remove (&gsi2, true);
+      release_ssa_name (gimple_assign_lhs (g));
+    }
    switch (gimple_omp_for_kind (fd->for_stmt))
      {
      case GF_OMP_FOR_KIND_FOR:
@@ -4274,7 +4400,25 @@ expand_omp_for_static_chunk (struct omp_region *region,
    if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
      {
        t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
+      if (fd->have_reductemp)
+       {
+         tree fn;
+         if (t)
+           fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+         else
+           fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+         gcall *g = gimple_build_call (fn, 0);
+         if (t)
+           {
+             gimple_call_set_lhs (g, t);
+             gsi_insert_after (&gsi, gimple_build_assign (reductions,
+                                                          NOP_EXPR, t),
+                               GSI_SAME_STMT);
+           }
+         gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+       }
+      else
+       gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
      }
    gsi_remove (&gsi, true);
  
@@ -5809,6 +5953,8 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
    else
      {
        int fn_index, start_ix, next_ix;
+      unsigned HOST_WIDE_INT sched = 0;
+      tree sched_arg = NULL_TREE;
  
        gcc_assert (gimple_omp_for_kind (fd.for_stmt)
                   == GF_OMP_FOR_KIND_FOR);
@@ -5822,12 +5968,16 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
             {
               gcc_assert (!fd.have_ordered);
               fn_index = 6;
+             sched = 4;
             }
           else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
                    && !fd.have_ordered)
             fn_index = 7;
           else
-           fn_index = 3;
+           {
+             fn_index = 3;
+             sched = (HOST_WIDE_INT_1U << 31);
+           }
           break;
         case OMP_CLAUSE_SCHEDULE_DYNAMIC:
         case OMP_CLAUSE_SCHEDULE_GUIDED:
@@ -5835,13 +5985,17 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
               && !fd.have_ordered)
             {
               fn_index = 3 + fd.sched_kind;
+             sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
               break;
             }
           fn_index = fd.sched_kind;
+         sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
+         sched += (HOST_WIDE_INT_1U << 31);
           break;
         case OMP_CLAUSE_SCHEDULE_STATIC:
           gcc_assert (fd.have_ordered);
           fn_index = 0;
+         sched = (HOST_WIDE_INT_1U << 31) + 1;
           break;
         default:
           gcc_unreachable ();
@@ -5853,6 +6007,18 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
        else
         start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
        next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
+      if (fd.have_reductemp)
+       {
+         if (fd.ordered)
+           start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
+         else if (fd.have_ordered)
+           start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
+         else
+           start_ix = (int)BUILT_IN_GOMP_LOOP_START;
+         sched_arg = build_int_cstu (long_integer_type_node, sched);
+         if (!fd.chunk_size)
+           fd.chunk_size = integer_zero_node;
+       }
        if (fd.iter_type == long_long_unsigned_type_node)
         {
           start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
@@ -5861,7 +6027,8 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
                       - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
         }
        expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
-                             (enum built_in_function) next_ix, inner_stmt);
+                             (enum built_in_function) next_ix, sched_arg,
+                             inner_stmt);
      }
  
    if (gimple_in_ssa_p (cfun))
@@ -5961,7 +6128,25 @@ expand_omp_sections (struct omp_region *region)
    sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
    gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
    vin = gimple_omp_sections_control (sections_stmt);
-  if (!is_combined_parallel (region))
+  tree clauses = gimple_omp_sections_clauses (sections_stmt);
+  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+  if (reductmp)
+    {
+      tree reductions = OMP_CLAUSE_DECL (reductmp);
+      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+      gimple *g = SSA_NAME_DEF_STMT (reductions);
+      reductions = gimple_assign_rhs1 (g);
+      OMP_CLAUSE_DECL (reductmp) = reductions;
+      gimple_stmt_iterator gsi = gsi_for_stmt (g);
+      t = build_int_cst (unsigned_type_node, len - 1);
+      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
+      stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
+      gimple_call_set_lhs (stmt, vin);
+      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+      gsi_remove (&gsi, true);
+      release_ssa_name (gimple_assign_lhs (g));
+    }
+  else if (!is_combined_parallel (region))
      {
        /* If we are not inside a combined parallel+sections region,
          call GOMP_sections_start.  */
@@ -5975,8 +6160,11 @@ expand_omp_sections (struct omp_region *region)
        u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
        stmt = gimple_build_call (u, 0);
      }
-  gimple_call_set_lhs (stmt, vin);
-  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
+  if (!reductmp)
+    {
+      gimple_call_set_lhs (stmt, vin);
+      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
+    }
    gsi_remove (&si, true);
  
    /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
diff --git a/gcc/omp-general.c b/gcc/omp-general.c

index 2d53e105ec393e7827f00e7ba8a40b40c12cba39..99d8226ef213d18cd3ffdbf092494d58c6fd2a51 100644 (file)
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -138,6 +138,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
    fd->pre = NULL;
    fd->have_nowait = distribute || simd;
    fd->have_ordered = false;
+  fd->have_reductemp = false;
    fd->tiling = NULL_TREE;
    fd->collapse = 1;
    fd->ordered = 0;
@@ -188,6 +189,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
         collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t);
         collapse_count = &OMP_CLAUSE_TILE_COUNT (t);
         break;
+      case OMP_CLAUSE__REDUCTEMP_:
+       fd->have_reductemp = true;
        default:
         break;
        }
diff --git a/gcc/omp-general.h b/gcc/omp-general.h

index b5af0e07ebc0341fdcc91f9ef681890f4765034e..b847506d45282092e64e55eca4709c61f52c42d7 100644 (file)
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -62,7 +62,7 @@ struct omp_for_data
    tree tiling;  /* Tiling values (if non null).  */
    int collapse;  /* Collapsed loops, 1 for a non-collapsed loop.  */
    int ordered;
-  bool have_nowait, have_ordered, simd_schedule;
+  bool have_nowait, have_ordered, simd_schedule, have_reductemp;
    unsigned char sched_modifiers;
    enum omp_clause_schedule_kind sched_kind;
    struct omp_for_data_loop *loops;
diff --git a/gcc/omp-low.c b/gcc/omp-low.c

index 040795950e6447b99ae9d25c22ad82c6919fdd5d..0679904c7edb74f9cf130c8e27d15c5761813a1a 100644 (file)
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -542,6 +542,9 @@ build_outer_var_ref (tree var, omp_context *ctx,
                      enum omp_clause_code code = OMP_CLAUSE_ERROR)
  {
    tree x;
+  omp_context *outer = ctx->outer;
+  while (outer && gimple_code (outer->stmt) == GIMPLE_OMP_TASKGROUP)
+    outer = outer->outer;
  
    if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
      x = var;
@@ -568,44 +571,43 @@ build_outer_var_ref (tree var, omp_context *ctx,
          Similarly for OMP_CLAUSE_PRIVATE with outer ref, that can refer
          to private vars in all worksharing constructs.  */
        x = NULL_TREE;
-      if (ctx->outer && is_taskreg_ctx (ctx))
-       x = lookup_decl (var, ctx->outer);
-      else if (ctx->outer)
+      if (outer && is_taskreg_ctx (outer))
+       x = lookup_decl (var, outer);
+      else if (outer)
         x = maybe_lookup_decl_in_outer_ctx (var, ctx);
        if (x == NULL_TREE)
         x = var;
      }
    else if (code == OMP_CLAUSE_LASTPRIVATE && is_taskloop_ctx (ctx))
      {
-      gcc_assert (ctx->outer);
+      gcc_assert (outer);
        splay_tree_node n
-       = splay_tree_lookup (ctx->outer->field_map,
+       = splay_tree_lookup (outer->field_map,
                              (splay_tree_key) &DECL_UID (var));
        if (n == NULL)
         {
-         if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
+         if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, outer)))
             x = var;
           else
-           x = lookup_decl (var, ctx->outer);
+           x = lookup_decl (var, outer);
         }
        else
         {
           tree field = (tree) n->value;
           /* If the receiver record type was remapped in the child function,
              remap the field into the new record type.  */
-         x = maybe_lookup_field (field, ctx->outer);
+         x = maybe_lookup_field (field, outer);
           if (x != NULL)
             field = x;
  
-         x = build_simple_mem_ref (ctx->outer->receiver_decl);
+         x = build_simple_mem_ref (outer->receiver_decl);
           x = omp_build_component_ref (x, field);
-         if (use_pointer_for_field (var, ctx->outer))
+         if (use_pointer_for_field (var, outer))
             x = build_simple_mem_ref (x);
         }
      }
-  else if (ctx->outer)
+  else if (outer)
      {
-      omp_context *outer = ctx->outer;
        if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
         {
           outer = outer->outer;
@@ -1130,6 +1132,12 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
               install_var_local (decl, ctx);
               break;
             }
+         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+             && OMP_CLAUSE_REDUCTION_TASK (c))
+           {
+             install_var_local (decl, ctx);
+             break;
+           }
           goto do_private;
  
         case OMP_CLAUSE_LASTPRIVATE:
@@ -3833,7 +3841,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
           gimple_call_set_lhs (g, v);
           gimple_seq_add_stmt (ilist, g);
           c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
-         tskred_temp = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
+         tskred_temp = OMP_CLAUSE_DECL (c);
+         if (is_taskreg_ctx (ctx))
+           tskred_temp = lookup_decl (tskred_temp, ctx);
           tree v2 = create_tmp_var (sizetype);
           g = gimple_build_assign (v2, NOP_EXPR, v);
           gimple_seq_add_stmt (ilist, g);
@@ -3890,8 +3900,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
               break;
             case OMP_CLAUSE_REDUCTION:
             case OMP_CLAUSE_IN_REDUCTION:
-             if (is_task_ctx (ctx)
-                 || (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx)))
+             if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c))
                 {
                   task_reduction_p = true;
                   if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -3923,8 +3932,11 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
                else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
                 reduction_omp_orig_ref = true;
               break;
-           case OMP_CLAUSE__LOOPTEMP_:
             case OMP_CLAUSE__REDUCTEMP_:
+             if (!is_taskreg_ctx (ctx))
+               continue;
+             /* FALLTHRU */
+           case OMP_CLAUSE__LOOPTEMP_:
               /* Handle _looptemp_/_reductemp_ clauses only on
                  parallel/task.  */
               if (fd)
@@ -5761,8 +5773,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
       update in that case, otherwise use a lock.  */
    for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
-        && (!OMP_CLAUSE_REDUCTION_TASK (c)
-           || !is_parallel_ctx (ctx)))
+       && !OMP_CLAUSE_REDUCTION_TASK (c))
        {
         if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
             || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
@@ -5784,8 +5795,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
        location_t clause_loc = OMP_CLAUSE_LOCATION (c);
  
        if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
-         || (OMP_CLAUSE_REDUCTION_TASK (c)
-             && is_parallel_ctx (ctx)))
+         || OMP_CLAUSE_REDUCTION_TASK (c))
         continue;
  
        enum omp_clause_code ccode = OMP_CLAUSE_REDUCTION;
@@ -6078,9 +6088,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
         case OMP_CLAUSE__REDUCTEMP_:
           break;
         case OMP_CLAUSE_REDUCTION:
-         if (is_task_ctx (ctx))
-           continue;
-          if (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx))
+         if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c))
             continue;
           break;
         case OMP_CLAUSE_SHARED:
@@ -6511,30 +6519,55 @@ maybe_catch_exception (gimple_seq body)
     cancellation in the implicit barrier.  */
  
  static void
-maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
+maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple *omp_return,
+                                  gimple_seq *body)
  {
-  gimple *omp_return = gimple_seq_last_stmt (*body);
    gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
    if (gimple_omp_return_nowait_p (omp_return))
      return;
-  if (ctx->outer
-      && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
-      && ctx->outer->cancellable)
-    {
-      tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
-      tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
-      tree lhs = create_tmp_var (c_bool_type);
-      gimple_omp_return_set_lhs (omp_return, lhs);
-      tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
-      gimple *g = gimple_build_cond (NE_EXPR, lhs,
-                                   fold_convert (c_bool_type,
-                                                 boolean_false_node),
-                                   ctx->outer->cancel_label, fallthru_label);
-      gimple_seq_add_stmt (body, g);
-      gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
+  for (omp_context *outer = ctx->outer; outer; outer = outer->outer)
+    if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL
+       && outer->cancellable)
+      {
+       tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
+       tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
+       tree lhs = create_tmp_var (c_bool_type);
+       gimple_omp_return_set_lhs (omp_return, lhs);
+       tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
+       gimple *g = gimple_build_cond (NE_EXPR, lhs,
+                                      fold_convert (c_bool_type,
+                                                    boolean_false_node),
+                                      outer->cancel_label, fallthru_label);
+       gimple_seq_add_stmt (body, g);
+       gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
+      }
+    else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP)
+      return;
+}
+
+/* Find the first task_reduction or reduction clause or return NULL
+   if there are none.  */
+
+static inline tree
+omp_task_reductions_find_first (tree clauses, enum tree_code code,
+                               enum omp_clause_code ccode)
+{
+  while (1)
+    {
+      clauses = omp_find_clause (clauses, ccode);
+      if (clauses == NULL_TREE)
+       return NULL_TREE;
+      if (ccode != OMP_CLAUSE_REDUCTION
+         || code == OMP_TASKLOOP
+         || OMP_CLAUSE_REDUCTION_TASK (clauses))
+       return clauses;
+      clauses = OMP_CLAUSE_CHAIN (clauses);
      }
  }
  
+static void lower_omp_task_reductions (omp_context *, enum tree_code, tree,
+                                      gimple_seq *, gimple_seq *);
+
  /* Lower the OpenMP sections directive in the current statement in GSI_P.
     CTX is the enclosing OMP context for the current statement.  */
  
@@ -6546,7 +6579,7 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
    gomp_sections *stmt;
    gimple *t;
    gbind *new_stmt, *bind;
-  gimple_seq ilist, dlist, olist, new_body;
+  gimple_seq ilist, dlist, olist, tred_dlist = NULL, new_body;
  
    stmt = as_a <gomp_sections *> (gsi_stmt (*gsi_p));
  
@@ -6554,6 +6587,27 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
  
    dlist = NULL;
    ilist = NULL;
+
+  tree rclauses
+    = omp_task_reductions_find_first (gimple_omp_sections_clauses (stmt),
+                                     OMP_SECTIONS, OMP_CLAUSE_REDUCTION);
+  tree rtmp = NULL_TREE;
+  if (rclauses)
+    {
+      tree type = build_pointer_type (pointer_sized_int_node);
+      tree temp = create_tmp_var (type);
+      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_);
+      OMP_CLAUSE_DECL (c) = temp;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_sections_clauses (stmt);
+      gimple_omp_sections_set_clauses (stmt, c);
+      lower_omp_task_reductions (ctx, OMP_SECTIONS,
+                                gimple_omp_sections_clauses (stmt),
+                                &ilist, &tred_dlist);
+      rclauses = c;
+      rtmp = make_ssa_name (type);
+      gimple_seq_add_stmt (&ilist, gimple_build_assign (rtmp, temp));
+    }
+
    lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
                            &ilist, &dlist, ctx, NULL);
  
@@ -6625,7 +6679,11 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
                                  OMP_CLAUSE_NOWAIT) != NULL_TREE;
    t = gimple_build_omp_return (nowait);
    gimple_seq_add_stmt (&new_body, t);
-  maybe_add_implicit_barrier_cancel (ctx, &new_body);
+  gimple_seq_add_seq (&new_body, tred_dlist);
+  maybe_add_implicit_barrier_cancel (ctx, t, &new_body);
+
+  if (rclauses)
+    OMP_CLAUSE_DECL (rclauses) = rtmp;
  
    gimple_bind_set_body (new_stmt, new_body);
  }
@@ -6787,7 +6845,7 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
                                  OMP_CLAUSE_NOWAIT) != NULL_TREE;
    gimple *g = gimple_build_omp_return (nowait);
    gimple_seq_add_stmt (&bind_body_tail, g);
-  maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
+  maybe_add_implicit_barrier_cancel (ctx, g, &bind_body_tail);
    if (ctx->record_type)
      {
        gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
@@ -6849,26 +6907,6 @@ lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
    BLOCK_VARS (block) = ctx->block_vars;
  }
  
-/* Find the first task_reduction or reduction clause or return NULL
-   if there are none.  */
-
-static inline tree
-omp_task_reductions_find_first (tree clauses, enum tree_code code,
-                               enum omp_clause_code ccode)
-{
-  while (1)
-    {
-      clauses = omp_find_clause (clauses, ccode);
-      if (clauses == NULL_TREE)
-       return NULL_TREE;
-      if (ccode != OMP_CLAUSE_REDUCTION
-         || code == OMP_TASKLOOP
-         || OMP_CLAUSE_REDUCTION_TASK (clauses))
-       return clauses;
-      clauses = OMP_CLAUSE_CHAIN (clauses);
-    }
-}
-
  /* Helper function for lower_omp_task_reductions.  For a specific PASS
     find out the current clause it should be processed, or return false
     if all have been processed already.  */
@@ -6918,12 +6956,35 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
    enum omp_clause_code ccode
      = (code == OMP_TASKGROUP
         ? OMP_CLAUSE_TASK_REDUCTION : OMP_CLAUSE_REDUCTION);
+  tree cancellable = NULL_TREE;
    clauses = omp_task_reductions_find_first (clauses, code, ccode);
    if (clauses == NULL_TREE)
      return;
+  if (code == OMP_FOR || code == OMP_SECTIONS)
+    {
+      for (omp_context *outer = ctx->outer; outer; outer = outer->outer)
+       if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL
+           && outer->cancellable)
+         {
+           cancellable = error_mark_node;
+           break;
+         }
+       else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP)
+         break;
+    }
    tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
    tree *last = &TYPE_FIELDS (record_type);
    unsigned cnt = 0;
+  if (cancellable)
+    {
+      tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
+                              ptr_type_node);
+      tree ifield = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
+                               integer_type_node);
+      *last = field;
+      DECL_CHAIN (field) = ifield;
+      last = &DECL_CHAIN (ifield);
+    }
    for (int pass = 0; pass < 2; pass++)
      {
        tree decl, type, next;
@@ -7010,7 +7071,49 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
    tree idx = create_tmp_var (size_type_node);
    gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node));
    tree num_thr_sz = create_tmp_var (size_type_node);
+  tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
+  tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
+  tree lab3 = NULL_TREE;
    gimple *g;
+  if (code == OMP_FOR || code == OMP_SECTIONS)
+    {
+      /* For worksharing constructs, only perform it in the master thread,
+        with the exception of cancelled implicit barriers - then only handle
+        the current thread.  */
+      tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
+      t = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
+      tree thr_num = create_tmp_var (integer_type_node);
+      g = gimple_build_call (t, 0);
+      gimple_call_set_lhs (g, thr_num);
+      gimple_seq_add_stmt (end, g);
+      if (cancellable)
+       {
+         tree c;
+         tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+         tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
+         lab3 = create_artificial_label (UNKNOWN_LOCATION);
+         if (code == OMP_FOR)
+           c = gimple_omp_for_clauses (ctx->stmt);
+         else if (code == OMP_SECTIONS)
+           c = gimple_omp_sections_clauses (ctx->stmt);
+         c = OMP_CLAUSE_DECL (omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_));
+         cancellable = c;
+         g = gimple_build_cond (NE_EXPR, c, build_zero_cst (TREE_TYPE (c)),
+                                lab5, lab6);
+         gimple_seq_add_stmt (end, g);
+         gimple_seq_add_stmt (end, gimple_build_label (lab5));
+         g = gimple_build_assign (idx, NOP_EXPR, thr_num);
+         gimple_seq_add_stmt (end, g);
+         g = gimple_build_assign (num_thr_sz, PLUS_EXPR, idx,
+                                  build_one_cst (TREE_TYPE (idx)));
+         gimple_seq_add_stmt (end, g);
+         gimple_seq_add_stmt (end, gimple_build_goto (lab3));
+         gimple_seq_add_stmt (end, gimple_build_label (lab6));
+       }
+      g = gimple_build_cond (NE_EXPR, thr_num, integer_zero_node, lab2, lab4);
+      gimple_seq_add_stmt (end, g);
+      gimple_seq_add_stmt (end, gimple_build_label (lab4));
+    }
    if (code != OMP_PARALLEL)
      {
        t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
@@ -7020,6 +7123,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
        gimple_seq_add_stmt (end, g);
        g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr);
        gimple_seq_add_stmt (end, g);
+      if (cancellable)
+       gimple_seq_add_stmt (end, gimple_build_label (lab3));
      }
    else
      {
@@ -7033,8 +7138,6 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
               NULL_TREE, NULL_TREE);
    tree data = create_tmp_var (pointer_sized_int_node);
    gimple_seq_add_stmt (end, gimple_build_assign (data, t));
-  tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
-  tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
    gimple_seq_add_stmt (end, gimple_build_label (lab1));
    tree ptr;
    if (TREE_CODE (TYPE_SIZE_UNIT (record_type)) == INTEGER_CST)
@@ -7045,6 +7148,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
  
    tree field = TYPE_FIELDS (record_type);
    cnt = 0;
+  if (cancellable)
+    field = DECL_CHAIN (DECL_CHAIN (field));
    for (int pass = 0; pass < 2; pass++)
      {
        tree decl, type, next;
@@ -7117,9 +7222,9 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
  
           tree bfield = DECL_CHAIN (field);
           tree cond;
-         if (code == OMP_PARALLEL)
-           /* In parallel all threads unconditionally initialize all their
-              task reduction private variables.  */
+         if (code == OMP_PARALLEL || code == OMP_FOR || code == OMP_SECTIONS)
+           /* In parallel or worksharing all threads unconditionally
+              initialize all their task reduction private variables.  */
             cond = boolean_true_node;
           else if (TREE_TYPE (ptr) == ptr_type_node)
             {
@@ -7143,6 +7248,18 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
                                  lab3, lab4);
           gimple_seq_add_stmt (end, g);
           gimple_seq_add_stmt (end, gimple_build_label (lab3));
+         if (cancellable && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE)
+           {
+             /* If this reduction doesn't need destruction and parallel
+                has been cancelled, there is nothing to do for this
+                reduction, so jump around the merge operation.  */
+             tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+             g = gimple_build_cond (NE_EXPR, cancellable,
+                                    build_zero_cst (TREE_TYPE (cancellable)),
+                                    lab4, lab5);
+             gimple_seq_add_stmt (end, g);
+             gimple_seq_add_stmt (end, gimple_build_label (lab5));
+           }
  
           tree new_var;
           if (TREE_TYPE (ptr) == ptr_type_node)
@@ -7202,6 +7319,20 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
                   tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
                   tree decl_placeholder
                     = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
+                 tree lab6 = NULL_TREE;
+                 if (cancellable)
+                   {
+                     /* If this reduction needs destruction and parallel
+                        has been cancelled, jump around the merge operation
+                        to the destruction.  */
+                     tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+                     lab6 = create_artificial_label (UNKNOWN_LOCATION);
+                     tree zero = build_zero_cst (TREE_TYPE (cancellable));
+                     g = gimple_build_cond (NE_EXPR, cancellable, zero,
+                                            lab6, lab5);
+                     gimple_seq_add_stmt (end, g);
+                     gimple_seq_add_stmt (end, gimple_build_label (lab5));
+                   }
                   SET_DECL_VALUE_EXPR (placeholder, out);
                   DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
                   SET_DECL_VALUE_EXPR (decl_placeholder, priv);
@@ -7215,6 +7346,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
                       OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
                       OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL;
                     }
+                 if (cancellable)
+                   gimple_seq_add_stmt (end, gimple_build_label (lab6));
                   tree x = lang_hooks.decls.omp_clause_dtor (c, priv);
                   if (x)
                     {
@@ -7247,7 +7380,20 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
             {
               tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
               tree oldv = NULL_TREE;
-
+             tree lab6 = NULL_TREE;
+             if (cancellable)
+               {
+                 /* If this reduction needs destruction and parallel
+                    has been cancelled, jump around the merge operation
+                    to the destruction.  */
+                 tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+                 lab6 = create_artificial_label (UNKNOWN_LOCATION);
+                 tree zero = build_zero_cst (TREE_TYPE (cancellable));
+                 g = gimple_build_cond (NE_EXPR, cancellable, zero,
+                                        lab6, lab5);
+                 gimple_seq_add_stmt (end, g);
+                 gimple_seq_add_stmt (end, gimple_build_label (lab5));
+               }
               if (omp_is_reference (decl)
                   && !useless_type_conversion_p (TREE_TYPE (placeholder),
                                                  TREE_TYPE (ref)))
@@ -7283,6 +7429,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
               OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
               if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION)
                 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
+             if (cancellable)
+               gimple_seq_add_stmt (end, gimple_build_label (lab6));
               tree x = lang_hooks.decls.omp_clause_dtor (c, new_var);
               if (x)
                 {
@@ -7309,10 +7457,16 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
        g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
        gimple_seq_add_stmt (start, g);
      }
-  else if (code == OMP_TASKLOOP || code == OMP_PARALLEL)
+  else
      {
-      tree c = omp_find_clause (gimple_omp_taskreg_clauses (ctx->stmt),
-                               OMP_CLAUSE__REDUCTEMP_);
+      tree c;
+      if (code == OMP_FOR)
+       c = gimple_omp_for_clauses (ctx->stmt);
+      else if (code == OMP_SECTIONS)
+       c = gimple_omp_sections_clauses (ctx->stmt);
+      else
+       c = gimple_omp_taskreg_clauses (ctx->stmt);
+      c = omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_);
        t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)),
                         build_fold_addr_expr (avar));
        gimplify_assign (OMP_CLAUSE_DECL (c), t, start);
@@ -7324,8 +7478,28 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses,
    g = gimple_build_cond (NE_EXPR, idx, num_thr_sz, lab1, lab2);
    gimple_seq_add_stmt (end, g);
    gimple_seq_add_stmt (end, gimple_build_label (lab2));
-  t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER);
-  g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
+  if (code == OMP_FOR || code == OMP_SECTIONS)
+    {
+      enum built_in_function bfn
+       = BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER;
+      t = builtin_decl_explicit (bfn);
+      tree c_bool_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (t)));
+      tree arg;
+      if (cancellable)
+       {
+         arg = create_tmp_var (c_bool_type);
+         gimple_seq_add_stmt (end, gimple_build_assign (arg, NOP_EXPR,
+                                                        cancellable));
+       }
+      else
+       arg = build_int_cst (c_bool_type, 0);
+      g = gimple_build_call (t, 1, arg);
+    }
+  else
+    {
+      t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER);
+      g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
+    }
    gimple_seq_add_stmt (end, g);
    t = build_constructor (atype, NULL);
    TREE_THIS_VOLATILE (t) = 1;
@@ -7953,7 +8127,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
    struct omp_for_data fd, *fdp = NULL;
    gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
    gbind *new_stmt;
-  gimple_seq omp_for_body, body, dlist;
+  gimple_seq omp_for_body, body, dlist, tred_ilist = NULL, tred_dlist = NULL;
+  gimple_seq cnt_list = NULL;
    gimple_seq oacc_head = NULL, oacc_tail = NULL;
    size_t i;
  
@@ -8046,9 +8221,30 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
    /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR.  */
    dlist = NULL;
    body = NULL;
+  tree rclauses
+    = omp_task_reductions_find_first (gimple_omp_for_clauses (stmt), OMP_FOR,
+                                     OMP_CLAUSE_REDUCTION);
+  tree rtmp = NULL_TREE;
+  if (rclauses)
+    {
+      tree type = build_pointer_type (pointer_sized_int_node);
+      tree temp = create_tmp_var (type);
+      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_);
+      OMP_CLAUSE_DECL (c) = temp;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (stmt);
+      gimple_omp_for_set_clauses (stmt, c);
+      lower_omp_task_reductions (ctx, OMP_FOR,
+                                gimple_omp_for_clauses (stmt),
+                                &tred_ilist, &tred_dlist);
+      rclauses = c;
+      rtmp = make_ssa_name (type);
+      gimple_seq_add_stmt (&body, gimple_build_assign (rtmp, temp));
+    }
+
    lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
                            fdp);
-  gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
+  gimple_seq_add_seq (rclauses ? &tred_ilist : &body,
+                     gimple_omp_for_pre_body (stmt));
  
    lower_omp (gimple_omp_body_ptr (stmt), ctx);
  
@@ -8063,20 +8259,24 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
      {
        rhs_p = gimple_omp_for_initial_ptr (stmt, i);
        if (!is_gimple_min_invariant (*rhs_p))
-       *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+       *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
        else if (TREE_CODE (*rhs_p) == ADDR_EXPR)
         recompute_tree_invariant_for_addr_expr (*rhs_p);
  
        rhs_p = gimple_omp_for_final_ptr (stmt, i);
        if (!is_gimple_min_invariant (*rhs_p))
-       *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+       *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
        else if (TREE_CODE (*rhs_p) == ADDR_EXPR)
         recompute_tree_invariant_for_addr_expr (*rhs_p);
  
        rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
        if (!is_gimple_min_invariant (*rhs_p))
-       *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+       *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
      }
+  if (rclauses)
+    gimple_seq_add_seq (&tred_ilist, cnt_list);
+  else
+    gimple_seq_add_seq (&body, cnt_list);
  
    /* Once lowered, extract the bounds and clauses.  */
    omp_extract_for_data (stmt, &fd, NULL);
@@ -8123,13 +8323,26 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
  
    gimple_seq_add_seq (&body, dlist);
  
+  if (rclauses)
+    {
+      gimple_seq_add_seq (&tred_ilist, body);
+      body = tred_ilist;
+    }
+
    body = maybe_catch_exception (body);
  
    if (!phony_loop)
      {
        /* Region exit marker goes at the end of the loop body.  */
-      gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
-      maybe_add_implicit_barrier_cancel (ctx, &body);
+      gimple *g = gimple_build_omp_return (fd.have_nowait);
+      gimple_seq_add_stmt (&body, g);
+
+      gimple_seq_add_seq (&body, tred_dlist);
+
+      maybe_add_implicit_barrier_cancel (ctx, g, &body);
+
+      if (rclauses)
+       OMP_CLAUSE_DECL (rclauses) = rtmp;
      }
  
    /* Add OpenACC joining and reduction markers just after the loop.  */
diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp

index cddc1365d05d7635ce0069fff9b0075f35285e57..0f459346aeeadf175f12004e972f711fbd609e47 100644 (file)
--- a/libgomp/ChangeLog.gomp
+++ b/libgomp/ChangeLog.gomp
@@ -1,3 +1,107 @@
+2018-11-07  Jakub Jelinek  <jakub@redhat.com>
+
+       * libgomp_g.h (GOMP_loop_start, GOMP_loop_ordered_start,
+       GOMP_loop_doacross_start, GOMP_loop_ull_start,
+       GOMP_loop_ull_ordered_start, GOMP_loop_ull_doacross_start,
+       GOMP_workshare_task_reduction_unregister, GOMP_sections2_start): New
+       prototypes.
+       * libgomp.h (struct gomp_doacross_work_share): Add extra field.
+       (struct gomp_work_share): Add task_reductions field.
+       (struct gomp_taskgroup): Add workshare flag.
+       (gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument.
+       (gomp_workshare_taskgroup_start,
+       gomp_workshare_task_reduction_register): New prototypes.
+       (gomp_init_work_share, gomp_work_share_start): Change bool argument
+       to size_t.
+       * libgomp.map (GOMP_5.0): Export GOMP_loop_start,
+       GOMP_loop_ordered_start, GOMP_loop_doacross_start,
+       GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
+       GOMP_loop_ull_doacross_start,
+       GOMP_workshare_task_reduction_unregister and GOMP_sections2_start.
+       * loop.c: Include string.h.
+       (GOMP_loop_runtime_next): Add ialias.
+       (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+       (gomp_loop_static_start, gomp_loop_dynamic_start,
+       gomp_loop_guided_start, gomp_loop_ordered_static_start,
+       gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start,
+       gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
+       gomp_loop_doacross_guided_start): Adjust gomp_work_share_start
+       or gomp_doacross_init callers.
+       (gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start,
+       GOMP_loop_doacross_start): New functions.
+       * loop_ull.c: Include string.h.
+       (GOMP_loop_ull_runtime_next): Add ialias.
+       (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+       (gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start,
+       gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start,
+       gomp_loop_ull_ordered_dynamic_start,
+       gomp_loop_ull_ordered_guided_start,
+       gomp_loop_ull_doacross_static_start,
+       gomp_loop_ull_doacross_dynamic_start,
+       gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start
+       and gomp_doacross_ull_init callers.
+       (gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
+       GOMP_loop_ull_doacross_start): New functions.
+       * sections.c: Include string.h.
+       (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+       (GOMP_sections_start): Adjust gomp_work_share_start caller.
+       (GOMP_sections2_start): New function.
+       * ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add
+       EXTRA argument.  If not needed to prepare array, if extra is 0,
+       clear ws->doacross, otherwise allocate just doacross structure and
+       extra payload.  If array is needed, allocate also extra payload.
+       (GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post,
+       GOMP_doacross_ull_wait): Handle doacross->array == NULL like
+       doacross == NULL.
+       * parallel.c (GOMP_cancellation_point): If taskgroup has workshare
+       flag set, check cancelled of prev taskgroup if any.
+       (GOMP_cancel): If taskgroup has workshare flag set, set cancelled
+       on prev taskgroup if any.
+       * single.c (GOMP_single_start, GOMP_single_copy_start): Adjust
+       gomp_work_share_start callers.
+       * target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data):
+       If taskgroup has workshare flag set, check cancelled on prev
+       taskgroup if any.  Guard all cancellation tests with
+       gomp_cancel_var test.
+       * taskloop.c (GOMP_taskloop): Likewise.
+       * task.c (GOMP_task, gomp_create_target_task, gomp_task_run_pre,
+       GOMP_taskwait_depend): Likewise.
+       (gomp_taskgroup_init): Clear workshare flag, reorder initialization.
+       (gomp_reduction_register): Add always_inline attribute.  Add
+       ORIG argument, if non-NULL, don't allocate memory, but copy it
+       from there.
+       (gomp_create_artificial_team): New function.
+       (GOMP_taskgroup_reduction_register): Extend function comment.
+       Use gomp_create_artificial_team.  Adjust gomp_reduction_register
+       caller.
+       (gomp_parallel_reduction_register): Adjust gomp_reduction_register
+       caller.
+       (gomp_workshare_task_reduction_register,
+       gomp_workshare_taskgroup_start,
+       GOMP_workshare_task_reduction_unregister): New functions.
+       * team.c (gomp_new_team): Adjust gomp_init_work_share caller.
+       * work.c (gomp_init_work_share): Change ORDERED argument from
+       bool to size_t, if more than 1 allocate also extra payload at the
+       end of array.  Never keep ordered_team_ids NULL, set it
+       to inline_ordered_team_ids instead.
+       (gomp_work_share_start): Change ORDERED argument from bool to size_t,
+       return true instead of ws.
+       * testsuite/libgomp.c-c++-common/cancel-parallel-1.c: New test.
+       * testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c: New test.
+       * testsuite/libgomp.c-c++-common/task-reduction-6.c (struct S):
+       Use unsigned long long int instead of unsigned long int.
+       (main): Verify r == t.
+       * testsuite/libgomp.c-c++-common/task-reduction-8.c: New test.
+       * testsuite/libgomp.c-c++-common/task-reduction-9.c: New test.
+       * testsuite/libgomp.c-c++-common/task-reduction-11.c: New test.
+       * testsuite/libgomp.c-c++-common/task-reduction-12.c: New test.
+       * testsuite/libgomp.c++/task-reduction-14.C: New test.
+       * testsuite/libgomp.c++/task-reduction-15.C: New test.
+       * testsuite/libgomp.c++/task-reduction-16.C: New test.
+       * testsuite/libgomp.c++/task-reduction-17.C: New test.
+       * testsuite/libgomp.c++/task-reduction-18.C: New test.
+       * testsuite/libgomp.c++/task-reduction-19.C: New test.
+
  2018-10-26  Jakub Jelinek  <jakub@redhat.com>
  
         * libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h

index 9728c8e5f6876f8e6e9d4d74213ba5e826ca90f7..828e9b0095b3128998b10617ee637f4044535b65 100644 (file)
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -188,6 +188,8 @@ struct gomp_doacross_work_share
      /* Likewise, but for the ull implementation.  */
      unsigned long long boundary_ull;
    };
+  /* Pointer to extra memory if needed for lastprivate(conditional).  */
+  void *extra;
    /* Array of shift counts for each dimension if they can be flattened.  */
    unsigned int shift_counts[];
  };
@@ -289,6 +291,9 @@ struct gomp_work_share
      struct gomp_work_share *next_free;
    };
  
+  /* Task reductions for this work-sharing construct.  */
+  uintptr_t *task_reductions;
+
    /* If only few threads are in the team, ordered_team_ids can point
       to this array which fills the padding at the end of this struct.  */
    unsigned inline_ordered_team_ids[0];
@@ -490,6 +495,7 @@ struct gomp_taskgroup
    uintptr_t *reductions;
    bool in_taskgroup_wait;
    bool cancelled;
+  bool workshare;
    gomp_sem_t taskgroup_sem;
    size_t num_children;
  };
@@ -795,9 +801,9 @@ extern void gomp_ordered_next (void);
  extern void gomp_ordered_static_init (void);
  extern void gomp_ordered_static_next (void);
  extern void gomp_ordered_sync (void);
-extern void gomp_doacross_init (unsigned, long *, long);
+extern void gomp_doacross_init (unsigned, long *, long, size_t);
  extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
-                                   unsigned long long);
+                                   unsigned long long, size_t);
  
  /* parallel.c */
  
@@ -822,6 +828,8 @@ extern bool gomp_create_target_task (struct gomp_device_descr *,
                                      enum gomp_target_task_state);
  extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
                                                                 unsigned);
+extern void gomp_workshare_taskgroup_start (void);
+extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *);
  
  static void inline
  gomp_finish_task (struct gomp_task *task)
@@ -1061,9 +1069,9 @@ extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
  
  /* work.c */
  
-extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
+extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned);
  extern void gomp_fini_work_share (struct gomp_work_share *);
-extern bool gomp_work_share_start (bool);
+extern bool gomp_work_share_start (size_t);
  extern void gomp_work_share_end (void);
  extern bool gomp_work_share_end_cancel (void);
  extern void gomp_work_share_end_nowait (void);
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map

index 0ea7578e027bd794df0bc1f0aa057bf72b848229..4c19a259eaa6638091c632ebf8404a43e05b2217 100644 (file)
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -316,22 +316,30 @@ GOMP_4.5 {
  
  GOMP_5.0 {
    global:
+       GOMP_loop_doacross_start;
         GOMP_loop_maybe_nonmonotonic_runtime_next;
         GOMP_loop_maybe_nonmonotonic_runtime_start;
         GOMP_loop_nonmonotonic_runtime_next;
         GOMP_loop_nonmonotonic_runtime_start;
+       GOMP_loop_ordered_start;
+       GOMP_loop_start;
+       GOMP_loop_ull_doacross_start;
         GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
         GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
         GOMP_loop_ull_nonmonotonic_runtime_next;
         GOMP_loop_ull_nonmonotonic_runtime_start;
+       GOMP_loop_ull_ordered_start;
+       GOMP_loop_ull_start;
         GOMP_parallel_loop_maybe_nonmonotonic_runtime;
         GOMP_parallel_loop_nonmonotonic_runtime;
         GOMP_parallel_reductions;
+       GOMP_sections2_start;
         GOMP_taskgroup_reduction_register;
         GOMP_taskgroup_reduction_unregister;
         GOMP_task_reduction_remap;
         GOMP_taskwait_depend;
         GOMP_teams_reg;
+       GOMP_workshare_task_reduction_unregister;
  } GOMP_4.5;
  
  OACC_2.0 {
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h

index 6d24a4f0192a1831ac88a243f1155e93e4f3ceab..5b54839b29e5aeb5bee5a71775c25790e01754a0 100644 (file)
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -61,6 +61,8 @@ extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long,
                                                   long *, long *);
  extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long,
                                                         long *, long *);
+extern bool GOMP_loop_start (long, long, long, long, long, long *, long *,
+                            uintptr_t *, void **);
  
  extern bool GOMP_loop_ordered_static_start (long, long, long, long,
                                             long *, long *);
@@ -69,6 +71,8 @@ extern bool GOMP_loop_ordered_dynamic_start (long, long, long, long,
  extern bool GOMP_loop_ordered_guided_start (long, long, long, long,
                                             long *, long *);
  extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
+extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *,
+                                    long *, uintptr_t *, void **);
  
  extern bool GOMP_loop_static_next (long *, long *);
  extern bool GOMP_loop_dynamic_next (long *, long *);
@@ -92,6 +96,8 @@ extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *,
                                              long *);
  extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
                                               long *);
+extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *,
+                                     long *, uintptr_t *, void **);
  
  extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
                                              unsigned, long, long, long, long);
@@ -179,6 +185,10 @@ extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool,
                                                             unsigned long long,
                                                             unsigned long long *,
                                                             unsigned long long *);
+extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long,
+                                unsigned long long, long, unsigned long long,
+                                unsigned long long *, unsigned long long *,
+                                uintptr_t *, void **);
  
  extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
                                                 unsigned long long,
@@ -203,6 +213,13 @@ extern bool GOMP_loop_ull_ordered_runtime_start (bool, unsigned long long,
                                                  unsigned long long,
                                                  unsigned long long *,
                                                  unsigned long long *);
+extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long,
+                                        unsigned long long,
+                                        unsigned long long, long,
+                                        unsigned long long,
+                                        unsigned long long *,
+                                        unsigned long long *,
+                                        uintptr_t *, void **);
  
  extern bool GOMP_loop_ull_static_next (unsigned long long *,
                                        unsigned long long *);
@@ -249,6 +266,11 @@ extern bool GOMP_loop_ull_doacross_runtime_start (unsigned,
                                                   unsigned long long *,
                                                   unsigned long long *,
                                                   unsigned long long *);
+extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *,
+                                         long, unsigned long long,
+                                         unsigned long long *,
+                                         unsigned long long *,
+                                         uintptr_t *, void **);
  
  /* ordered.c */
  
@@ -289,10 +311,12 @@ extern void GOMP_taskgroup_end (void);
  extern void GOMP_taskgroup_reduction_register (uintptr_t *);
  extern void GOMP_taskgroup_reduction_unregister (uintptr_t *);
  extern void GOMP_task_reduction_remap (size_t, size_t, void **);
+extern void GOMP_workshare_task_reduction_unregister (bool);
  
  /* sections.c */
  
  extern unsigned GOMP_sections_start (unsigned);
+extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **);
  extern unsigned GOMP_sections_next (void);
  extern void GOMP_parallel_sections_start (void (*) (void *), void *,
                                           unsigned, unsigned);
diff --git a/libgomp/loop.c b/libgomp/loop.c

index bafd89a0c20b524544999c87eb30a50b7f079609..4e0683ba675ad1a748efadbcb287f165d63a9235 100644 (file)
--- a/libgomp/loop.c
+++ b/libgomp/loop.c
@@ -27,9 +27,13 @@
  
  #include <limits.h>
  #include <stdlib.h>
+#include <string.h>
  #include "libgomp.h"
  
  
+ialias (GOMP_loop_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
  /* Initialize the given work share construct from the given arguments.  */
  
  static inline void
@@ -101,7 +105,7 @@ gomp_loop_static_start (long start, long end, long incr, long chunk_size,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_STATIC, chunk_size);
@@ -123,7 +127,7 @@ gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_DYNAMIC, chunk_size);
@@ -151,7 +155,7 @@ gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_GUIDED, chunk_size);
@@ -197,6 +201,100 @@ GOMP_loop_runtime_start (long start, long end, long incr,
      }
  }
  
+static long
+gomp_adjust_sched (long sched, long *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+       struct gomp_task_icv *icv = gomp_icv (false);
+       sched = icv->run_sched_var & ~GFS_MONOTONIC;
+       switch (sched)
+         {
+         case GFS_STATIC:
+         case GFS_DYNAMIC:
+         case GFS_GUIDED:
+           *chunk_size = icv->run_sched_chunk_size;
+           break;
+         case GFS_AUTO:
+           sched = GFS_STATIC;
+           *chunk_size = 0;
+           break;
+         default:
+           abort ();
+         }
+       return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_start (long start, long end, long incr, long sched,
+                long chunk_size, long *istart, long *iend,
+                uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, start, end, incr,
+                     sched, chunk_size);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      if (mem)
+       {
+         uintptr_t size = (uintptr_t) *mem;
+         if (size > (sizeof (struct gomp_work_share)
+                     - offsetof (struct gomp_work_share,
+                                 inline_ordered_team_ids)))
+           thr->ts.work_share->ordered_team_ids
+             = gomp_malloc_cleared (size);
+         else
+           memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+         *mem = (void *) thr->ts.work_share->ordered_team_ids;
+       }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      if (mem)
+       *mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+  if (!istart)
+    return true;
+  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
+
  /* The *_ordered_*_start routines are similar.  The only difference is that
     this work-share construct is initialized to expect an ORDERED section.  */
  
@@ -207,7 +305,7 @@ gomp_loop_ordered_static_start (long start, long end, long incr,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_STATIC, chunk_size);
@@ -225,7 +323,7 @@ gomp_loop_ordered_dynamic_start (long start, long end, long incr,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_DYNAMIC, chunk_size);
@@ -250,7 +348,7 @@ gomp_loop_ordered_guided_start (long start, long end, long incr,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_init (thr->ts.work_share, start, end, incr,
                       GFS_GUIDED, chunk_size);
@@ -297,6 +395,81 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr,
      }
  }
  
+bool
+GOMP_loop_ordered_start (long start, long end, long incr, long sched,
+                        long chunk_size, long *istart, long *iend,
+                        uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, start, end, incr,
+                     sched, chunk_size);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      if (sched == GFS_STATIC)
+       gomp_ordered_static_init ();
+      else
+       gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+       gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+       = (uintptr_t) (thr->ts.work_share->ordered_team_ids
+                      + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
  /* The *_doacross_*_start routines are similar.  The only difference is that
     this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
     section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
@@ -310,11 +483,11 @@ gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                       GFS_STATIC, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -328,11 +501,11 @@ gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                       GFS_DYNAMIC, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -354,11 +527,11 @@ gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
                       GFS_GUIDED, chunk_size);
-      gomp_doacross_init (ncounts, counts, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -402,6 +575,50 @@ GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
      }
  }
  
+bool
+GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
+                         long chunk_size, long *istart, long *iend,
+                         uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+       extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+                     sched, chunk_size);
+      gomp_doacross_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
+
  /* The *_next routines are called when the thread completes processing of
     the iteration block currently assigned to it.  If the work-share
     construct is bound directly to a parallel construct, then the iteration
diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c

index 7b2dfe6e26b2b444131620e4a0d1c2c39df963bb..ac658023e13baba57575479673f1a8d67e156818 100644 (file)
--- a/libgomp/loop_ull.c
+++ b/libgomp/loop_ull.c
@@ -27,8 +27,12 @@
  
  #include <limits.h>
  #include <stdlib.h>
+#include <string.h>
  #include "libgomp.h"
  
+ialias (GOMP_loop_ull_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
  typedef unsigned long long gomp_ull;
  
  /* Initialize the given work share construct from the given arguments.  */
@@ -104,7 +108,7 @@ gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_STATIC, chunk_size);
@@ -122,7 +126,7 @@ gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_DYNAMIC, chunk_size);
@@ -148,7 +152,7 @@ gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_GUIDED, chunk_size);
@@ -195,6 +199,99 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
      }
  }
  
+static long
+gomp_adjust_sched (long sched, gomp_ull *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+       struct gomp_task_icv *icv = gomp_icv (false);
+       sched = icv->run_sched_var & ~GFS_MONOTONIC;
+       switch (sched)
+         {
+         case GFS_STATIC:
+         case GFS_DYNAMIC:
+         case GFS_GUIDED:
+           *chunk_size = icv->run_sched_chunk_size;
+           break;
+         case GFS_AUTO:
+           sched = GFS_STATIC;
+           *chunk_size = 0;
+           break;
+         default:
+           abort ();
+         }
+       return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
+                    gomp_ull incr, long sched, gomp_ull chunk_size,
+                    gomp_ull *istart, gomp_ull *iend,
+                    uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+                         sched, chunk_size);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      if (mem)
+       {
+         uintptr_t size = (uintptr_t) *mem;
+         if (size > (sizeof (struct gomp_work_share)
+                     - offsetof (struct gomp_work_share,
+                                 inline_ordered_team_ids)))
+           thr->ts.work_share->ordered_team_ids
+             = gomp_malloc_cleared (size);
+         else
+           memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+         *mem = (void *) thr->ts.work_share->ordered_team_ids;
+       }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      if (mem)
+       *mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
  /* The *_ordered_*_start routines are similar.  The only difference is that
     this work-share construct is initialized to expect an ORDERED section.  */
  
@@ -206,7 +303,7 @@ gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_STATIC, chunk_size);
@@ -225,7 +322,7 @@ gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_DYNAMIC, chunk_size);
@@ -251,7 +348,7 @@ gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
      {
        gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_GUIDED, chunk_size);
@@ -299,6 +396,82 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
      }
  }
  
+bool
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
+                            gomp_ull incr, long sched, gomp_ull chunk_size,
+                            gomp_ull *istart, gomp_ull *iend,
+                            uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+                         sched, chunk_size);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      if (sched == GFS_STATIC)
+       gomp_ordered_static_init ();
+      else
+       gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+       gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+       = (uintptr_t) (thr->ts.work_share->ordered_team_ids
+                      + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_ull_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_ull_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
  /* The *_doacross_*_start routines are similar.  The only difference is that
     this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
     section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
@@ -313,11 +486,11 @@ gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
    struct gomp_thread *thr = gomp_thread ();
  
    thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_STATIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -332,11 +505,11 @@ gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_DYNAMIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -359,11 +532,11 @@ gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
    struct gomp_thread *thr = gomp_thread ();
    bool ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_GUIDED, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
        gomp_work_share_init_done ();
      }
  
@@ -407,6 +580,51 @@ GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
      }
  }
  
+bool
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
+                             long sched, gomp_ull chunk_size,
+                             gomp_ull *istart, gomp_ull *iend,
+                             uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+       extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+                         sched, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
  /* The *_next routines are called when the thread completes processing of
     the iteration block currently assigned to it.  If the work-share
     construct is bound directly to a parallel construct, then the iteration
diff --git a/libgomp/ordered.c b/libgomp/ordered.c

index 1bdd5b2f25bcf4bccf664c4bbb106717a7ced8fc..521e9122d908ecb483519480367ba0da717c75a6 100644 (file)
--- a/libgomp/ordered.c
+++ b/libgomp/ordered.c
@@ -259,7 +259,8 @@ GOMP_ordered_end (void)
  #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
  
  void
-gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
+gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size,
+                   size_t extra)
  {
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
@@ -269,13 +270,24 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
    struct gomp_doacross_work_share *doacross;
  
    if (team == NULL || team->nthreads == 1)
-    return;
+    {
+    empty:
+      if (!extra)
+       ws->doacross = NULL;
+      else
+       {
+         doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+         doacross->extra = (void *) (doacross + 1);
+         ws->doacross = doacross;
+       }
+      return;
+    }
  
    for (i = 0; i < ncounts; i++)
      {
        /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
        if (counts[i] == 0)
-       return;
+       goto empty;
  
        if (num_bits <= MAX_COLLAPSED_BITS)
         {
@@ -314,7 +326,7 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
    elt_sz = (elt_sz + 63) & ~63UL;
  
    doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
-                         + shift_sz);
+                         + shift_sz + extra);
    doacross->chunk_size = chunk_size;
    doacross->elt_sz = elt_sz;
    doacross->ncounts = ncounts;
@@ -322,6 +334,13 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
    doacross->array = (unsigned char *)
                     ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
                      & ~(uintptr_t) 63);
+  if (extra)
+    {
+      doacross->extra = doacross->array + num_ents * elt_sz;
+      memset (doacross->extra, '\0', extra);
+    }
+  else
+    doacross->extra = NULL;
    if (num_bits <= MAX_COLLAPSED_BITS)
      {
        unsigned int shift_count = 0;
@@ -360,7 +379,8 @@ GOMP_doacross_post (long *counts)
    unsigned long ent;
    unsigned int i;
  
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
      {
        __sync_synchronize ();
        return;
@@ -411,7 +431,8 @@ GOMP_doacross_wait (long first, ...)
    unsigned long ent;
    unsigned int i;
  
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
      {
        __sync_synchronize ();
        return;
@@ -488,7 +509,8 @@ GOMP_doacross_wait (long first, ...)
  typedef unsigned long long gomp_ull;
  
  void
-gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
+gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts,
+                       gomp_ull chunk_size, size_t extra)
  {
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
@@ -498,13 +520,24 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
    struct gomp_doacross_work_share *doacross;
  
    if (team == NULL || team->nthreads == 1)
-    return;
+    {
+    empty:
+      if (!extra)
+       ws->doacross = NULL;
+      else
+       {
+         doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+         doacross->extra = (void *) (doacross + 1);
+         ws->doacross = doacross;
+       }
+      return;
+    }
  
    for (i = 0; i < ncounts; i++)
      {
        /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
        if (counts[i] == 0)
-       return;
+       goto empty;
  
        if (num_bits <= MAX_COLLAPSED_BITS)
         {
@@ -557,6 +590,13 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
    doacross->array = (unsigned char *)
                     ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
                      & ~(uintptr_t) 63);
+  if (extra)
+    {
+      doacross->extra = doacross->array + num_ents * elt_sz;
+      memset (doacross->extra, '\0', extra);
+    }
+  else
+    doacross->extra = NULL;
    if (num_bits <= MAX_COLLAPSED_BITS)
      {
        unsigned int shift_count = 0;
@@ -595,7 +635,8 @@ GOMP_doacross_ull_post (gomp_ull *counts)
    unsigned long ent;
    unsigned int i;
  
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
      {
        __sync_synchronize ();
        return;
@@ -667,7 +708,8 @@ GOMP_doacross_ull_wait (gomp_ull first, ...)
    unsigned long ent;
    unsigned int i;
  
-  if (__builtin_expect (doacross == NULL, 0))
+  if (__builtin_expect (doacross == NULL, 0)
+      || __builtin_expect (doacross->array == NULL, 0))
      {
        __sync_synchronize ();
        return;
diff --git a/libgomp/parallel.c b/libgomp/parallel.c

index ead1394aaa3af4d99c86c2fbe375dc0ace8de9ec..c7a8c788a3bc629195c243d40e4603d34ce3dd40 100644 (file)
--- a/libgomp/parallel.c
+++ b/libgomp/parallel.c
@@ -205,8 +205,15 @@ GOMP_cancellation_point (int which)
      }
    else if (which & GOMP_CANCEL_TASKGROUP)
      {
-      if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
-       return true;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return true;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return true;
+       }
        /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
          as #pragma omp cancel parallel also cancels all explicit
          tasks.  */
@@ -238,11 +245,17 @@ GOMP_cancel (int which, bool do_cancel)
      }
    else if (which & GOMP_CANCEL_TASKGROUP)
      {
-      if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
+      if (thr->task->taskgroup)
         {
-         gomp_mutex_lock (&team->task_lock);
-         thr->task->taskgroup->cancelled = true;
-         gomp_mutex_unlock (&team->task_lock);
+         struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
+         if (taskgroup->workshare && taskgroup->prev)
+           taskgroup = taskgroup->prev;
+         if (!taskgroup->cancelled)
+           {
+             gomp_mutex_lock (&team->task_lock);
+             taskgroup->cancelled = true;
+             gomp_mutex_unlock (&team->task_lock);
+           }
         }
        return true;
      }
diff --git a/libgomp/sections.c b/libgomp/sections.c

index 2a6e6ec1c9d474a9f4318c8d0e81f19b5d185188..3449e0067ddcdb6803a6de92aaa813a477607f88 100644 (file)
--- a/libgomp/sections.c
+++ b/libgomp/sections.c
@@ -26,8 +26,11 @@
  /* This file handles the SECTIONS construct.  */
  
  #include "libgomp.h"
+#include <string.h>
  
  
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
  /* Initialize the given work share construct from the given arguments.  */
  
  static inline void
@@ -72,7 +75,7 @@ GOMP_sections_start (unsigned count)
    struct gomp_thread *thr = gomp_thread ();
    long s, e, ret;
  
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
      {
        gomp_sections_init (thr->ts.work_share, count);
        gomp_work_share_init_done ();
@@ -95,6 +98,66 @@ GOMP_sections_start (unsigned count)
    return ret;
  }
  
+unsigned
+GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  long s, e, ret;
+
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      gomp_sections_init (thr->ts.work_share, count);
+      if (reductions)
+       {
+         GOMP_taskgroup_reduction_register (reductions);
+         thr->task->taskgroup->workshare = true;
+         thr->ts.work_share->task_reductions = reductions;
+       }
+      if (mem)
+       {
+         uintptr_t size = (uintptr_t) *mem;
+         if (size > (sizeof (struct gomp_work_share)
+                     - offsetof (struct gomp_work_share,
+                                 inline_ordered_team_ids)))
+           thr->ts.work_share->ordered_team_ids
+             = gomp_malloc_cleared (size);
+         else
+           memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+         *mem = (void *) thr->ts.work_share->ordered_team_ids;
+       }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+       {
+         uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+         gomp_workshare_task_reduction_register (reductions,
+                                                 first_reductions);
+       }
+      if (mem)
+       *mem = (void *) thr->ts.work_share->ordered_team_ids;
+    }
+
+#ifdef HAVE_SYNC_BUILTINS
+  if (gomp_iter_dynamic_next (&s, &e))
+    ret = s;
+  else
+    ret = 0;
+#else
+  gomp_mutex_lock (&thr->ts.work_share->lock);
+  if (gomp_iter_dynamic_next_locked (&s, &e))
+    ret = s;
+  else
+    ret = 0;
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+  return ret;
+}
+
  /* This routine is called when the thread completes processing of the
     section currently assigned to it.  If the work-share construct is
     bound directly to a parallel construct, then the construct may have
diff --git a/libgomp/single.c b/libgomp/single.c

index 24a7780ad93acf9f6dff61bbcf605b7e693de4a2..d5093c6730c396221bce4109b5d8208be42fe6e6 100644 (file)
--- a/libgomp/single.c
+++ b/libgomp/single.c
@@ -47,7 +47,7 @@ GOMP_single_start (void)
    return __sync_bool_compare_and_swap (&team->single_count, single_count,
                                        single_count + 1L);
  #else
-  bool ret = gomp_work_share_start (false);
+  bool ret = gomp_work_share_start (0);
    if (ret)
      gomp_work_share_init_done ();
    gomp_work_share_end_nowait ();
@@ -68,7 +68,7 @@ GOMP_single_copy_start (void)
    bool first;
    void *ret;
  
-  first = gomp_work_share_start (false);
+  first = gomp_work_share_start (0);
    
    if (first)
      {
diff --git a/libgomp/target.c b/libgomp/target.c

index d9288274243da97dcb8007604fbe7185f38a8b5c..8ebc2a370a1656d0a3e3e68664f3048ef668ffb1 100644 (file)
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1854,11 +1854,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
               struct gomp_team *team = thr->ts.team;
               /* If parallel or taskgroup has been cancelled, don't start new
                  tasks.  */
-             if (team
-                 && (gomp_team_barrier_cancelled (&team->barrier)
-                     || (thr->task->taskgroup
-                         && thr->task->taskgroup->cancelled)))
-               return;
+             if (__builtin_expect (gomp_cancel_var, 0) && team)
+               {
+                 if (gomp_team_barrier_cancelled (&team->barrier))
+                   return;
+                 if (thr->task->taskgroup)
+                   {
+                     if (thr->task->taskgroup->cancelled)
+                       return;
+                     if (thr->task->taskgroup->workshare
+                         && thr->task->taskgroup->prev
+                         && thr->task->taskgroup->prev->cancelled)
+                       return;
+                   }
+               }
  
               gomp_task_maybe_wait_for_dependencies (depend);
             }
@@ -1873,10 +1882,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
    /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return;
+       }
+    }
  
    gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
  }
@@ -1985,11 +2004,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
               struct gomp_team *team = thr->ts.team;
               /* If parallel or taskgroup has been cancelled, don't start new
                  tasks.  */
-             if (team
-                 && (gomp_team_barrier_cancelled (&team->barrier)
-                     || (thr->task->taskgroup
-                         && thr->task->taskgroup->cancelled)))
-               return;
+             if (__builtin_expect (gomp_cancel_var, 0) && team)
+               {
+                 if (gomp_team_barrier_cancelled (&team->barrier))
+                   return;
+                 if (thr->task->taskgroup)
+                   {
+                     if (thr->task->taskgroup->cancelled)
+                       return;
+                     if (thr->task->taskgroup->workshare
+                         && thr->task->taskgroup->prev
+                         && thr->task->taskgroup->prev->cancelled)
+                       return;
+                   }
+               }
  
               gomp_task_maybe_wait_for_dependencies (depend);
             }
@@ -2004,10 +2032,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
    /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return;
+       }
+    }
  
    size_t i;
    if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
diff --git a/libgomp/task.c b/libgomp/task.c

index 77e12c404ac564ecd531285c6f39e01f000c4953..0c78b3c939cde5e2289b0bf4de69f27a8493426d 100644 (file)
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -363,10 +363,20 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  #endif
  
    /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return;
+       }
+    }
  
    if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
      priority = 0;
@@ -464,14 +474,26 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
        gomp_mutex_lock (&team->task_lock);
        /* If parallel or taskgroup has been cancelled, don't start new
          tasks.  */
-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
-                            || (taskgroup && taskgroup->cancelled))
-                           && !task->copy_ctors_done, 0))
+      if (__builtin_expect (gomp_cancel_var, 0)
+         && !task->copy_ctors_done)
         {
-         gomp_mutex_unlock (&team->task_lock);
-         gomp_finish_task (task);
-         free (task);
-         return;
+         if (gomp_team_barrier_cancelled (&team->barrier))
+           {
+           do_cancel:
+             gomp_mutex_unlock (&team->task_lock);
+             gomp_finish_task (task);
+             free (task);
+             return;
+           }
+         if (taskgroup)
+           {
+             if (taskgroup->cancelled)
+               goto do_cancel;
+             if (taskgroup->workshare
+                 && taskgroup->prev
+                 && taskgroup->prev->cancelled)
+               goto do_cancel;
+           }
         }
        if (taskgroup)
         taskgroup->num_children++;
@@ -662,10 +684,20 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
    struct gomp_team *team = thr->ts.team;
  
    /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return true;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return true;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return true;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return true;
+       }
+    }
  
    struct gomp_target_task *ttask;
    struct gomp_task *task;
@@ -748,13 +780,25 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
    task->final_task = 0;
    gomp_mutex_lock (&team->task_lock);
    /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
-  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
-                       || (taskgroup && taskgroup->cancelled), 0))
+  if (__builtin_expect (gomp_cancel_var, 0))
      {
-      gomp_mutex_unlock (&team->task_lock);
-      gomp_finish_task (task);
-      free (task);
-      return true;
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       {
+       do_cancel:
+         gomp_mutex_unlock (&team->task_lock);
+         gomp_finish_task (task);
+         free (task);
+         return true;
+       }
+      if (taskgroup)
+       {
+         if (taskgroup->cancelled)
+           goto do_cancel;
+         if (taskgroup->workshare
+             && taskgroup->prev
+             && taskgroup->prev->cancelled)
+           goto do_cancel;
+       }
      }
    if (depend_size)
      {
@@ -1047,10 +1091,21 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
  
    if (--team->task_queued_count == 0)
      gomp_team_barrier_clear_task_pending (&team->barrier);
-  if ((gomp_team_barrier_cancelled (&team->barrier)
-       || (taskgroup && taskgroup->cancelled))
+  if (__builtin_expect (gomp_cancel_var, 0)
        && !child_task->copy_ctors_done)
-    return true;
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return true;
+      if (taskgroup)
+       {
+         if (taskgroup->cancelled)
+           return true;
+         if (taskgroup->workshare
+             && taskgroup->prev
+             && taskgroup->prev->cancelled)
+           return true;
+       }
+    }
    return false;
  }
  
@@ -1527,10 +1582,20 @@ GOMP_taskwait_depend (void **depend)
    struct gomp_team *team = thr->ts.team;
  
    /* If parallel or taskgroup has been cancelled, return early.  */
-  if (team
-      && (gomp_team_barrier_cancelled (&team->barrier)
-         || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
-    return;
+  if (__builtin_expect (gomp_cancel_var, 0) && team)
+    {
+      if (gomp_team_barrier_cancelled (&team->barrier))
+       return;
+      if (thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return;
+       }
+    }
  
    if (thr->task && thr->task->depend_hash)
      gomp_task_maybe_wait_for_dependencies (depend);
@@ -1770,9 +1835,10 @@ gomp_taskgroup_init (struct gomp_taskgroup *prev)
      = gomp_malloc (sizeof (struct gomp_taskgroup));
    taskgroup->prev = prev;
    priority_queue_init (&taskgroup->taskgroup_queue);
-  taskgroup->in_taskgroup_wait = false;
    taskgroup->reductions = prev ? prev->reductions : NULL;
+  taskgroup->in_taskgroup_wait = false;
    taskgroup->cancelled = false;
+  taskgroup->workshare = false;
    taskgroup->num_children = 0;
    gomp_sem_init (&taskgroup->taskgroup_sem, 0);
    return taskgroup;
@@ -1956,21 +2022,34 @@ GOMP_taskgroup_end (void)
    free (taskgroup);
  }
  
-static inline void
-gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads)
+static inline __attribute__((always_inline)) void
+gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
+                        unsigned nthreads)
  {
    size_t total_cnt = 0;
    uintptr_t *d = data;
    struct htab *old_htab = NULL, *new_htab;
    do
      {
-      size_t sz = d[1] * nthreads;
-      /* Should use omp_alloc if d[3] is not -1.  */
-      void *ptr = gomp_aligned_alloc (d[2], sz);
-      memset (ptr, '\0', sz);
-      d[2] = (uintptr_t) ptr;
+      if (__builtin_expect (orig != NULL, 0))
+       {
+         /* For worksharing task reductions, memory has been allocated
+            already by some other thread that encountered the construct
+            earlier.  */
+         d[2] = orig[2];
+         d[6] = orig[6];
+         orig = (uintptr_t *) orig[4];
+       }
+      else
+       {
+         size_t sz = d[1] * nthreads;
+         /* Should use omp_alloc if d[3] is not -1.  */
+         void *ptr = gomp_aligned_alloc (d[2], sz);
+         memset (ptr, '\0', sz);
+         d[2] = (uintptr_t) ptr;
+         d[6] = d[2] + sz;
+       }
        d[5] = 0;
-      d[6] = d[2] + sz;
        total_cnt += d[0];
        if (d[4] == 0)
         {
@@ -2028,6 +2107,38 @@ gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads)
    d[5] = (uintptr_t) new_htab;
  }
  
+static void
+gomp_create_artificial_team (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task_icv *icv;
+  struct gomp_team *team = gomp_new_team (1);
+  struct gomp_task *task = thr->task;
+  icv = task ? &task->icv : &gomp_global_icv;
+  team->prev_ts = thr->ts;
+  thr->ts.team = team;
+  thr->ts.team_id = 0;
+  thr->ts.work_share = &team->work_shares[0];
+  thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+  thr->ts.single_count = 0;
+#endif
+  thr->ts.static_trip = 0;
+  thr->task = &team->implicit_task[0];
+  gomp_init_task (thr->task, NULL, icv);
+  if (task)
+    {
+      thr->task = task;
+      gomp_end_task ();
+      free (task);
+      thr->task = &team->implicit_task[0];
+    }
+#ifdef LIBGOMP_USE_PTHREADS
+  else
+    pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+}
+
  /* The format of data is:
     data[0]     cnt
     data[1]     size
@@ -2039,7 +2150,12 @@ gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads)
     cnt times
     ent[0]      address
     ent[1]      offset
-   ent[2]      used internally (pointer to data[0]).  */
+   ent[2]      used internally (pointer to data[0])
+   The entries are sorted by increasing offset, so that a binary
+   search can be performed.  Normally, data[8] is 0, exception is
+   for worksharing construct task reductions in cancellable parallel,
+   where at offset 0 there should be space for a pointer and an integer
+   which are used internally.  */
  
  void
  GOMP_taskgroup_reduction_register (uintptr_t *data)
@@ -2047,41 +2163,18 @@ GOMP_taskgroup_reduction_register (uintptr_t *data)
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
    struct gomp_task *task;
+  unsigned nthreads;
    if (__builtin_expect (team == NULL, 0))
      {
        /* The task reduction code needs a team and task, so for
          orphaned taskgroups just create the implicit team.  */
-      struct gomp_task_icv *icv;
-      team = gomp_new_team (1);
-      task = thr->task;
-      icv = task ? &task->icv : &gomp_global_icv;
-      team->prev_ts = thr->ts;
-      thr->ts.team = team;
-      thr->ts.team_id = 0;
-      thr->ts.work_share = &team->work_shares[0];
-      thr->ts.last_work_share = NULL;
-#ifdef HAVE_SYNC_BUILTINS
-      thr->ts.single_count = 0;
-#endif
-      thr->ts.static_trip = 0;
-      thr->task = &team->implicit_task[0];
-      gomp_init_task (thr->task, NULL, icv);
-      if (task)
-       {
-         thr->task = task;
-         gomp_end_task ();
-         free (task);
-         thr->task = &team->implicit_task[0];
-       }
-#ifdef LIBGOMP_USE_PTHREADS
-      else
-       pthread_setspecific (gomp_thread_destructor, thr);
-#endif
-      GOMP_taskgroup_start ();
+      gomp_create_artificial_team ();
+      ialias_call (GOMP_taskgroup_start) ();
+      team = thr->ts.team;
      }
-  unsigned nthreads = team->nthreads;
+  nthreads = team->nthreads;
    task = thr->task;
-  gomp_reduction_register (data, task->taskgroup->reductions, nthreads);
+  gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
    task->taskgroup->reductions = data;
  }
  
@@ -2175,11 +2268,56 @@ struct gomp_taskgroup *
  gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
  {
    struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
-  gomp_reduction_register (data, NULL, nthreads);
+  gomp_reduction_register (data, NULL, NULL, nthreads);
    taskgroup->reductions = data;
    return taskgroup;
  }
  
+void
+gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task = thr->task;
+  unsigned nthreads = team->nthreads;
+  gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
+  task->taskgroup->reductions = data;
+}
+
+void
+gomp_workshare_taskgroup_start (void)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+  struct gomp_task *task;
+
+  if (team == NULL)
+    {
+      gomp_create_artificial_team ();
+      team = thr->ts.team;
+    }
+  task = thr->task;
+  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
+  task->taskgroup->workshare = true;
+}
+
+void
+GOMP_workshare_task_reduction_unregister (bool cancelled)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_task *task = thr->task;
+  struct gomp_team *team = thr->ts.team;
+  uintptr_t *data = task->taskgroup->reductions;
+  ialias_call (GOMP_taskgroup_end) ();
+  if (thr->ts.team_id == 0)
+    ialias_call (GOMP_taskgroup_reduction_unregister) (data);
+  else
+    htab_free ((struct htab *) data[5]);
+
+  if (!cancelled)
+    gomp_team_barrier_wait (&team->barrier);
+}
+
  int
  omp_in_final (void)
  {
diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c

index d20af399d385154564618f6cabe14ba26a13d28c..4621405aa58e599e3a0c5de0623312c40ed434b2 100644 (file)
--- a/libgomp/taskloop.c
+++ b/libgomp/taskloop.c
@@ -149,8 +149,17 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
  
    if (flags & GOMP_TASK_FLAG_NOGROUP)
      {
-      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
-       return;
+      if (__builtin_expect (gomp_cancel_var, 0)
+         && thr->task
+         && thr->task->taskgroup)
+       {
+         if (thr->task->taskgroup->cancelled)
+           return;
+         if (thr->task->taskgroup->workshare
+             && thr->task->taskgroup->prev
+             && thr->task->taskgroup->prev->cancelled)
+           return;
+       }
      }
    else
      {
@@ -292,19 +301,31 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
        gomp_mutex_lock (&team->task_lock);
        /* If parallel or taskgroup has been cancelled, don't start new
          tasks.  */
-      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
-                            || (taskgroup && taskgroup->cancelled))
-                           && cpyfn == NULL, 0))
+      if (__builtin_expect (gomp_cancel_var, 0)
+         && cpyfn == NULL)
         {
-         gomp_mutex_unlock (&team->task_lock);
-         for (i = 0; i < num_tasks; i++)
+         if (gomp_team_barrier_cancelled (&team->barrier))
+           {
+           do_cancel:
+             gomp_mutex_unlock (&team->task_lock);
+             for (i = 0; i < num_tasks; i++)
+               {
+                 gomp_finish_task (tasks[i]);
+                 free (tasks[i]);
+               }
+             if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+               ialias_call (GOMP_taskgroup_end) ();
+             return;
+           }
+         if (taskgroup)
             {
-             gomp_finish_task (tasks[i]);
-             free (tasks[i]);
+             if (taskgroup->cancelled)
+               goto do_cancel;
+             if (taskgroup->workshare
+                 && taskgroup->prev
+                 && taskgroup->prev->cancelled)
+               goto do_cancel;
             }
-         if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
-           ialias_call (GOMP_taskgroup_end) ();
-         return;
         }
        if (taskgroup)
         taskgroup->num_children += num_tasks;
diff --git a/libgomp/team.c b/libgomp/team.c

index 0956a1f8f1ff87df5f26c9d566a2ac1807e96d64..e3e4c4d1ef2771c260ce7239e02422a873bdac8f 100644 (file)
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -187,7 +187,7 @@ gomp_new_team (unsigned nthreads)
    team->single_count = 0;
  #endif
    team->work_shares_to_free = &team->work_shares[0];
-  gomp_init_work_share (&team->work_shares[0], false, nthreads);
+  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
    team->work_shares[0].next_alloc = NULL;
    team->work_share_list_free = NULL;
    team->work_share_list_alloc = &team->work_shares[1];
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-14.C b/libgomp/testsuite/libgomp.c++/task-reduction-14.C

new file mode 100644 (file)

index 0000000..3f4e79b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-14.C
@@ -0,0 +1,72 @@
+#include <omp.h>
+#include <stdlib.h>
+
+struct A { A (); ~A (); A (const A &); static int cnt1, cnt2, cnt3; int a; };
+int A::cnt1;
+int A::cnt2;
+int A::cnt3;
+A::A () : a (0)
+{
+  #pragma omp atomic
+  cnt1++;
+}
+A::A (const A &x) : a (x.a)
+{
+  #pragma omp atomic
+  cnt2++;
+}
+A::~A ()
+{
+  #pragma omp atomic
+  cnt3++;
+}
+#pragma omp declare reduction (+: A: omp_out.a += omp_in.a)
+
+void
+foo (int x)
+{
+  A a, b[2];
+  int d = 1;
+  long int e[2] = { 1L, 1L };
+  int c = 0;
+  #pragma omp parallel
+  {
+    if (x && omp_get_thread_num () == 0)
+      {
+       for (int i = 0; i < 10000000; ++i)
+         asm volatile ("");
+       c = 1;
+       #pragma omp cancel parallel
+      }
+    #pragma omp for reduction (task, +: a, b) reduction (task, *: d, e)
+    for (int i = 0; i < 64; i++)
+      #pragma omp task in_reduction (+: a, b) in_reduction (*: d, e)
+      {
+       a.a++;
+       b[0].a += 2;
+       b[1].a += 3;
+       d *= ((i & 7) == 0) + 1;
+       e[0] *= ((i & 7) == 3) + 1;
+       e[1] *= ((i & 3) == 2) + 1;
+      }
+    if (x && omp_get_cancellation ())
+      abort ();
+  }
+  if (!c)
+    {
+      if (a.a != 64 || b[0].a != 128 || b[1].a != 192)
+       abort ();
+      if (d != 256 || e[0] != 256L || e[1] != 65536L)
+       abort ();
+    }
+}
+
+int
+main ()
+{
+  int c1 = A::cnt1, c2 = A::cnt2, c3 = A::cnt3;
+  volatile int zero = 0;
+  foo (zero);
+  if (A::cnt1 + A::cnt2 - c1 - c2 != A::cnt3 - c3)
+    abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-15.C b/libgomp/testsuite/libgomp.c++/task-reduction-15.C

new file mode 100644 (file)

index 0000000..8a01e6b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-15.C
@@ -0,0 +1,75 @@
+extern "C" void abort ();
+
+int as;
+int &a = as;
+long int bs = 1;
+long int &b = bs;
+
+template <typename T, typename U>
+void
+foo (T &c, U &d)
+{
+  T i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (*: d) in_reduction (+: c) \
+                    in_reduction (+: a) in_reduction (*: b)
+    {
+      a += 7;
+      b *= 2;
+      c += 9;
+      d *= 3;
+    }
+}
+
+template <typename T, typename U>
+void
+bar ()
+{
+  T cs = 0;
+  T &c = cs;
+  U ds = 1;
+  #pragma omp parallel if (0)
+  {
+    U &d = ds;
+    #pragma omp parallel
+    {
+      T i;
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b, d)
+      for (i = 0; i < 4; i++)
+       #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+       {
+         T j;
+         a += 7;
+         b *= 2;
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+           {
+             a += 7;
+             b *= 2;
+             c += 9;
+             d *= 3;
+             foo (c, d);
+           }
+         c += 9;
+         d *= 3;
+       }
+#define THREEP4 (3LL * 3LL * 3LL * 3LL)
+      if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+               * THREEP4))
+       abort ();
+      if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+       abort ();
+    }
+  }
+  if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+    abort ();
+  if (ds != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+            * THREEP4))
+    abort ();
+}
+
+int
+main ()
+{
+  bar<int, long long int> ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-16.C b/libgomp/testsuite/libgomp.c++/task-reduction-16.C

new file mode 100644 (file)

index 0000000..5835edc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-16.C
@@ -0,0 +1,130 @@
+extern "C" void abort ();
+
+struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long long int x, int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as = { 0LL, 7 };
+S &a = as;
+S bs (1LL, 5);
+S &b = bs;
+
+void
+foo (S &c, S &d)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a)
+    {
+      a.s += 7;
+      b.s *= 2;
+      c.s += 9;
+      d.s *= 3;
+      if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+         || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+       abort ();
+    }
+}
+
+void
+test ()
+{
+  S cs = { 0LL, 7 };
+  S &c = cs;
+  S ds (1LL, 5);
+  #pragma omp parallel if (0)
+  {
+    S &d = ds;
+    #pragma omp parallel shared (a, b, c, d)
+    {
+      #pragma omp for schedule (static, 1) reduction (task, +: a, c) reduction (task, *: b, d)
+      for (int i = 0; i < 4; i++)
+       #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c)
+       {
+         int j;
+         a.s += 7;
+         b.s *= 2;
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a) in_reduction (*: b) \
+                            in_reduction (+: c) in_reduction (*: d)
+           {
+             a.s += 7;
+             b.s *= 2;
+             c.s += 9;
+             d.s *= 3;
+             foo (c, d);
+             if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+                 || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+               abort ();
+           }
+         c.s += 9;
+         d.s *= 3;
+         if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+             || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+           abort ();
+       }
+#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL)
+      if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5)
+       abort ();
+      if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+         || c.s != 28 * 9 || c.t != 7)
+       abort ();
+    }
+  }
+  if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+      || c.s != 28 * 9 || c.t != 7)
+    abort ();
+  if (ds.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || ds.t != 5)
+    abort ();
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  test ();
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-17.C b/libgomp/testsuite/libgomp.c++/task-reduction-17.C

new file mode 100644 (file)

index 0000000..c00c8e4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-17.C
@@ -0,0 +1,300 @@
+extern "C" void abort ();
+
+int as[2];
+int (&a)[2] = as;
+long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 };
+long long int (&b)[7] = bs;
+int es[3] = { 5, 0, 5 };
+int (&e)[3] = es;
+int fs[5] = { 6, 7, 0, 0, 9 };
+int (&f)[5] = fs;
+int gs[4] = { 1, 0, 0, 2 };
+int (&g)[4] = gs;
+int hs[3] = { 0, 1, 4 };
+int (&h)[3] = hs;
+int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+int (&k)[4][2] = ks;
+long long *ss;
+long long *&s = ss;
+long long (*ts)[2];
+long long (*&t)[2] = ts;
+
+template <typename T>
+void
+foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2])
+{
+  T i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+                    in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+                    in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+                    in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0] += 7;
+      a[1] += 17;
+      b[2] *= 2;
+      b[4] *= 2;
+      c[0] += 6;
+      d[1] *= 2;
+      e[1] += 19;
+      f[2] += 21;
+      f[3] += 23;
+      g[1] += 25;
+      g[2] += 27;
+      h[0] += 29;
+      k[1][0] += 31;
+      k[2][1] += 33;
+      m[1] += 19;
+      r[2] += 21;
+      r[3] += 23;
+      o[1] += 25;
+      o[2] += 27;
+      p[0] += 29;
+      q[1][0] += 31;
+      q[2][1] += 33;
+      s[1] *= 2;
+      t[2][0] *= 2;
+      t[3][1] *= 2;
+    }
+}
+
+template <typename T, typename I>
+void
+test (T &n, I x, I y)
+{
+  T cs[2] = { 0, 0 };
+  T (&c)[2] = cs;
+  T ps[3] = { 0, 1, 4 };
+  T (&p)[3] = ps;
+  T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+  T (&q)[4][2] = qs;
+  long long sb[4] = { 5, 1, 1, 6 };
+  long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+  T ms[3] = { 5, 0, 5 };
+  T os[4] = { 1, 0, 0, 2 };
+  s = sb;
+  t = tb;
+  #pragma omp parallel if (0)
+  {
+    long long int ds[] = { 1, 1 };
+    long long int (&d)[2] = ds;
+    T (&m)[3] = ms;
+    T rs[5] = { 6, 7, 0, 0, 9 };
+    T (&r)[5] = rs;
+    T (&o)[4] = os;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
+                     reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+                     reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+                     reduction (task,*: t[2:2][:], s[1:n + 1]) schedule (dynamic)
+      for (I i = x; i != y; i++)
+       #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+                        in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+                        in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+                        in_reduction (*: s[1:2], t[2:2][:])
+       {
+         T j;
+         a[0] += 2;
+         a[1] += 3;
+         b[2] *= 2;
+         f[3] += 8;
+         g[1] += 9;
+         g[2] += 10;
+         h[0] += 11;
+         k[1][1] += 13;
+         k[2][1] += 15;
+         m[1] += 16;
+         r[2] += 8;
+         s[1] *= 2;
+         t[2][1] *= 2;
+         t[3][1] *= 2;
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a, c[:2]) \
+                            in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+                            in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+                            in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+                            in_reduction (*: s[n:2], t[2:2][:])
+           {
+             m[1] += 6;
+             r[2] += 7;
+             q[1][0] += 17;
+             q[2][0] += 19;
+             a[0] += 4;
+             a[1] += 5;
+             b[3] *= 2;
+             b[4] *= 2;
+             f[3] += 18;
+             g[1] += 29;
+             g[2] += 18;
+             h[0] += 19;
+             s[2] *= 2;
+             t[2][0] *= 2;
+             t[3][0] *= 2;
+             T *cp = c;
+             long long int *dp = d;
+             T *rp = r;
+             T *pp = p;
+             foo (n, cp, dp, m, rp, o, pp, q);
+             r[3] += 18;
+             o[1] += 29;
+             o[2] += 18;
+             p[0] += 19;
+             c[0] += 4;
+             c[1] += 5;
+             d[0] *= 2;
+             e[1] += 6;
+             f[2] += 7;
+             k[1][0] += 17;
+             k[2][0] += 19;
+           }
+         r[3] += 8;
+         o[1] += 9;
+         o[2] += 10;
+         p[0] += 11;
+         q[1][1] += 13;
+         q[2][1] += 15;
+         b[3] *= 2;
+         c[0] += 4;
+         c[1] += 9;
+         d[0] *= 2;
+         e[1] += 16;
+         f[2] += 8;
+       }
+      if (d[0] != 1LL << (8 + 4)
+         || d[1] != 1LL << 16
+         || m[0] != 5
+         || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+         || m[2] != 5
+         || r[0] != 6
+         || r[1] != 7
+         || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+         || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+         || r[4] != 9
+         || o[0] != 1
+         || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+         || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+         || o[3] != 2)
+       abort ();
+      if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+         || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+         || b[0] != 9 || b[1] != 11
+         || b[2] != 1LL << (16 + 4)
+         || b[3] != 1LL << (8 + 4)
+         || b[4] != 1LL << (16 + 8)
+         || b[5] != 13 || b[6] != 15
+         || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+         || c[1] != 5 * 8 + 9 * 4
+         || e[0] != 5
+         || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+         || e[2] != 5
+         || f[0] != 6
+         || f[1] != 7
+         || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+         || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+         || f[4] != 9
+         || g[0] != 1
+         || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+         || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+         || g[3] != 2
+         || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+         || h[1] != 1 || h[2] != 4
+         || k[0][0] != 5 || k[0][1] != 6
+         || k[1][0] != 31 * 16 + 17 * 8
+         || k[1][1] != 13 * 4
+         || k[2][0] != 19 * 8
+         || k[2][1] != 33 * 16 + 15 * 4
+         || k[3][0] != 7 || k[3][1] != 8
+         || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+         || p[1] != 1 || p[2] != 4
+         || q[0][0] != 5 || q[0][1] != 6
+         || q[1][0] != 31 * 16 + 17 * 8
+         || q[1][1] != 13 * 4
+         || q[2][0] != 19 * 8
+         || q[2][1] != 33 * 16 + 15 * 4
+         || q[3][0] != 7 || q[3][1] != 8
+         || sb[0] != 5
+         || sb[1] != 1LL << (16 + 4)
+         || sb[2] != 1LL << 8
+         || sb[3] != 6
+         || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+         || tb[2][0] != 1LL << (16 + 8)
+         || tb[2][1] != 1LL << 4
+         || tb[3][0] != 1LL << 8
+         || tb[3][1] != 1LL << (16 + 4)
+         || tb[4][0] != 13 || tb[4][1] != 14)
+       abort ();
+    }
+    if (d[0] != 1LL << (8 + 4)
+       || d[1] != 1LL << 16
+       || m[0] != 5
+       || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+       || m[2] != 5
+       || r[0] != 6
+       || r[1] != 7
+       || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+       || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+       || r[4] != 9
+       || o[0] != 1
+       || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+       || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+       || o[3] != 2)
+      abort ();
+  }
+  if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+      || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+      || b[0] != 9 || b[1] != 11
+      || b[2] != 1LL << (16 + 4)
+      || b[3] != 1LL << (8 + 4)
+      || b[4] != 1LL << (16 + 8)
+      || b[5] != 13 || b[6] != 15
+      || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+      || c[1] != 5 * 8 + 9 * 4
+      || e[0] != 5
+      || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || e[2] != 5
+      || f[0] != 6
+      || f[1] != 7
+      || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || f[4] != 9
+      || g[0] != 1
+      || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || g[3] != 2
+      || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || h[1] != 1 || h[2] != 4
+      || k[0][0] != 5 || k[0][1] != 6
+      || k[1][0] != 31 * 16 + 17 * 8
+      || k[1][1] != 13 * 4
+      || k[2][0] != 19 * 8
+      || k[2][1] != 33 * 16 + 15 * 4
+      || k[3][0] != 7 || k[3][1] != 8
+      || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || p[1] != 1 || p[2] != 4
+      || q[0][0] != 5 || q[0][1] != 6
+      || q[1][0] != 31 * 16 + 17 * 8
+      || q[1][1] != 13 * 4
+      || q[2][0] != 19 * 8
+      || q[2][1] != 33 * 16 + 15 * 4
+      || q[3][0] != 7 || q[3][1] != 8
+      || sb[0] != 5
+      || sb[1] != 1LL << (16 + 4)
+      || sb[2] != 1LL << 8
+      || sb[3] != 6
+      || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+      || tb[2][0] != 1LL << (16 + 8)
+      || tb[2][1] != 1LL << 4
+      || tb[3][0] != 1LL << 8
+      || tb[3][1] != 1LL << (16 + 4)
+      || tb[4][0] != 13 || tb[4][1] != 14)
+    abort ();
+}
+
+int
+main ()
+{
+  int n = 1;
+  test (n, 0ULL, 4ULL);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-18.C b/libgomp/testsuite/libgomp.c++/task-reduction-18.C

new file mode 100644 (file)

index 0000000..99c0e37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-18.C
@@ -0,0 +1,325 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S a[2] = { { 0, 7 }, { 0, 7 } };
+S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S *s;
+S (*t)[2];
+
+template <int N>
+void
+foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+                    in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+                    in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+                    in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0].s += 7;
+      a[1].s += 17;
+      b[2].s *= 2;
+      b[4].s *= 2;
+      c[0].s += 6;
+      d[1].s *= 2;
+      e[1].s += 19;
+      f[2].s += 21;
+      f[3].s += 23;
+      g[1].s += 25;
+      g[2].s += 27;
+      h[0].s += 29;
+      k[1][0].s += 31;
+      k[2][1].s += 33;
+      m[1].s += 19;
+      r[2].s += 21;
+      r[3].s += 23;
+      o[1].s += 25;
+      o[2].s += 27;
+      p[0].s += 29;
+      q[1][0].s += 31;
+      q[2][1].s += 33;
+      s[1].s *= 2;
+      t[2][0].s *= 2;
+      t[3][1].s *= 2;
+      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+         || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+       abort ();
+      for (int z = 0; z < 2; z++)
+       if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+           || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+           || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+           || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+           || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+           || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+           || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+         abort ();
+      for (int z = 0; z < 3; z++)
+       if (b[z + 2].t != 5 && b[z + 2].t != 9)
+         abort ();
+    }
+}
+
+template <int N>
+void
+test (int n)
+{
+  S c[2] = { { 0, 7 }, { 0, 7 } };
+  S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+  S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+  S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+  S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+  s = ss;
+  t = tt;
+  #pragma omp parallel num_threads (1) if (0)
+  {
+    S d[] = { { 1, 5 }, { 1, 5 } };
+    S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+    S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+    S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+    volatile unsigned long long x = 0;
+    volatile unsigned long long y = 4;
+    volatile unsigned long long z = 1;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+                     reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+                     reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+                     reduction (task, *: t[2:2][:], s[1:n + 1]) \
+                     schedule (nonmonotonic: guided, 1)
+      for (unsigned long long i = x; i < y; i += z)
+       #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+                        in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+                        in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+                        in_reduction (*: s[1:2], t[2:2][:])
+       {
+         int j;
+         a[0].s += 2;
+         a[1].s += 3;
+         b[2].s *= 2;
+         f[3].s += 8;
+         g[1].s += 9;
+         g[2].s += 10;
+         h[0].s += 11;
+         k[1][1].s += 13;
+         k[2][1].s += 15;
+         m[1].s += 16;
+         r[2].s += 8;
+         s[1].s *= 2;
+         t[2][1].s *= 2;
+         t[3][1].s *= 2;
+         if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+             || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+           abort ();
+         for (int z = 0; z < 2; z++)
+           if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+               || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+               || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+               || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+               || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+               || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+               || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+             abort ();
+         for (int z = 0; z < 3; z++)
+           if (b[z + 2].t != 5 && b[z + 2].t != 9)
+             abort ();
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a, c[:2]) \
+                            in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+                            in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+                            in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+                            in_reduction (*: s[n:2], t[2:2][:])
+           {
+             m[1].s += 6;
+             r[2].s += 7;
+             q[1][0].s += 17;
+             q[2][0].s += 19;
+             a[0].s += 4;
+             a[1].s += 5;
+             b[3].s *= 2;
+             b[4].s *= 2;
+             f[3].s += 18;
+             g[1].s += 29;
+             g[2].s += 18;
+             h[0].s += 19;
+             s[2].s *= 2;
+             t[2][0].s *= 2;
+             t[3][0].s *= 2;
+             foo<N> (n, c, d, m, r, o, p, q);
+             if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+                 || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+               abort ();
+             for (int z = 0; z < 2; z++)
+               if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+                   || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+                   || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+                   || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+                   || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+                   || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+                   || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+                 abort ();
+             for (int z = 0; z < 3; z++)
+               if (b[z + 2].t != 5 && b[z + 2].t != 9)
+                 abort ();
+             r[3].s += 18;
+             o[1].s += 29;
+             o[2].s += 18;
+             p[0].s += 19;
+             c[0].s += 4;
+             c[1].s += 5;
+             d[0].s *= 2;
+             e[1].s += 6;
+             f[2].s += 7;
+             k[1][0].s += 17;
+             k[2][0].s += 19;
+           }
+         r[3].s += 8;
+         o[1].s += 9;
+         o[2].s += 10;
+         p[0].s += 11;
+         q[1][1].s += 13;
+         q[2][1].s += 15;
+         b[3].s *= 2;
+         c[0].s += 4;
+         c[1].s += 9;
+         d[0].s *= 2;
+         e[1].s += 16;
+         f[2].s += 8;
+       }
+      if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+         || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+         || b[0].s != 9 || b[1].s != 11
+         || b[2].s != 1LL << (16 + 4)
+         || b[3].s != 1LL << (8 + 4)
+         || b[4].s != 1LL << (16 + 8)
+         || b[5].s != 13 || b[6].s != 15
+         || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+         || c[1].s != 5 * 8 + 9 * 4
+         || e[0].s != 5
+         || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+         || e[2].s != 5
+         || f[0].s != 6
+         || f[1].s != 7
+         || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+         || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+         || f[4].s != 9
+         || g[0].s != 1
+         || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+         || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+         || g[3].s != 2
+         || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+         || h[1].s != 1 || h[2].s != 4
+         || k[0][0].s != 5 || k[0][1].s != 6
+         || k[1][0].s != 31 * 16 + 17 * 8
+         || k[1][1].s != 13 * 4
+         || k[2][0].s != 19 * 8
+         || k[2][1].s != 33 * 16 + 15 * 4
+         || k[3][0].s != 7 || k[3][1].s != 8
+         || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+         || p[1].s != 1 || p[2].s != 4
+         || q[0][0].s != 5 || q[0][1].s != 6
+         || q[1][0].s != 31 * 16 + 17 * 8
+         || q[1][1].s != 13 * 4
+         || q[2][0].s != 19 * 8
+         || q[2][1].s != 33 * 16 + 15 * 4
+         || q[3][0].s != 7 || q[3][1].s != 8
+         || ss[0].s != 5
+         || ss[1].s != 1LL << (16 + 4)
+         || ss[2].s != 1LL << 8
+         || ss[3].s != 6
+         || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12
+         || tt[2][0].s != 1LL << (16 + 8)
+         || tt[2][1].s != 1LL << 4
+         || tt[3][0].s != 1LL << 8
+         || tt[3][1].s != 1LL << (16 + 4)
+         || tt[4][0].s != 13 || tt[4][1].s != 14)
+       abort ();
+    }
+    if (d[0].s != 1LL << (8 + 4)
+       || d[1].s != 1LL << 16
+       || m[0].s != 5
+       || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+       || m[2].s != 5
+       || r[0].s != 6
+       || r[1].s != 7
+       || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+       || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+       || r[4].s != 9
+       || o[0].s != 1
+       || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+       || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+       || o[3].s != 2)
+      abort ();
+    if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+      abort ();
+    for (int z = 0; z < 2; z++)
+      if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+         || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+         || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+         || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+       abort ();
+    for (int z = 0; z < 3; z++)
+      if (b[z + 2].t != 5)
+       abort ();
+  }
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  test<0> (1);
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-19.C b/libgomp/testsuite/libgomp.c++/task-reduction-19.C

new file mode 100644 (file)

index 0000000..15945c5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-19.C
@@ -0,0 +1,343 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as[2] = { { 0, 7 }, { 0, 7 } };
+S (&a)[2] = as;
+S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S (&b)[7] = bs;
+S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S (&e)[3] = es;
+S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S (&f)[5] = fs;
+S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S (&g)[4] = gs;
+S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S (&h)[3] = hs;
+S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S (&k)[4][2] = ks;
+S *ss;
+S *&s = ss;
+S (*ts)[2];
+S (*&t)[2] = ts;
+
+template <typename S, typename T>
+void
+foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
+{
+  T i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+                    in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+                    in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+                    in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0].s += 7;
+      a[1].s += 17;
+      b[2].s *= 2;
+      b[4].s *= 2;
+      c[0].s += 6;
+      d[1].s *= 2;
+      e[1].s += 19;
+      f[2].s += 21;
+      f[3].s += 23;
+      g[1].s += 25;
+      g[2].s += 27;
+      h[0].s += 29;
+      k[1][0].s += 31;
+      k[2][1].s += 33;
+      m[1].s += 19;
+      r[2].s += 21;
+      r[3].s += 23;
+      o[1].s += 25;
+      o[2].s += 27;
+      p[0].s += 29;
+      q[1][0].s += 31;
+      q[2][1].s += 33;
+      s[1].s *= 2;
+      t[2][0].s *= 2;
+      t[3][1].s *= 2;
+      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+         || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+       abort ();
+      for (T z = 0; z < 2; z++)
+       if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+           || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+           || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+           || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+           || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+           || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+           || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+         abort ();
+      for (T z = 0; z < 3; z++)
+       if (b[z + 2].t != 5 && b[z + 2].t != 9)
+         abort ();
+    }
+}
+
+template <typename S, typename T>
+void
+test (T &n)
+{
+  S cs[2] = { { 0, 7 }, { 0, 7 } };
+  S (&c)[2] = cs;
+  S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+  S (&p)[3] = ps;
+  S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+  S (&q)[4][2] = qs;
+  S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+  S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+  S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+  S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+  s = sb;
+  t = tb;
+  #pragma omp parallel if (0)
+  {
+    S ds[] = { { 1, 5 }, { 1, 5 } };
+    S (&d)[2] = ds;
+    S (&m)[3] = ms;
+    S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+    S (&r)[5] = rs;
+    S (&o)[4] = os;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+                     reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+                     reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+                     reduction (task, *: t[2:2][:], s[1:n + 1]) \
+                     schedule (monotonic: runtime)
+      for (T i = 0; i < 4; i++)
+       #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+                        in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+                        in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+                        in_reduction (*: s[1:2], t[2:2][:])
+       {
+         T j;
+         a[0].s += 2;
+         a[1].s += 3;
+         b[2].s *= 2;
+         f[3].s += 8;
+         g[1].s += 9;
+         g[2].s += 10;
+         h[0].s += 11;
+         k[1][1].s += 13;
+         k[2][1].s += 15;
+         m[1].s += 16;
+         r[2].s += 8;
+         s[1].s *= 2;
+         t[2][1].s *= 2;
+         t[3][1].s *= 2;
+         if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+             || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+           abort ();
+         for (T z = 0; z < 2; z++)
+           if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+               || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+               || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+               || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+               || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+               || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+               || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+             abort ();
+         for (T z = 0; z < 3; z++)
+           if (b[z + 2].t != 5 && b[z + 2].t != 9)
+             abort ();
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a, c[:2]) \
+                            in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+                            in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+                            in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+                            in_reduction (*: s[n:2], t[2:2][:])
+           {
+             m[1].s += 6;
+             r[2].s += 7;
+             q[1][0].s += 17;
+             q[2][0].s += 19;
+             a[0].s += 4;
+             a[1].s += 5;
+             b[3].s *= 2;
+             b[4].s *= 2;
+             f[3].s += 18;
+             g[1].s += 29;
+             g[2].s += 18;
+             h[0].s += 19;
+             s[2].s *= 2;
+             t[2][0].s *= 2;
+             t[3][0].s *= 2;
+             S *cp = c;
+             S *dp = d;
+             S *rp = r;
+             S *pp = p;
+             if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+                 || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+               abort ();
+             for (T z = 0; z < 2; z++)
+               if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+                   || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+                   || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+                   || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+                   || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+                   || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+                   || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+                 abort ();
+             for (T z = 0; z < 3; z++)
+               if (b[z + 2].t != 5 && b[z + 2].t != 9)
+                 abort ();
+             foo (n, cp, dp, m, rp, o, pp, q);
+             r[3].s += 18;
+             o[1].s += 29;
+             o[2].s += 18;
+             p[0].s += 19;
+             c[0].s += 4;
+             c[1].s += 5;
+             d[0].s *= 2;
+             e[1].s += 6;
+             f[2].s += 7;
+             k[1][0].s += 17;
+             k[2][0].s += 19;
+           }
+         r[3].s += 8;
+         o[1].s += 9;
+         o[2].s += 10;
+         p[0].s += 11;
+         q[1][1].s += 13;
+         q[2][1].s += 15;
+         b[3].s *= 2;
+         c[0].s += 4;
+         c[1].s += 9;
+         d[0].s *= 2;
+         e[1].s += 16;
+         f[2].s += 8;
+       }
+      if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+         || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+         || b[0].s != 9 || b[1].s != 11
+         || b[2].s != 1LL << (16 + 4)
+         || b[3].s != 1LL << (8 + 4)
+         || b[4].s != 1LL << (16 + 8)
+         || b[5].s != 13 || b[6].s != 15
+         || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+         || c[1].s != 5 * 8 + 9 * 4
+         || e[0].s != 5
+         || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+         || e[2].s != 5
+         || f[0].s != 6
+         || f[1].s != 7
+         || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+         || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+         || f[4].s != 9
+         || g[0].s != 1
+         || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+         || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+         || g[3].s != 2
+         || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+         || h[1].s != 1 || h[2].s != 4
+         || k[0][0].s != 5 || k[0][1].s != 6
+         || k[1][0].s != 31 * 16 + 17 * 8
+         || k[1][1].s != 13 * 4
+         || k[2][0].s != 19 * 8
+         || k[2][1].s != 33 * 16 + 15 * 4
+         || k[3][0].s != 7 || k[3][1].s != 8
+         || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+         || p[1].s != 1 || p[2].s != 4
+         || q[0][0].s != 5 || q[0][1].s != 6
+         || q[1][0].s != 31 * 16 + 17 * 8
+         || q[1][1].s != 13 * 4
+         || q[2][0].s != 19 * 8
+         || q[2][1].s != 33 * 16 + 15 * 4
+         || q[3][0].s != 7 || q[3][1].s != 8
+         || sb[0].s != 5
+         || sb[1].s != 1LL << (16 + 4)
+         || sb[2].s != 1LL << 8
+         || sb[3].s != 6
+         || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12
+         || tb[2][0].s != 1LL << (16 + 8)
+         || tb[2][1].s != 1LL << 4
+         || tb[3][0].s != 1LL << 8
+         || tb[3][1].s != 1LL << (16 + 4)
+         || tb[4][0].s != 13 || tb[4][1].s != 14)
+       abort ();
+      if (d[0].s != 1LL << (8 + 4)
+         || d[1].s != 1LL << 16
+         || m[0].s != 5
+         || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+         || m[2].s != 5
+         || r[0].s != 6
+         || r[1].s != 7
+         || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+         || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+         || r[4].s != 9
+         || o[0].s != 1
+         || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+         || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+         || o[3].s != 2)
+       abort ();
+      if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+       abort ();
+      for (T z = 0; z < 2; z++)
+       if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+           || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+           || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+           || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+         abort ();
+      for (T z = 0; z < 3; z++)
+       if (b[z + 2].t != 5)
+         abort ();
+    }
+  }
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  int n = 1;
+  test<S, int> (n);
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c

new file mode 100644 (file)

index 0000000..77395e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+  int a[64];
+  #pragma omp parallel
+  {
+    #pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+       #pragma omp cancel parallel
+      }
+    #pragma omp for
+    for (int i = 0; i < 64; i++)
+      a[i] = i;
+    if (omp_get_cancellation ())
+      abort ();
+  }
+  #pragma omp parallel
+  {
+    #pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+       #pragma omp cancel parallel
+      }
+    #pragma omp taskgroup
+    {
+      #pragma omp for
+      for (int i = 0; i < 64; i++)
+       #pragma omp task
+       a[i] += i;
+      if (omp_get_cancellation ())
+       abort ();
+    }
+  }
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c

new file mode 100644 (file)

index 0000000..b9af835
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c
@@ -0,0 +1,68 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+  int a = 0, i;
+  #pragma omp parallel
+  #pragma omp taskgroup
+  {
+    #pragma omp task
+    {
+      #pragma omp cancel taskgroup
+      if (omp_get_cancellation ())
+       abort ();
+    }
+    #pragma omp taskwait
+    #pragma omp for reduction (task, +: a)
+    for (i = 0; i < 64; ++i)
+      {
+       a++;
+       #pragma omp task in_reduction (+: a)
+       {
+         volatile int zero = 0;
+         a += zero;
+         if (omp_get_cancellation ())
+           abort ();
+       }
+      }
+    if (a != 64)
+      abort ();
+    #pragma omp task
+    {
+      if (omp_get_cancellation ())
+       abort ();
+    }
+  }
+  a = 0;
+  #pragma omp parallel
+  #pragma omp taskgroup
+  {
+    #pragma omp taskwait
+    #pragma omp for reduction (task, +: a)
+    for (i = 0; i < 64; ++i)
+      {
+       a++;
+       #pragma omp task in_reduction (+: a)
+       {
+         volatile int zero = 0;
+         a += zero;
+         #pragma omp cancel taskgroup
+         if (omp_get_cancellation ())
+           abort ();
+       }
+      }
+    if (a != 64)
+      abort ();
+    #pragma omp task
+    {
+      if (omp_get_cancellation ())
+       abort ();
+    }
+  }
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c

new file mode 100644 (file)

index 0000000..038b0e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
@@ -0,0 +1,56 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+             in_reduction (+: a)
+  {
+    a += 4;
+    b[1] *= 4;
+    c[1] &= ~(1UL << (i + 16));
+  }
+}
+
+void
+foo (unsigned long long int x, unsigned long long int y, unsigned long long int z)
+{
+  unsigned long long int i;
+  #pragma omp for schedule(runtime) reduction (task, +: a) \
+                 reduction (task, *: b) reduction (task, &: c[1:1])
+  for (i = x; i < y; i += z)
+    {
+      a++;
+      b[0] *= 2;
+      bar (i);
+      b[2] *= 3;
+      c[1] &= ~(1UL << i);
+    }
+}
+
+int
+main ()
+{
+  volatile int two = 2;
+  foo (two, 7 * two, two);
+  if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x15541554UL)
+    abort ();
+  a = 0;
+  b[0] = 1;
+  b[1] = 1;
+  b[2] = 1;
+  c[1] = ~0UL;
+  #pragma omp parallel
+  foo (two, 8 * two, two);
+  if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x55545554UL)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c

new file mode 100644 (file)

index 0000000..0ad9273
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
@@ -0,0 +1,67 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+             in_reduction (+: a)
+  {
+    a += 4;
+    b[1] *= 4;
+    c[1] &= ~(1UL << (i + 16));
+  }
+}
+
+void
+foo (int x)
+{
+  #pragma omp sections reduction (task, +: a) reduction (task, *: b) \
+                      reduction (task, &: c[1:1])
+  {
+    {
+      a++; b[0] *= 2; bar (2); b[2] *= 3; c[1] &= ~(1UL << 2);
+    }
+    #pragma omp section
+    { b[0] *= 2; bar (4); b[2] *= 3; c[1] &= ~(1UL << 4); a++; }
+    #pragma omp section
+    { bar (6); b[2] *= 3; c[1] &= ~(1UL << 6); a++; b[0] *= 2; }
+    #pragma omp section
+    { b[2] *= 3; c[1] &= ~(1UL << 8); a++; b[0] *= 2; bar (8); }
+    #pragma omp section
+    { c[1] &= ~(1UL << 10); a++; b[0] *= 2; bar (10); b[2] *= 3; }
+    #pragma omp section
+    { a++; b[0] *= 2; b[2] *= 3; c[1] &= ~(1UL << 12); bar (12); }
+    #pragma omp section
+    if (x)
+      {
+       a++; b[0] *= 2; b[2] *= 3; bar (14); c[1] &= ~(1UL << 14);
+      }
+  }
+}
+
+int
+main ()
+{
+  volatile int one = 1;
+  foo (!one);
+  if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x15541554UL)
+    abort ();
+  a = 0;
+  b[0] = 1;
+  b[1] = 1;
+  b[2] = 1;
+  c[1] = ~0UL;
+  #pragma omp parallel
+  foo (one);
+  if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x55545554UL)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c

index b7a29f043250c8380366ec6404652f27b8fa57c2..e0a946efba17eae41e78e0223da21ebc34f42b2f 100644 (file)
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c
@@ -1,7 +1,7 @@
  #include <omp.h>
  #include <stdlib.h>
  
-struct S { unsigned long int s, t; };
+struct S { unsigned long long int s, t; };
  
  void
  rbar (struct S *p, struct S *o)
@@ -119,5 +119,7 @@ main ()
      abort ();
    if (m.s != 63 * 64 * 4 || m.t != 7)
      abort ();
+  if (r != t)
+    abort ();
    return 0;
  }
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c

new file mode 100644 (file)

index 0000000..7b0859d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c
@@ -0,0 +1,141 @@
+#include <omp.h>
+#include <stdlib.h>
+
+struct S { unsigned long long int s, t; };
+
+void
+rbar (struct S *p, struct S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+rbaz (struct S *o, struct S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \
+  initializer (omp_priv = { 0, 3 })
+#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \
+  initializer (rbar (&omp_priv, &omp_orig))
+
+struct S g = { 0, 7 };
+struct S h = { 1, 5 };
+
+int
+foo (int z, int *a, int *b)
+{
+  int x = 0;
+  #pragma omp taskloop reduction (+:x) in_reduction (+:b[0])
+  for (int i = z; i < z + 8; i++)
+    {
+      x += a[i];
+      *b += a[i] * 2;
+    }
+  return x;
+}
+
+unsigned long long int
+bar (int z, int *a, unsigned long long int *b, int *s)
+{
+  unsigned long long int x = 1;
+  #pragma omp taskloop reduction (*:x) in_reduction (*:b[0])
+  for (int i = z; i < z + 8; i++)
+    {
+      #pragma omp task in_reduction (*:x)
+      x *= a[i];
+      #pragma omp task in_reduction (*:b[0])
+      *b *= (3 - a[i]);
+      s[0]++;
+    }
+  return x;
+}
+
+void
+baz (int i, int *a, int *c)
+{
+  #pragma omp task in_reduction (*:h) in_reduction (+:g)
+  {
+    g.s += 7 * a[i];
+    h.s *= (3 - c[i]);
+    if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9))
+      abort ();
+  }
+}
+
+int
+main ()
+{
+  int i, j = 0, a[64], b = 0, c[64], f = 0;
+  unsigned long long int d = 1, e = 1;
+  volatile int one = 1;
+  int r = 0, s = 0, t;
+  struct S m = { 0, 7 };
+  struct S n = { 1, 5 };
+  for (i = 0; i < 64; i++)
+    {
+      a[i] = 2 * i;
+      c[i] = 1 + ((i % 3) != 1);
+    }
+  #pragma omp parallel reduction (task, +:b) shared(t) reduction(+:r, s)
+  {
+    int z, q1, q2, q3;
+    #pragma omp master
+    t = omp_get_num_threads ();
+    #pragma omp for schedule(static) reduction (task, +: f) reduction (+: j)
+    for (z = 0; z < 64; z += 8)
+      {
+       f++;
+       j += foo (z, a, &b);
+       j += foo (z, a, &f);
+      }
+    if (j != 63 * 64 * 2 || f != 63 * 64 * 2 + 8)
+      abort ();
+    r++;
+    #pragma omp taskgroup task_reduction (+: s)
+    {
+      #pragma omp for schedule(static, 1) reduction(task, *: d) reduction (*: e)
+      for (z = 0; z < 64; z += 8)
+       e *= bar (z, c, &d, &s);
+    }
+    if (e != (1ULL << 43) || d != (1ULL << 21))
+      abort ();
+    #pragma omp for schedule(monotonic: dynamic, 1) reduction (task, +: g, m) \
+                   reduction (task, *: h, n) collapse(3)
+    for (q1 = 0; q1 < one; q1++)
+      for (q2 = 0; q2 < 64; q2 += 8)
+       for (q3 = 0; q3 < one; ++q3)
+         #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) \
+                              nogroup
+         for (i = q2; i < q2 + 8; ++i)
+           {
+             g.s += 3 * a[i];
+             h.s *= (3 - c[i]);
+             m.s += 4 * a[i];
+             n.s *= c[i];
+             if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)
+                 || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9))
+               abort ();
+             baz (i, a, c);
+           }
+    if (n.s != (1ULL << 43) || n.t != 5)
+      abort ();
+    if (g.s != 63 * 64 * 10 || g.t != 7)
+      abort ();
+    if (h.s != (1ULL << 42) || h.t != 5)
+      abort ();
+    if (m.s != 63 * 64 * 4 || m.t != 7)
+      abort ();
+  }
+  if (b != 63 * 64 * 2)
+    abort ();
+  if (r != t || s != 64)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c

new file mode 100644 (file)

index 0000000..3d71fef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
@@ -0,0 +1,217 @@
+#ifdef __cplusplus
+extern "C"
+#endif
+void abort (void);
+
+int a[2];
+long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 };
+int e[3] = { 5, 0, 5 };
+int f[5] = { 6, 7, 0, 0, 9 };
+int g[4] = { 1, 0, 0, 2 };
+int h[3] = { 0, 1, 4 };
+int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+long long *s;
+long long (*t)[2];
+
+void
+foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2])
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+                    in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+                    in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+                    in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0] += 7;
+      a[1] += 17;
+      b[2] *= 2;
+      b[4] *= 2;
+      c[0] += 6;
+      d[1] *= 2;
+      e[1] += 19;
+      f[2] += 21;
+      f[3] += 23;
+      g[1] += 25;
+      g[2] += 27;
+      h[0] += 29;
+      k[1][0] += 31;
+      k[2][1] += 33;
+      m[1] += 19;
+      r[2] += 21;
+      r[3] += 23;
+      o[1] += 25;
+      o[2] += 27;
+      p[0] += 29;
+      q[1][0] += 31;
+      q[2][1] += 33;
+      s[1] *= 2;
+      t[2][0] *= 2;
+      t[3][1] *= 2;
+    }
+}
+
+void
+test (int n)
+{
+  int c[2] = { 0, 0 };
+  int p[3] = { 0, 1, 4 };
+  int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+  long long ss[4] = { 5, 1, 1, 6 };
+  long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+  long long int d[] = { 1, 1 };
+  int m[3] = { 5, 0, 5 };
+  int r[5] = { 6, 7, 0, 0, 9 };
+  int o[4] = { 1, 0, 0, 2 };
+  s = ss;
+  t = tt;
+  #pragma omp parallel num_threads(4)
+  {
+    int i;
+    #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+                   reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+                   reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+                   reduction (task, *: t[2:2][:], s[1:n + 1]) \
+                   schedule(nonmonotonic: runtime)
+    for (i = 0; i < 4; i++)
+      {
+       #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+                        in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+                        in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+                        in_reduction (*: s[1:2], t[2:2][:])
+       {
+         int j;
+         a[0] += 2;
+         a[1] += 3;
+         b[2] *= 2;
+         f[3] += 8;
+         g[1] += 9;
+         g[2] += 10;
+         h[0] += 11;
+         k[1][1] += 13;
+         k[2][1] += 15;
+         m[1] += 16;
+         r[2] += 8;
+         s[1] *= 2;
+         t[2][1] *= 2;
+         t[3][1] *= 2;
+         for (j = 0; j < 2; j++)
+           #pragma omp task in_reduction (+: a, c[:2]) \
+                            in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+                            in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+                            in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+                            in_reduction (*: s[n:2], t[2:2][:])
+           {
+             m[1] += 6;
+             r[2] += 7;
+             q[1][0] += 17;
+             q[2][0] += 19;
+             a[0] += 4;
+             a[1] += 5;
+             b[3] *= 2;
+             b[4] *= 2;
+             f[3] += 18;
+             g[1] += 29;
+             g[2] += 18;
+             h[0] += 19;
+             s[2] *= 2;
+             t[2][0] *= 2;
+             t[3][0] *= 2;
+             foo (n, c, d, m, r, o, p, q);
+             r[3] += 18;
+             o[1] += 29;
+             o[2] += 18;
+             p[0] += 19;
+             c[0] += 4;
+             c[1] += 5;
+             d[0] *= 2;
+             e[1] += 6;
+             f[2] += 7;
+             k[1][0] += 17;
+             k[2][0] += 19;
+           }
+         r[3] += 8;
+         o[1] += 9;
+         o[2] += 10;
+         p[0] += 11;
+         q[1][1] += 13;
+         q[2][1] += 15;
+         b[3] *= 2;
+         c[0] += 4;
+         c[1] += 9;
+         d[0] *= 2;
+         e[1] += 16;
+         f[2] += 8;
+       }
+      }
+  }
+  if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+      || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+      || b[0] != 9 || b[1] != 11
+      || b[2] != 1LL << (16 + 4)
+      || b[3] != 1LL << (8 + 4)
+      || b[4] != 1LL << (16 + 8)
+      || b[5] != 13 || b[6] != 15
+      || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+      || c[1] != 5 * 8 + 9 * 4
+      || d[0] != 1LL << (8 + 4)
+      || d[1] != 1LL << 16
+      || e[0] != 5
+      || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || e[2] != 5
+      || f[0] != 6
+      || f[1] != 7
+      || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || f[4] != 9
+      || g[0] != 1
+      || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || g[3] != 2
+      || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || h[1] != 1 || h[2] != 4
+      || k[0][0] != 5 || k[0][1] != 6
+      || k[1][0] != 31 * 16 + 17 * 8
+      || k[1][1] != 13 * 4
+      || k[2][0] != 19 * 8
+      || k[2][1] != 33 * 16 + 15 * 4
+      || k[3][0] != 7 || k[3][1] != 8
+      || m[0] != 5
+      || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || m[2] != 5
+      || o[0] != 1
+      || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || o[3] != 2
+      || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || p[1] != 1 || p[2] != 4
+      || q[0][0] != 5 || q[0][1] != 6
+      || q[1][0] != 31 * 16 + 17 * 8
+      || q[1][1] != 13 * 4
+      || q[2][0] != 19 * 8
+      || q[2][1] != 33 * 16 + 15 * 4
+      || q[3][0] != 7 || q[3][1] != 8
+      || r[0] != 6
+      || r[1] != 7
+      || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || r[4] != 9
+      || ss[0] != 5
+      || ss[1] != 1LL << (16 + 4)
+      || ss[2] != 1LL << 8
+      || ss[3] != 6
+      || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12
+      || tt[2][0] != 1LL << (16 + 8)
+      || tt[2][1] != 1LL << 4
+      || tt[3][0] != 1LL << 8
+      || tt[3][1] != 1LL << (16 + 4)
+      || tt[4][0] != 13 || tt[4][1] != 14)
+    abort ();
+}
+
+int
+main ()
+{
+  test (1);
+  return 0;
+}
diff --git a/libgomp/work.c b/libgomp/work.c

index ac2f0233120f0ee32b08c01a961141f7bb52dcdb..16fc7076eddf8d6a69024bcc1e4a163c4ec6eee0 100644 (file)
--- a/libgomp/work.c
+++ b/libgomp/work.c
@@ -98,30 +98,35 @@ alloc_work_share (struct gomp_team *team)
     This shouldn't touch the next_alloc field.  */
  
  void
-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
                       unsigned nthreads)
  {
    gomp_mutex_init (&ws->lock);
    if (__builtin_expect (ordered, 0))
      {
-#define INLINE_ORDERED_TEAM_IDS_CNT \
-  ((sizeof (struct gomp_work_share) \
-    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
-   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
-
-      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
-       ws->ordered_team_ids
-         = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+#define INLINE_ORDERED_TEAM_IDS_SIZE \
+  (sizeof (struct gomp_work_share) \
+   - offsetof (struct gomp_work_share, inline_ordered_team_ids))
+
+      if (__builtin_expect (ordered != 1, 0))
+       {
+         ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
+         ordered = ordered + __alignof__ (long long) - 1;
+         ordered &= ~(__alignof__ (long long) - 1);
+       }
+      else
+       ordered = nthreads * sizeof (*ws->ordered_team_ids);
+      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
+       ws->ordered_team_ids = gomp_malloc (ordered);
        else
         ws->ordered_team_ids = ws->inline_ordered_team_ids;
-      memset (ws->ordered_team_ids, '\0',
-             nthreads * sizeof (*ws->ordered_team_ids));
+      memset (ws->ordered_team_ids, '\0', ordered);
        ws->ordered_num_used = 0;
        ws->ordered_owner = -1;
        ws->ordered_cur = 0;
      }
    else
-    ws->ordered_team_ids = NULL;
+    ws->ordered_team_ids = ws->inline_ordered_team_ids;
    gomp_ptrlock_init (&ws->next_ws, NULL);
    ws->threads_completed = 0;
  }
@@ -174,7 +179,7 @@ free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
     if this was the first thread to reach this point.  */
  
  bool
-gomp_work_share_start (bool ordered)
+gomp_work_share_start (size_t ordered)
  {
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
@@ -186,7 +191,7 @@ gomp_work_share_start (bool ordered)
        ws = gomp_malloc (sizeof (*ws));
        gomp_init_work_share (ws, ordered, 1);
        thr->ts.work_share = ws;
-      return ws;
+      return true;
      }
  
    ws = thr->ts.work_share;
author	Jakub Jelinek <jakub@redhat.com>
	Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)
committer	Jakub Jelinek <jakub@gcc.gnu.org>
	Wed, 7 Nov 2018 19:21:43 +0000 (20:21 +0100)
gcc/ChangeLog.gomp		patch \| blob \| blame \| history
gcc/builtin-types.def		patch \| blob \| blame \| history
gcc/fortran/ChangeLog.gomp		patch \| blob \| blame \| history
gcc/fortran/types.def		patch \| blob \| blame \| history
gcc/omp-builtins.def		patch \| blob \| blame \| history
gcc/omp-expand.c		patch \| blob \| blame \| history
gcc/omp-general.c		patch \| blob \| blame \| history
gcc/omp-general.h		patch \| blob \| blame \| history
gcc/omp-low.c		patch \| blob \| blame \| history
libgomp/ChangeLog.gomp		patch \| blob \| blame \| history
libgomp/libgomp.h		patch \| blob \| blame \| history
libgomp/libgomp.map		patch \| blob \| blame \| history
libgomp/libgomp_g.h		patch \| blob \| blame \| history
libgomp/loop.c		patch \| blob \| blame \| history
libgomp/loop_ull.c		patch \| blob \| blame \| history
libgomp/ordered.c		patch \| blob \| blame \| history
libgomp/parallel.c		patch \| blob \| blame \| history
libgomp/sections.c		patch \| blob \| blame \| history
libgomp/single.c		patch \| blob \| blame \| history
libgomp/target.c		patch \| blob \| blame \| history
libgomp/task.c		patch \| blob \| blame \| history
libgomp/taskloop.c		patch \| blob \| blame \| history
libgomp/team.c		patch \| blob \| blame \| history
libgomp/testsuite/libgomp.c++/task-reduction-14.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c++/task-reduction-15.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c++/task-reduction-16.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c++/task-reduction-17.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c++/task-reduction-18.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c++/task-reduction-19.C	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c		patch \| blob \| blame \| history
libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c	[new file with mode: 0644]	patch \| blob
libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c	[new file with mode: 0644]	patch \| blob
libgomp/work.c		patch \| blob \| blame \| history