+2018-11-07 Jakub Jelinek <jakub@redhat.com>
+
+ * builtin-types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+ BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+ * omp-builtins.def (BUILT_IN_GOMP_LOOP_START,
+ BUILT_IN_GOMP_LOOP_ORDERED_START, BUILT_IN_GOMP_LOOP_DOACROSS_START,
+ BUILT_IN_GOMP_LOOP_ULL_START, BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+ BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START, BUILT_IN_GOMP_SECTIONS2_START,
+ BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER): New.
+ * omp-general.h (struct omp_for_data): Add have_reductemp member.
+ * omp-general.c (omp_extract_for_data): Initialize it.
+ * omp-low.c (build_outer_var_ref): Ignore taskgroup outer contexts.
+ Fix up the condition when lookup_decl should be used.
+ (scan_sharing_clauses): Call install_var_local for reductions with
+ task modifier even in worksharing contexts.
+ (lower_rec_input_clauses): Don't lookup_decl reductemp in worksharing
+ contexts. Handle reductions with task modifier in worksharing
+ contexts. Ignore _reductemp_ clause in worksharing contexts.
+	(lower_reduction_clauses): Ignore reduction clauses with the task
+	modifier even in worksharing contexts.
+ (lower_send_clauses): Likewise.
+	(maybe_add_implicit_barrier_cancel): Add OMP_RETURN argument rather
+	than assuming it is the last stmt in the body so far.  Ignore outer
+	taskgroup contexts.
+ (omp_task_reductions_find_first): Move earlier.
+ (lower_omp_task_reductions): Add forward declaration. Handle
+ OMP_FOR and OMP_SECTIONS, add support for parallel cancellation.
+	(lower_omp_sections): Handle reduction clauses with the task
+	modifier.  Adjust maybe_add_implicit_barrier_cancel caller.
+ (lower_omp_single): Adjust maybe_add_implicit_barrier_cancel caller.
+	(lower_omp_for): Likewise.  Handle reduction clauses with the task
+	modifier.
+ * omp-expand.c (omp_adjust_chunk_size): Don't adjust anything if
+ chunk_size is zero.
+ (determine_parallel_type): Don't combine parallel with worksharing
+ which has _reductemp_ clause.
+ (expand_omp_for_generic): Add SCHED_ARG argument. Handle expansion
+ of worksharing loops with task reductions.
+ (expand_omp_for_static_nochunk): Handle expansion of worksharing
+ loops with task reductions.
+ (expand_omp_for_static_chunk): Likewise.
+ (expand_omp_for): Adjust expand_omp_for_generic caller, use
+ GOMP_loop{,_ull}{,_ordered,_doacross}_start builtins if there are
+ task reductions.
+ (expand_omp_sections): Handle expansion of sections with task
+ reductions.
+
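For context, the construct this patch teaches the compiler and runtime to
handle is a worksharing loop (or sections region) whose reduction clause
carries the OpenMP 5.0 "task" modifier, with tasks in the body joining in
through in_reduction.  A minimal illustration, not part of the patch:

  int
  foo (void)
  {
    int sum = 0;
    #pragma omp parallel
    #pragma omp for reduction (task, +: sum)
    for (int i = 0; i < 64; i++)
      {
        #pragma omp task in_reduction (+: sum)
        sum += i;
      }
    return sum;
  }

With this change such a loop is expanded to call the new GOMP_loop_start
family of entry points, which register the task reductions, instead of the
plain GOMP_loop_*_start builtins; sections regions use GOMP_sections2_start.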
2018-10-25 Jakub Jelinek <jakub@redhat.com>
* omp-builtins.def (BUILT_IN_GOMP_LOOP_NONMONOTONIC_RUNTIME_START,
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VALIST_REF, BT_VOID, BT_VALIST_REF)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
DEF_FUNCTION_TYPE_1 (BT_FN_FLOAT_CONST_STRING, BT_FLOAT, BT_CONST_STRING)
DEF_FUNCTION_TYPE_1 (BT_FN_DOUBLE_CONST_STRING, BT_DOUBLE, BT_CONST_STRING)
DEF_FUNCTION_TYPE_1 (BT_FN_LONGDOUBLE_CONST_STRING,
BT_VOID, BT_UINT32, BT_UINT32, BT_PTR)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE,
BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+ BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+ BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+ BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+ BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+ BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
+2018-11-07 Jakub Jelinek <jakub@redhat.com>
+
+	* types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+	BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+	BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+	(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): Fix formatting.
+
2018-10-23 Jakub Jelinek <jakub@redhat.com>
* types.def (BT_FN_UINT_OMPFN_PTR_UINT_UINT): New.
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE,
BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+ BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+ BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
BT_BOOL, BT_UINT, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
- BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
- BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+ BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+ BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+ BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+ BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+ BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
"GOMP_loop_doacross_runtime_start",
BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_START,
+ "GOMP_loop_start",
+ BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ORDERED_START,
+ "GOMP_loop_ordered_start",
+ BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DOACROSS_START,
+ "GOMP_loop_doacross_start",
+ BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_STATIC_NEXT, "GOMP_loop_static_next",
BT_FN_BOOL_LONGPTR_LONGPTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, "GOMP_loop_dynamic_next",
"GOMP_loop_ull_doacross_runtime_start",
BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_START,
+ "GOMP_loop_ull_start",
+ BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+ "GOMP_loop_ull_ordered_start",
+ BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START,
+ "GOMP_loop_ull_doacross_start",
+ BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+ ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT,
"GOMP_loop_ull_static_next",
BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LEAF_LIST)
ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_START, "GOMP_sections_start",
BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS2_START, "GOMP_sections2_start",
+ BT_FN_UINT_UINT_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_NEXT, "GOMP_sections_next",
BT_FN_UINT, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_SECTIONS,
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_REDUCTION_REMAP,
"GOMP_task_reduction_remap",
BT_FN_VOID_SIZE_SIZE_PTR, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER,
+ "GOMP_workshare_task_reduction_unregister",
+ BT_FN_VOID_BOOL, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
- if (!simd_schedule)
+ if (!simd_schedule || integer_zerop (chunk_size))
return chunk_size;
poly_uint64 vf = omp_max_vf ();
if (c == NULL
|| ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
== OMP_CLAUSE_SCHEDULE_STATIC)
- || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
- {
- region->is_combined_parallel = false;
- region->inner->is_combined_parallel = false;
- return;
- }
+ || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
+ || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
+ return;
}
+ else if (region->inner->type == GIMPLE_OMP_SECTIONS
+ && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
+ OMP_CLAUSE__REDUCTEMP_))
+ return;
region->is_combined_parallel = true;
region->inner->is_combined_parallel = true;
struct omp_for_data *fd,
enum built_in_function start_fn,
enum built_in_function next_fn,
+ tree sched_arg,
gimple *inner_stmt)
{
tree type, istart0, iend0, iend;
&& omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
OMP_CLAUSE_LASTPRIVATE))
ordered_lastprivate = false;
+ tree reductions = NULL_TREE;
+ tree mem = NULL_TREE;
+ if (sched_arg)
+ {
+ if (fd->have_reductemp)
+ {
+ tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
+ OMP_CLAUSE__REDUCTEMP_);
+ reductions = OMP_CLAUSE_DECL (c);
+ gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+ gimple *g = SSA_NAME_DEF_STMT (reductions);
+ reductions = gimple_assign_rhs1 (g);
+ OMP_CLAUSE_DECL (c) = reductions;
+ entry_bb = gimple_bb (g);
+ edge e = split_block (entry_bb, g);
+ if (region->entry == entry_bb)
+ region->entry = e->dest;
+ gsi = gsi_last_bb (entry_bb);
+ }
+ else
+ reductions = null_pointer_node;
+      /* For now no extra memory is requested; pass a null pointer.  */
+ mem = null_pointer_node;
+ }
if (fd->collapse > 1 || fd->ordered)
{
int first_zero_iter1 = -1, first_zero_iter2 = -1;
{
t = fold_convert (fd->iter_type, fd->chunk_size);
t = omp_adjust_chunk_size (t, fd->simd_schedule);
- if (fd->ordered)
+ if (sched_arg)
+ {
+ if (fd->ordered)
+ t = build_call_expr (builtin_decl_explicit (start_fn),
+ 8, t0, t1, sched_arg, t, t3, t4,
+ reductions, mem);
+ else
+ t = build_call_expr (builtin_decl_explicit (start_fn),
+ 9, t0, t1, t2, sched_arg, t, t3, t4,
+ reductions, mem);
+ }
+ else if (fd->ordered)
t = build_call_expr (builtin_decl_explicit (start_fn),
5, t0, t1, t, t3, t4);
else
tree bfn_decl = builtin_decl_explicit (start_fn);
t = fold_convert (fd->iter_type, fd->chunk_size);
t = omp_adjust_chunk_size (t, fd->simd_schedule);
- t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
+ if (sched_arg)
+ t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
+ t, t3, t4, reductions, mem);
+ else
+ t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
}
else
t = build_call_expr (builtin_decl_explicit (start_fn),
gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
GSI_SAME_STMT);
}
+ if (fd->have_reductemp)
+ {
+ gimple *g = gsi_stmt (gsi);
+ gsi_remove (&gsi, true);
+ release_ssa_name (gimple_assign_lhs (g));
+
+ entry_bb = region->entry;
+ gsi = gsi_last_nondebug_bb (entry_bb);
+
+ gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+ }
gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
/* Remove the GIMPLE_OMP_FOR statement. */
else
t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
gcall *call_stmt = gimple_build_call (t, 0);
- if (gimple_omp_return_lhs (gsi_stmt (gsi)))
- gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
- gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
if (fd->ordered)
{
tree arr = counts[fd->ordered];
gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
GSI_SAME_STMT);
}
+ if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+ {
+ gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
+ if (fd->have_reductemp)
+ {
+ gimple *g = gimple_build_assign (reductions, NOP_EXPR,
+ gimple_call_lhs (call_stmt));
+ gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+ }
+ }
+ gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
/* Connect the new blocks. */
bool broken_loop = region->cont == NULL;
tree *counts = NULL;
tree n1, n2, step;
+ tree reductions = NULL_TREE;
itype = type = TREE_TYPE (fd->loop.v);
if (POINTER_TYPE_P (type))
gsi = gsi_last_bb (entry_bb);
}
+ if (fd->have_reductemp)
+ {
+ tree t1 = build_int_cst (long_integer_type_node, 0);
+ tree t2 = build_int_cst (long_integer_type_node, 1);
+ tree t3 = build_int_cstu (long_integer_type_node,
+ (HOST_WIDE_INT_1U << 31) + 1);
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+ reductions = OMP_CLAUSE_DECL (clauses);
+ gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+ gimple *g = SSA_NAME_DEF_STMT (reductions);
+ reductions = gimple_assign_rhs1 (g);
+ OMP_CLAUSE_DECL (clauses) = reductions;
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+ tree t
+ = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
+ 9, t1, t2, t2, t3, t1, null_pointer_node,
+ null_pointer_node, reductions, null_pointer_node);
+ force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ gsi_remove (&gsi2, true);
+ release_ssa_name (gimple_assign_lhs (g));
+ }
switch (gimple_omp_for_kind (fd->for_stmt))
{
case GF_OMP_FOR_KIND_FOR:
if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
{
t = gimple_omp_return_lhs (gsi_stmt (gsi));
- gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
+ if (fd->have_reductemp)
+ {
+ tree fn;
+ if (t)
+ fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+ else
+ fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+ gcall *g = gimple_build_call (fn, 0);
+ if (t)
+ {
+ gimple_call_set_lhs (g, t);
+ gsi_insert_after (&gsi, gimple_build_assign (reductions,
+ NOP_EXPR, t),
+ GSI_SAME_STMT);
+ }
+ gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+ }
+ else
+ gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
}
gsi_remove (&gsi, true);
bool broken_loop = region->cont == NULL;
tree *counts = NULL;
tree n1, n2, step;
+ tree reductions = NULL_TREE;
itype = type = TREE_TYPE (fd->loop.v);
if (POINTER_TYPE_P (type))
gsi = gsi_last_bb (entry_bb);
}
+ if (fd->have_reductemp)
+ {
+ tree t1 = build_int_cst (long_integer_type_node, 0);
+ tree t2 = build_int_cst (long_integer_type_node, 1);
+ tree t3 = build_int_cstu (long_integer_type_node,
+ (HOST_WIDE_INT_1U << 31) + 1);
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+ reductions = OMP_CLAUSE_DECL (clauses);
+ gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+ gimple *g = SSA_NAME_DEF_STMT (reductions);
+ reductions = gimple_assign_rhs1 (g);
+ OMP_CLAUSE_DECL (clauses) = reductions;
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+ tree t
+ = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
+ 9, t1, t2, t2, t3, t1, null_pointer_node,
+ null_pointer_node, reductions, null_pointer_node);
+ force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ gsi_remove (&gsi2, true);
+ release_ssa_name (gimple_assign_lhs (g));
+ }
switch (gimple_omp_for_kind (fd->for_stmt))
{
case GF_OMP_FOR_KIND_FOR:
if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
{
t = gimple_omp_return_lhs (gsi_stmt (gsi));
- gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
+ if (fd->have_reductemp)
+ {
+ tree fn;
+ if (t)
+ fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+ else
+ fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+ gcall *g = gimple_build_call (fn, 0);
+ if (t)
+ {
+ gimple_call_set_lhs (g, t);
+ gsi_insert_after (&gsi, gimple_build_assign (reductions,
+ NOP_EXPR, t),
+ GSI_SAME_STMT);
+ }
+ gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+ }
+ else
+ gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
}
gsi_remove (&gsi, true);
else
{
int fn_index, start_ix, next_ix;
+ unsigned HOST_WIDE_INT sched = 0;
+ tree sched_arg = NULL_TREE;
gcc_assert (gimple_omp_for_kind (fd.for_stmt)
== GF_OMP_FOR_KIND_FOR);
{
gcc_assert (!fd.have_ordered);
fn_index = 6;
+ sched = 4;
}
else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
&& !fd.have_ordered)
fn_index = 7;
else
- fn_index = 3;
+ {
+ fn_index = 3;
+ sched = (HOST_WIDE_INT_1U << 31);
+ }
break;
case OMP_CLAUSE_SCHEDULE_DYNAMIC:
case OMP_CLAUSE_SCHEDULE_GUIDED:
&& !fd.have_ordered)
{
fn_index = 3 + fd.sched_kind;
+ sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
break;
}
fn_index = fd.sched_kind;
+ sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
+ sched += (HOST_WIDE_INT_1U << 31);
break;
case OMP_CLAUSE_SCHEDULE_STATIC:
gcc_assert (fd.have_ordered);
fn_index = 0;
+ sched = (HOST_WIDE_INT_1U << 31) + 1;
break;
default:
gcc_unreachable ();
else
start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
+ if (fd.have_reductemp)
+ {
+ if (fd.ordered)
+ start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
+ else if (fd.have_ordered)
+ start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
+ else
+ start_ix = (int)BUILT_IN_GOMP_LOOP_START;
+ sched_arg = build_int_cstu (long_integer_type_node, sched);
+ if (!fd.chunk_size)
+ fd.chunk_size = integer_zero_node;
+ }
if (fd.iter_type == long_long_unsigned_type_node)
{
start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
- (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
}
expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
- (enum built_in_function) next_ix, inner_stmt);
+ (enum built_in_function) next_ix, sched_arg,
+ inner_stmt);
}
if (gimple_in_ssa_p (cfun))
sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
vin = gimple_omp_sections_control (sections_stmt);
- if (!is_combined_parallel (region))
+ tree clauses = gimple_omp_sections_clauses (sections_stmt);
+ tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+ if (reductmp)
+ {
+ tree reductions = OMP_CLAUSE_DECL (reductmp);
+ gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+ gimple *g = SSA_NAME_DEF_STMT (reductions);
+ reductions = gimple_assign_rhs1 (g);
+ OMP_CLAUSE_DECL (reductmp) = reductions;
+ gimple_stmt_iterator gsi = gsi_for_stmt (g);
+ t = build_int_cst (unsigned_type_node, len - 1);
+ u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
+ stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
+ gimple_call_set_lhs (stmt, vin);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ release_ssa_name (gimple_assign_lhs (g));
+ }
+ else if (!is_combined_parallel (region))
{
/* If we are not inside a combined parallel+sections region,
call GOMP_sections_start. */
u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
stmt = gimple_build_call (u, 0);
}
- gimple_call_set_lhs (stmt, vin);
- gsi_insert_after (&si, stmt, GSI_SAME_STMT);
+ if (!reductmp)
+ {
+ gimple_call_set_lhs (stmt, vin);
+ gsi_insert_after (&si, stmt, GSI_SAME_STMT);
+ }
gsi_remove (&si, true);
/* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
fd->pre = NULL;
fd->have_nowait = distribute || simd;
fd->have_ordered = false;
+ fd->have_reductemp = false;
fd->tiling = NULL_TREE;
fd->collapse = 1;
fd->ordered = 0;
collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t);
collapse_count = &OMP_CLAUSE_TILE_COUNT (t);
break;
+ case OMP_CLAUSE__REDUCTEMP_:
+ fd->have_reductemp = true;
default:
break;
}
tree tiling; /* Tiling values (if non null). */
int collapse; /* Collapsed loops, 1 for a non-collapsed loop. */
int ordered;
- bool have_nowait, have_ordered, simd_schedule;
+ bool have_nowait, have_ordered, simd_schedule, have_reductemp;
unsigned char sched_modifiers;
enum omp_clause_schedule_kind sched_kind;
struct omp_for_data_loop *loops;
enum omp_clause_code code = OMP_CLAUSE_ERROR)
{
tree x;
+ omp_context *outer = ctx->outer;
+ while (outer && gimple_code (outer->stmt) == GIMPLE_OMP_TASKGROUP)
+ outer = outer->outer;
if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
x = var;
Similarly for OMP_CLAUSE_PRIVATE with outer ref, that can refer
to private vars in all worksharing constructs. */
x = NULL_TREE;
- if (ctx->outer && is_taskreg_ctx (ctx))
- x = lookup_decl (var, ctx->outer);
- else if (ctx->outer)
+ if (outer && is_taskreg_ctx (outer))
+ x = lookup_decl (var, outer);
+ else if (outer)
x = maybe_lookup_decl_in_outer_ctx (var, ctx);
if (x == NULL_TREE)
x = var;
}
else if (code == OMP_CLAUSE_LASTPRIVATE && is_taskloop_ctx (ctx))
{
- gcc_assert (ctx->outer);
+ gcc_assert (outer);
splay_tree_node n
- = splay_tree_lookup (ctx->outer->field_map,
+ = splay_tree_lookup (outer->field_map,
(splay_tree_key) &DECL_UID (var));
if (n == NULL)
{
- if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
+ if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, outer)))
x = var;
else
- x = lookup_decl (var, ctx->outer);
+ x = lookup_decl (var, outer);
}
else
{
tree field = (tree) n->value;
/* If the receiver record type was remapped in the child function,
remap the field into the new record type. */
- x = maybe_lookup_field (field, ctx->outer);
+ x = maybe_lookup_field (field, outer);
if (x != NULL)
field = x;
- x = build_simple_mem_ref (ctx->outer->receiver_decl);
+ x = build_simple_mem_ref (outer->receiver_decl);
x = omp_build_component_ref (x, field);
- if (use_pointer_for_field (var, ctx->outer))
+ if (use_pointer_for_field (var, outer))
x = build_simple_mem_ref (x);
}
}
- else if (ctx->outer)
+ else if (outer)
{
- omp_context *outer = ctx->outer;
if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
{
outer = outer->outer;
install_var_local (decl, ctx);
break;
}
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_TASK (c))
+ {
+ install_var_local (decl, ctx);
+ break;
+ }
goto do_private;
case OMP_CLAUSE_LASTPRIVATE:
gimple_call_set_lhs (g, v);
gimple_seq_add_stmt (ilist, g);
c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
- tskred_temp = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
+ tskred_temp = OMP_CLAUSE_DECL (c);
+ if (is_taskreg_ctx (ctx))
+ tskred_temp = lookup_decl (tskred_temp, ctx);
tree v2 = create_tmp_var (sizetype);
g = gimple_build_assign (v2, NOP_EXPR, v);
gimple_seq_add_stmt (ilist, g);
break;
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
- if (is_task_ctx (ctx)
- || (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx)))
+ if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c))
{
task_reduction_p = true;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
reduction_omp_orig_ref = true;
break;
- case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_:
+ if (!is_taskreg_ctx (ctx))
+ continue;
+ /* FALLTHRU */
+ case OMP_CLAUSE__LOOPTEMP_:
/* Handle _looptemp_/_reductemp_ clauses only on
parallel/task. */
if (fd)
update in that case, otherwise use a lock. */
for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
- && (!OMP_CLAUSE_REDUCTION_TASK (c)
- || !is_parallel_ctx (ctx)))
+ && !OMP_CLAUSE_REDUCTION_TASK (c))
{
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
|| TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
location_t clause_loc = OMP_CLAUSE_LOCATION (c);
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
- || (OMP_CLAUSE_REDUCTION_TASK (c)
- && is_parallel_ctx (ctx)))
+ || OMP_CLAUSE_REDUCTION_TASK (c))
continue;
enum omp_clause_code ccode = OMP_CLAUSE_REDUCTION;
case OMP_CLAUSE__REDUCTEMP_:
break;
case OMP_CLAUSE_REDUCTION:
- if (is_task_ctx (ctx))
- continue;
- if (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx))
+ if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c))
continue;
break;
case OMP_CLAUSE_SHARED:
cancellation in the implicit barrier. */
static void
-maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
+maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple *omp_return,
+ gimple_seq *body)
{
- gimple *omp_return = gimple_seq_last_stmt (*body);
gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
if (gimple_omp_return_nowait_p (omp_return))
return;
- if (ctx->outer
- && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
- && ctx->outer->cancellable)
- {
- tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
- tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
- tree lhs = create_tmp_var (c_bool_type);
- gimple_omp_return_set_lhs (omp_return, lhs);
- tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
- gimple *g = gimple_build_cond (NE_EXPR, lhs,
- fold_convert (c_bool_type,
- boolean_false_node),
- ctx->outer->cancel_label, fallthru_label);
- gimple_seq_add_stmt (body, g);
- gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
+ for (omp_context *outer = ctx->outer; outer; outer = outer->outer)
+ if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL
+ && outer->cancellable)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
+ tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
+ tree lhs = create_tmp_var (c_bool_type);
+ gimple_omp_return_set_lhs (omp_return, lhs);
+ tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
+ gimple *g = gimple_build_cond (NE_EXPR, lhs,
+ fold_convert (c_bool_type,
+ boolean_false_node),
+ outer->cancel_label, fallthru_label);
+ gimple_seq_add_stmt (body, g);
+ gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
+ }
+ else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP)
+ return;
+}
+
+/* Find the first task_reduction or reduction clause or return NULL
+ if there are none. */
+
+static inline tree
+omp_task_reductions_find_first (tree clauses, enum tree_code code,
+ enum omp_clause_code ccode)
+{
+ while (1)
+ {
+ clauses = omp_find_clause (clauses, ccode);
+ if (clauses == NULL_TREE)
+ return NULL_TREE;
+ if (ccode != OMP_CLAUSE_REDUCTION
+ || code == OMP_TASKLOOP
+ || OMP_CLAUSE_REDUCTION_TASK (clauses))
+ return clauses;
+ clauses = OMP_CLAUSE_CHAIN (clauses);
}
}
+static void lower_omp_task_reductions (omp_context *, enum tree_code, tree,
+ gimple_seq *, gimple_seq *);
+
/* Lower the OpenMP sections directive in the current statement in GSI_P.
CTX is the enclosing OMP context for the current statement. */
gomp_sections *stmt;
gimple *t;
gbind *new_stmt, *bind;
- gimple_seq ilist, dlist, olist, new_body;
+ gimple_seq ilist, dlist, olist, tred_dlist = NULL, new_body;
stmt = as_a <gomp_sections *> (gsi_stmt (*gsi_p));
dlist = NULL;
ilist = NULL;
+
+ tree rclauses
+ = omp_task_reductions_find_first (gimple_omp_sections_clauses (stmt),
+ OMP_SECTIONS, OMP_CLAUSE_REDUCTION);
+ tree rtmp = NULL_TREE;
+ if (rclauses)
+ {
+ tree type = build_pointer_type (pointer_sized_int_node);
+ tree temp = create_tmp_var (type);
+ tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_);
+ OMP_CLAUSE_DECL (c) = temp;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_sections_clauses (stmt);
+ gimple_omp_sections_set_clauses (stmt, c);
+ lower_omp_task_reductions (ctx, OMP_SECTIONS,
+ gimple_omp_sections_clauses (stmt),
+ &ilist, &tred_dlist);
+ rclauses = c;
+ rtmp = make_ssa_name (type);
+ gimple_seq_add_stmt (&ilist, gimple_build_assign (rtmp, temp));
+ }
+
lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
&ilist, &dlist, ctx, NULL);
OMP_CLAUSE_NOWAIT) != NULL_TREE;
t = gimple_build_omp_return (nowait);
gimple_seq_add_stmt (&new_body, t);
- maybe_add_implicit_barrier_cancel (ctx, &new_body);
+ gimple_seq_add_seq (&new_body, tred_dlist);
+ maybe_add_implicit_barrier_cancel (ctx, t, &new_body);
+
+ if (rclauses)
+ OMP_CLAUSE_DECL (rclauses) = rtmp;
gimple_bind_set_body (new_stmt, new_body);
}
OMP_CLAUSE_NOWAIT) != NULL_TREE;
gimple *g = gimple_build_omp_return (nowait);
gimple_seq_add_stmt (&bind_body_tail, g);
- maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
+ maybe_add_implicit_barrier_cancel (ctx, g, &bind_body_tail);
if (ctx->record_type)
{
gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
BLOCK_VARS (block) = ctx->block_vars;
}
-/* Find the first task_reduction or reduction clause or return NULL
- if there are none. */
-
-static inline tree
-omp_task_reductions_find_first (tree clauses, enum tree_code code,
- enum omp_clause_code ccode)
-{
- while (1)
- {
- clauses = omp_find_clause (clauses, ccode);
- if (clauses == NULL_TREE)
- return NULL_TREE;
- if (ccode != OMP_CLAUSE_REDUCTION
- || code == OMP_TASKLOOP
- || OMP_CLAUSE_REDUCTION_TASK (clauses))
- return clauses;
- clauses = OMP_CLAUSE_CHAIN (clauses);
- }
-}
-
/* Helper function for lower_omp_task_reductions. For a specific PASS
   find the current clause that should be processed, or return false
if all have been processed already. */
enum omp_clause_code ccode
= (code == OMP_TASKGROUP
? OMP_CLAUSE_TASK_REDUCTION : OMP_CLAUSE_REDUCTION);
+ tree cancellable = NULL_TREE;
clauses = omp_task_reductions_find_first (clauses, code, ccode);
if (clauses == NULL_TREE)
return;
+ if (code == OMP_FOR || code == OMP_SECTIONS)
+ {
+ for (omp_context *outer = ctx->outer; outer; outer = outer->outer)
+ if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL
+ && outer->cancellable)
+ {
+ cancellable = error_mark_node;
+ break;
+ }
+ else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP)
+ break;
+ }
tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
tree *last = &TYPE_FIELDS (record_type);
unsigned cnt = 0;
+ if (cancellable)
+ {
+ tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
+ ptr_type_node);
+ tree ifield = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
+ integer_type_node);
+ *last = field;
+ DECL_CHAIN (field) = ifield;
+ last = &DECL_CHAIN (ifield);
+ }
for (int pass = 0; pass < 2; pass++)
{
tree decl, type, next;
tree idx = create_tmp_var (size_type_node);
gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node));
tree num_thr_sz = create_tmp_var (size_type_node);
+ tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab3 = NULL_TREE;
gimple *g;
+ if (code == OMP_FOR || code == OMP_SECTIONS)
+ {
+ /* For worksharing constructs, only perform it in the master thread,
+ with the exception of cancelled implicit barriers - then only handle
+ the current thread. */
+ tree lab4 = create_artificial_label (UNKNOWN_LOCATION);
+ t = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
+ tree thr_num = create_tmp_var (integer_type_node);
+ g = gimple_build_call (t, 0);
+ gimple_call_set_lhs (g, thr_num);
+ gimple_seq_add_stmt (end, g);
+ if (cancellable)
+ {
+ tree c;
+ tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+ tree lab6 = create_artificial_label (UNKNOWN_LOCATION);
+ lab3 = create_artificial_label (UNKNOWN_LOCATION);
+ if (code == OMP_FOR)
+ c = gimple_omp_for_clauses (ctx->stmt);
+ else if (code == OMP_SECTIONS)
+ c = gimple_omp_sections_clauses (ctx->stmt);
+ c = OMP_CLAUSE_DECL (omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_));
+ cancellable = c;
+ g = gimple_build_cond (NE_EXPR, c, build_zero_cst (TREE_TYPE (c)),
+ lab5, lab6);
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_label (lab5));
+ g = gimple_build_assign (idx, NOP_EXPR, thr_num);
+ gimple_seq_add_stmt (end, g);
+ g = gimple_build_assign (num_thr_sz, PLUS_EXPR, idx,
+ build_one_cst (TREE_TYPE (idx)));
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_goto (lab3));
+ gimple_seq_add_stmt (end, gimple_build_label (lab6));
+ }
+ g = gimple_build_cond (NE_EXPR, thr_num, integer_zero_node, lab2, lab4);
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_label (lab4));
+ }
if (code != OMP_PARALLEL)
{
t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
gimple_seq_add_stmt (end, g);
g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr);
gimple_seq_add_stmt (end, g);
+ if (cancellable)
+ gimple_seq_add_stmt (end, gimple_build_label (lab3));
}
else
{
NULL_TREE, NULL_TREE);
tree data = create_tmp_var (pointer_sized_int_node);
gimple_seq_add_stmt (end, gimple_build_assign (data, t));
- tree lab1 = create_artificial_label (UNKNOWN_LOCATION);
- tree lab2 = create_artificial_label (UNKNOWN_LOCATION);
gimple_seq_add_stmt (end, gimple_build_label (lab1));
tree ptr;
if (TREE_CODE (TYPE_SIZE_UNIT (record_type)) == INTEGER_CST)
tree field = TYPE_FIELDS (record_type);
cnt = 0;
+ if (cancellable)
+ field = DECL_CHAIN (DECL_CHAIN (field));
for (int pass = 0; pass < 2; pass++)
{
tree decl, type, next;
tree bfield = DECL_CHAIN (field);
tree cond;
- if (code == OMP_PARALLEL)
- /* In parallel all threads unconditionally initialize all their
- task reduction private variables. */
+ if (code == OMP_PARALLEL || code == OMP_FOR || code == OMP_SECTIONS)
+ /* In parallel or worksharing all threads unconditionally
+ initialize all their task reduction private variables. */
cond = boolean_true_node;
else if (TREE_TYPE (ptr) == ptr_type_node)
{
lab3, lab4);
gimple_seq_add_stmt (end, g);
gimple_seq_add_stmt (end, gimple_build_label (lab3));
+ if (cancellable && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE)
+ {
+ /* If this reduction doesn't need destruction and parallel
+ has been cancelled, there is nothing to do for this
+ reduction, so jump around the merge operation. */
+ tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+ g = gimple_build_cond (NE_EXPR, cancellable,
+ build_zero_cst (TREE_TYPE (cancellable)),
+ lab4, lab5);
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_label (lab5));
+ }
tree new_var;
if (TREE_TYPE (ptr) == ptr_type_node)
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
tree decl_placeholder
= OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
+ tree lab6 = NULL_TREE;
+ if (cancellable)
+ {
+ /* If this reduction needs destruction and parallel
+ has been cancelled, jump around the merge operation
+ to the destruction. */
+ tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+ lab6 = create_artificial_label (UNKNOWN_LOCATION);
+ tree zero = build_zero_cst (TREE_TYPE (cancellable));
+ g = gimple_build_cond (NE_EXPR, cancellable, zero,
+ lab6, lab5);
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_label (lab5));
+ }
SET_DECL_VALUE_EXPR (placeholder, out);
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
SET_DECL_VALUE_EXPR (decl_placeholder, priv);
OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL;
}
+ if (cancellable)
+ gimple_seq_add_stmt (end, gimple_build_label (lab6));
tree x = lang_hooks.decls.omp_clause_dtor (c, priv);
if (x)
{
{
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
tree oldv = NULL_TREE;
-
+ tree lab6 = NULL_TREE;
+ if (cancellable)
+ {
+ /* If this reduction needs destruction and parallel
+ has been cancelled, jump around the merge operation
+ to the destruction. */
+ tree lab5 = create_artificial_label (UNKNOWN_LOCATION);
+ lab6 = create_artificial_label (UNKNOWN_LOCATION);
+ tree zero = build_zero_cst (TREE_TYPE (cancellable));
+ g = gimple_build_cond (NE_EXPR, cancellable, zero,
+ lab6, lab5);
+ gimple_seq_add_stmt (end, g);
+ gimple_seq_add_stmt (end, gimple_build_label (lab5));
+ }
if (omp_is_reference (decl)
&& !useless_type_conversion_p (TREE_TYPE (placeholder),
TREE_TYPE (ref)))
OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION)
OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
+ if (cancellable)
+ gimple_seq_add_stmt (end, gimple_build_label (lab6));
tree x = lang_hooks.decls.omp_clause_dtor (c, new_var);
if (x)
{
g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
gimple_seq_add_stmt (start, g);
}
- else if (code == OMP_TASKLOOP || code == OMP_PARALLEL)
+ else
{
- tree c = omp_find_clause (gimple_omp_taskreg_clauses (ctx->stmt),
- OMP_CLAUSE__REDUCTEMP_);
+ tree c;
+ if (code == OMP_FOR)
+ c = gimple_omp_for_clauses (ctx->stmt);
+ else if (code == OMP_SECTIONS)
+ c = gimple_omp_sections_clauses (ctx->stmt);
+ else
+ c = gimple_omp_taskreg_clauses (ctx->stmt);
+ c = omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_);
t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)),
build_fold_addr_expr (avar));
gimplify_assign (OMP_CLAUSE_DECL (c), t, start);
g = gimple_build_cond (NE_EXPR, idx, num_thr_sz, lab1, lab2);
gimple_seq_add_stmt (end, g);
gimple_seq_add_stmt (end, gimple_build_label (lab2));
- t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER);
- g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
+ if (code == OMP_FOR || code == OMP_SECTIONS)
+ {
+ enum built_in_function bfn
+ = BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER;
+ t = builtin_decl_explicit (bfn);
+ tree c_bool_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (t)));
+ tree arg;
+ if (cancellable)
+ {
+ arg = create_tmp_var (c_bool_type);
+ gimple_seq_add_stmt (end, gimple_build_assign (arg, NOP_EXPR,
+ cancellable));
+ }
+ else
+ arg = build_int_cst (c_bool_type, 0);
+ g = gimple_build_call (t, 1, arg);
+ }
+ else
+ {
+ t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER);
+ g = gimple_build_call (t, 1, build_fold_addr_expr (avar));
+ }
gimple_seq_add_stmt (end, g);
t = build_constructor (atype, NULL);
TREE_THIS_VOLATILE (t) = 1;
struct omp_for_data fd, *fdp = NULL;
gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
gbind *new_stmt;
- gimple_seq omp_for_body, body, dlist;
+ gimple_seq omp_for_body, body, dlist, tred_ilist = NULL, tred_dlist = NULL;
+ gimple_seq cnt_list = NULL;
gimple_seq oacc_head = NULL, oacc_tail = NULL;
size_t i;
/* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
dlist = NULL;
body = NULL;
+ tree rclauses
+ = omp_task_reductions_find_first (gimple_omp_for_clauses (stmt), OMP_FOR,
+ OMP_CLAUSE_REDUCTION);
+ tree rtmp = NULL_TREE;
+ if (rclauses)
+ {
+ tree type = build_pointer_type (pointer_sized_int_node);
+ tree temp = create_tmp_var (type);
+ tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_);
+ OMP_CLAUSE_DECL (c) = temp;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (stmt);
+ gimple_omp_for_set_clauses (stmt, c);
+ lower_omp_task_reductions (ctx, OMP_FOR,
+ gimple_omp_for_clauses (stmt),
+ &tred_ilist, &tred_dlist);
+ rclauses = c;
+ rtmp = make_ssa_name (type);
+ gimple_seq_add_stmt (&body, gimple_build_assign (rtmp, temp));
+ }
+
lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
fdp);
- gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
+ gimple_seq_add_seq (rclauses ? &tred_ilist : &body,
+ gimple_omp_for_pre_body (stmt));
lower_omp (gimple_omp_body_ptr (stmt), ctx);
{
rhs_p = gimple_omp_for_initial_ptr (stmt, i);
if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+ *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
else if (TREE_CODE (*rhs_p) == ADDR_EXPR)
recompute_tree_invariant_for_addr_expr (*rhs_p);
rhs_p = gimple_omp_for_final_ptr (stmt, i);
if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+ *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
else if (TREE_CODE (*rhs_p) == ADDR_EXPR)
recompute_tree_invariant_for_addr_expr (*rhs_p);
rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
if (!is_gimple_min_invariant (*rhs_p))
- *rhs_p = get_formal_tmp_var (*rhs_p, &body);
+ *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list);
}
+ if (rclauses)
+ gimple_seq_add_seq (&tred_ilist, cnt_list);
+ else
+ gimple_seq_add_seq (&body, cnt_list);
/* Once lowered, extract the bounds and clauses. */
omp_extract_for_data (stmt, &fd, NULL);
gimple_seq_add_seq (&body, dlist);
+ if (rclauses)
+ {
+ gimple_seq_add_seq (&tred_ilist, body);
+ body = tred_ilist;
+ }
+
body = maybe_catch_exception (body);
if (!phony_loop)
{
/* Region exit marker goes at the end of the loop body. */
- gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
- maybe_add_implicit_barrier_cancel (ctx, &body);
+ gimple *g = gimple_build_omp_return (fd.have_nowait);
+ gimple_seq_add_stmt (&body, g);
+
+ gimple_seq_add_seq (&body, tred_dlist);
+
+ maybe_add_implicit_barrier_cancel (ctx, g, &body);
+
+ if (rclauses)
+ OMP_CLAUSE_DECL (rclauses) = rtmp;
}
/* Add OpenACC joining and reduction markers just after the loop. */
+2018-11-07 Jakub Jelinek <jakub@redhat.com>
+
+ * libgomp_g.h (GOMP_loop_start, GOMP_loop_ordered_start,
+ GOMP_loop_doacross_start, GOMP_loop_ull_start,
+ GOMP_loop_ull_ordered_start, GOMP_loop_ull_doacross_start,
+ GOMP_workshare_task_reduction_unregister, GOMP_sections2_start): New
+ prototypes.
+ * libgomp.h (struct gomp_doacross_work_share): Add extra field.
+ (struct gomp_work_share): Add task_reductions field.
+ (struct gomp_taskgroup): Add workshare flag.
+ (gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument.
+ (gomp_workshare_taskgroup_start,
+ gomp_workshare_task_reduction_register): New prototypes.
+ (gomp_init_work_share, gomp_work_share_start): Change bool argument
+ to size_t.
+ * libgomp.map (GOMP_5.0): Export GOMP_loop_start,
+ GOMP_loop_ordered_start, GOMP_loop_doacross_start,
+ GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
+ GOMP_loop_ull_doacross_start,
+ GOMP_workshare_task_reduction_unregister and GOMP_sections2_start.
+ * loop.c: Include string.h.
+ (GOMP_loop_runtime_next): Add ialias.
+ (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+ (gomp_loop_static_start, gomp_loop_dynamic_start,
+ gomp_loop_guided_start, gomp_loop_ordered_static_start,
+ gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start,
+ gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
+ gomp_loop_doacross_guided_start): Adjust gomp_work_share_start
+ or gomp_doacross_init callers.
+ (gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start,
+ GOMP_loop_doacross_start): New functions.
+ * loop_ull.c: Include string.h.
+ (GOMP_loop_ull_runtime_next): Add ialias.
+ (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+ (gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start,
+ gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start,
+ gomp_loop_ull_ordered_dynamic_start,
+ gomp_loop_ull_ordered_guided_start,
+ gomp_loop_ull_doacross_static_start,
+ gomp_loop_ull_doacross_dynamic_start,
+ gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start
+ and gomp_doacross_ull_init callers.
+ (gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
+ GOMP_loop_ull_doacross_start): New functions.
+ * sections.c: Include string.h.
+ (GOMP_taskgroup_reduction_register): Add ialias_redirect.
+ (GOMP_sections_start): Adjust gomp_work_share_start caller.
+ (GOMP_sections2_start): New function.
+ * ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add
+	EXTRA argument.  If the array doesn't need to be prepared, clear
+	ws->doacross when extra is 0, otherwise allocate just the doacross
+	structure plus the extra payload.  If the array is needed, allocate
+	the extra payload as well.
+ (GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post,
+ GOMP_doacross_ull_wait): Handle doacross->array == NULL like
+ doacross == NULL.
+ * parallel.c (GOMP_cancellation_point): If taskgroup has workshare
+ flag set, check cancelled of prev taskgroup if any.
+ (GOMP_cancel): If taskgroup has workshare flag set, set cancelled
+ on prev taskgroup if any.
+ * single.c (GOMP_single_start, GOMP_single_copy_start): Adjust
+ gomp_work_share_start callers.
+ * target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data):
+ If taskgroup has workshare flag set, check cancelled on prev
+ taskgroup if any. Guard all cancellation tests with
+ gomp_cancel_var test.
+ * taskloop.c (GOMP_taskloop): Likewise.
+ * task.c (GOMP_task, gomp_create_target_task, gomp_task_run_pre,
+ GOMP_taskwait_depend): Likewise.
+ (gomp_taskgroup_init): Clear workshare flag, reorder initialization.
+ (gomp_reduction_register): Add always_inline attribute. Add
+	ORIG argument; if non-NULL, don't allocate memory but copy it
+ from there.
+ (gomp_create_artificial_team): New function.
+ (GOMP_taskgroup_reduction_register): Extend function comment.
+ Use gomp_create_artificial_team. Adjust gomp_reduction_register
+ caller.
+ (gomp_parallel_reduction_register): Adjust gomp_reduction_register
+ caller.
+ (gomp_workshare_task_reduction_register,
+ gomp_workshare_taskgroup_start,
+ GOMP_workshare_task_reduction_unregister): New functions.
+ * team.c (gomp_new_team): Adjust gomp_init_work_share caller.
+ * work.c (gomp_init_work_share): Change ORDERED argument from
+	bool to size_t; if greater than 1, also allocate extra payload at
+	the end of the array.  Never keep ordered_team_ids NULL, set it
+ to inline_ordered_team_ids instead.
+ (gomp_work_share_start): Change ORDERED argument from bool to size_t,
+ return true instead of ws.
+ * testsuite/libgomp.c-c++-common/cancel-parallel-1.c: New test.
+ * testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c: New test.
+ * testsuite/libgomp.c-c++-common/task-reduction-6.c (struct S):
+ Use unsigned long long int instead of unsigned long int.
+ (main): Verify r == t.
+ * testsuite/libgomp.c-c++-common/task-reduction-8.c: New test.
+ * testsuite/libgomp.c-c++-common/task-reduction-9.c: New test.
+ * testsuite/libgomp.c-c++-common/task-reduction-11.c: New test.
+ * testsuite/libgomp.c-c++-common/task-reduction-12.c: New test.
+ * testsuite/libgomp.c++/task-reduction-14.C: New test.
+ * testsuite/libgomp.c++/task-reduction-15.C: New test.
+ * testsuite/libgomp.c++/task-reduction-16.C: New test.
+ * testsuite/libgomp.c++/task-reduction-17.C: New test.
+ * testsuite/libgomp.c++/task-reduction-18.C: New test.
+ * testsuite/libgomp.c++/task-reduction-19.C: New test.
+
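At the runtime-interface level, the compiler-expanded form of a worksharing
loop with task reductions now follows roughly this call sequence (a
hand-written sketch assuming a dynamic schedule; the real sequence is emitted
by expand_omp_for_generic and lower_omp_task_reductions above, the reduction
merge code itself is omitted, and n, chunk, reductions and body are
placeholders):

  long istart, iend;
  /* reductions points to the task reduction descriptor array built by the
     compiler; the trailing extra-memory argument is unused for now.  */
  if (GOMP_loop_start (0, n, 1, 2 /* GFS_DYNAMIC */, chunk, &istart, &iend,
		       reductions, NULL))
    do
      {
	for (long i = istart; i < iend; i++)
	  body (i);
      }
    while (GOMP_loop_dynamic_next (&istart, &iend));
  GOMP_loop_end ();
  /* Afterwards thread 0 merges the per-thread task reduction data and calls
     GOMP_workshare_task_reduction_unregister.  */

GOMP_sections2_start plays the analogous role for sections regions.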
2018-10-26 Jakub Jelinek <jakub@redhat.com>
* libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless
/* Likewise, but for the ull implementation. */
unsigned long long boundary_ull;
};
+ /* Pointer to extra memory if needed for lastprivate(conditional). */
+ void *extra;
/* Array of shift counts for each dimension if they can be flattened. */
unsigned int shift_counts[];
};
struct gomp_work_share *next_free;
};
+ /* Task reductions for this work-sharing construct. */
+ uintptr_t *task_reductions;
+
/* If only few threads are in the team, ordered_team_ids can point
to this array which fills the padding at the end of this struct. */
unsigned inline_ordered_team_ids[0];
uintptr_t *reductions;
bool in_taskgroup_wait;
bool cancelled;
+ bool workshare;
gomp_sem_t taskgroup_sem;
size_t num_children;
};
extern void gomp_ordered_static_init (void);
extern void gomp_ordered_static_next (void);
extern void gomp_ordered_sync (void);
-extern void gomp_doacross_init (unsigned, long *, long);
+extern void gomp_doacross_init (unsigned, long *, long, size_t);
extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
- unsigned long long);
+ unsigned long long, size_t);
/* parallel.c */
enum gomp_target_task_state);
extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *,
unsigned);
+extern void gomp_workshare_taskgroup_start (void);
+extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *);
static void inline
gomp_finish_task (struct gomp_task *task)
/* work.c */
-extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
+extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned);
extern void gomp_fini_work_share (struct gomp_work_share *);
-extern bool gomp_work_share_start (bool);
+extern bool gomp_work_share_start (size_t);
extern void gomp_work_share_end (void);
extern bool gomp_work_share_end_cancel (void);
extern void gomp_work_share_end_nowait (void);
GOMP_5.0 {
global:
+ GOMP_loop_doacross_start;
GOMP_loop_maybe_nonmonotonic_runtime_next;
GOMP_loop_maybe_nonmonotonic_runtime_start;
GOMP_loop_nonmonotonic_runtime_next;
GOMP_loop_nonmonotonic_runtime_start;
+ GOMP_loop_ordered_start;
+ GOMP_loop_start;
+ GOMP_loop_ull_doacross_start;
GOMP_loop_ull_maybe_nonmonotonic_runtime_next;
GOMP_loop_ull_maybe_nonmonotonic_runtime_start;
GOMP_loop_ull_nonmonotonic_runtime_next;
GOMP_loop_ull_nonmonotonic_runtime_start;
+ GOMP_loop_ull_ordered_start;
+ GOMP_loop_ull_start;
GOMP_parallel_loop_maybe_nonmonotonic_runtime;
GOMP_parallel_loop_nonmonotonic_runtime;
GOMP_parallel_reductions;
+ GOMP_sections2_start;
GOMP_taskgroup_reduction_register;
GOMP_taskgroup_reduction_unregister;
GOMP_task_reduction_remap;
GOMP_taskwait_depend;
GOMP_teams_reg;
+ GOMP_workshare_task_reduction_unregister;
} GOMP_4.5;
OACC_2.0 {
long *, long *);
extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long,
long *, long *);
+extern bool GOMP_loop_start (long, long, long, long, long, long *, long *,
+ uintptr_t *, void **);
extern bool GOMP_loop_ordered_static_start (long, long, long, long,
long *, long *);
extern bool GOMP_loop_ordered_guided_start (long, long, long, long,
long *, long *);
extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *);
+extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *,
+ long *, uintptr_t *, void **);
extern bool GOMP_loop_static_next (long *, long *);
extern bool GOMP_loop_dynamic_next (long *, long *);
long *);
extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
long *);
+extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *,
+ long *, uintptr_t *, void **);
extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
unsigned, long, long, long, long);
unsigned long long,
unsigned long long *,
unsigned long long *);
+extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long,
+ unsigned long long, long, unsigned long long,
+ unsigned long long *, unsigned long long *,
+ uintptr_t *, void **);
extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
unsigned long long,
unsigned long long,
unsigned long long *,
unsigned long long *);
+extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long,
+ unsigned long long,
+ unsigned long long, long,
+ unsigned long long,
+ unsigned long long *,
+ unsigned long long *,
+ uintptr_t *, void **);
extern bool GOMP_loop_ull_static_next (unsigned long long *,
unsigned long long *);
unsigned long long *,
unsigned long long *,
unsigned long long *);
+extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *,
+ long, unsigned long long,
+ unsigned long long *,
+ unsigned long long *,
+ uintptr_t *, void **);
/* ordered.c */
extern void GOMP_taskgroup_reduction_register (uintptr_t *);
extern void GOMP_taskgroup_reduction_unregister (uintptr_t *);
extern void GOMP_task_reduction_remap (size_t, size_t, void **);
+extern void GOMP_workshare_task_reduction_unregister (bool);
/* sections.c */
extern unsigned GOMP_sections_start (unsigned);
+extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **);
extern unsigned GOMP_sections_next (void);
extern void GOMP_parallel_sections_start (void (*) (void *), void *,
unsigned, unsigned);
#include <limits.h>
#include <stdlib.h>
+#include <string.h>
#include "libgomp.h"
+ialias (GOMP_loop_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
/* Initialize the given work share construct from the given arguments. */
static inline void
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_STATIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_DYNAMIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_GUIDED, chunk_size);
}
}
+static long
+gomp_adjust_sched (long sched, long *chunk_size)
+{
+ sched &= ~GFS_MONOTONIC;
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_DYNAMIC:
+ case GFS_GUIDED:
+ return sched;
+ /* GFS_RUNTIME is used for runtime schedule without monotonic
+ or nonmonotonic modifiers on the clause.
+ GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+ modifier. */
+ case GFS_RUNTIME:
+ /* GFS_AUTO is used for runtime schedule with nonmonotonic
+ modifier. */
+ case GFS_AUTO:
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+ sched = icv->run_sched_var & ~GFS_MONOTONIC;
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_DYNAMIC:
+ case GFS_GUIDED:
+ *chunk_size = icv->run_sched_chunk_size;
+ break;
+ case GFS_AUTO:
+ sched = GFS_STATIC;
+ *chunk_size = 0;
+ break;
+ default:
+ abort ();
+ }
+ return sched;
+ }
+ default:
+ abort ();
+ }
+}
+
+bool
+GOMP_loop_start (long start, long end, long incr, long sched,
+ long chunk_size, long *istart, long *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (gomp_work_share_start (0))
+ {
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ sched, chunk_size);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ if (mem)
+ {
+ uintptr_t size = (uintptr_t) *mem;
+ if (size > (sizeof (struct gomp_work_share)
+ - offsetof (struct gomp_work_share,
+ inline_ordered_team_ids)))
+ thr->ts.work_share->ordered_team_ids
+ = gomp_malloc_cleared (size);
+ else
+ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ if (mem)
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+
+ if (!istart)
+ return true;
+ return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
+
/* The *_ordered_*_start routines are similar. The only difference is that
this work-share construct is initialized to expect an ORDERED section. */
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_STATIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_DYNAMIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_init (thr->ts.work_share, start, end, incr,
GFS_GUIDED, chunk_size);
}
}
+bool
+GOMP_loop_ordered_start (long start, long end, long incr, long sched,
+ long chunk_size, long *istart, long *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ size_t ordered = 1;
+ bool ret;
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (mem)
+ ordered += (uintptr_t) *mem;
+ if (gomp_work_share_start (ordered))
+ {
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_init (thr->ts.work_share, start, end, incr,
+ sched, chunk_size);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ if (sched == GFS_STATIC)
+ gomp_ordered_static_init ();
+ else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ sched = thr->ts.work_share->sched;
+ if (sched != GFS_STATIC)
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ }
+
+ if (mem)
+ {
+ uintptr_t p
+ = (uintptr_t) (thr->ts.work_share->ordered_team_ids
+ + (thr->ts.team ? thr->ts.team->nthreads : 1));
+ p += __alignof__ (long long) - 1;
+ p &= ~(__alignof__ (long long) - 1);
+ *mem = (void *) p;
+ }
+
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_AUTO:
+ return !gomp_iter_static_next (istart, iend);
+ case GFS_DYNAMIC:
+ ret = gomp_iter_dynamic_next_locked (istart, iend);
+ break;
+ case GFS_GUIDED:
+ ret = gomp_iter_guided_next_locked (istart, iend);
+ break;
+ default:
+ abort ();
+ }
+
+ if (ret)
+ gomp_ordered_first ();
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+ return ret;
+}
+
/* The *_doacross_*_start routines are similar. The only difference is that
this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
GFS_STATIC, chunk_size);
- gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
GFS_DYNAMIC, chunk_size);
- gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
GFS_GUIDED, chunk_size);
- gomp_doacross_init (ncounts, counts, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
}
}
+bool
+GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
+ long chunk_size, long *istart, long *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (gomp_work_share_start (0))
+ {
+ size_t extra = 0;
+ if (mem)
+ extra = (uintptr_t) *mem;
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
+ sched, chunk_size);
+ gomp_doacross_init (ncounts, counts, chunk_size, extra);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ sched = thr->ts.work_share->sched;
+ }
+
+ if (mem)
+ *mem = thr->ts.work_share->doacross->extra;
+
+ return ialias_call (GOMP_loop_runtime_next) (istart, iend);
+}
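For illustration only, the ORDERED(N) - DOACROSS work-shares mentioned in the comment above correspond to loops such as the following standard OpenMP sketch (hypothetical names, not from the testsuite):

  void
  doacross_example (int a[64][64])
  {
    #pragma omp parallel for ordered (2)
    for (int i = 1; i < 64; i++)
      for (int j = 1; j < 64; j++)
        {
          /* Wait for the iterations this one depends on.  */
          #pragma omp ordered depend (sink: i - 1, j) depend (sink: i, j - 1)
          a[i][j] += a[i - 1][j] + a[i][j - 1];
          /* Signal completion of this iteration.  */
          #pragma omp ordered depend (source)
        }
  }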
+
/* The *_next routines are called when the thread completes processing of
the iteration block currently assigned to it. If the work-share
construct is bound directly to a parallel construct, then the iteration
#include <limits.h>
#include <stdlib.h>
+#include <string.h>
#include "libgomp.h"
+ialias (GOMP_loop_ull_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
typedef unsigned long long gomp_ull;
/* Initialize the given work share construct from the given arguments. */
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_STATIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_DYNAMIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_GUIDED, chunk_size);
}
}
+static long
+gomp_adjust_sched (long sched, gomp_ull *chunk_size)
+{
+ sched &= ~GFS_MONOTONIC;
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_DYNAMIC:
+ case GFS_GUIDED:
+ return sched;
+ /* GFS_RUNTIME is used for runtime schedule without monotonic
+ or nonmonotonic modifiers on the clause.
+ GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+ modifier. */
+ case GFS_RUNTIME:
+ /* GFS_AUTO is used for runtime schedule with nonmonotonic
+ modifier. */
+ case GFS_AUTO:
+ {
+ struct gomp_task_icv *icv = gomp_icv (false);
+ sched = icv->run_sched_var & ~GFS_MONOTONIC;
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_DYNAMIC:
+ case GFS_GUIDED:
+ *chunk_size = icv->run_sched_chunk_size;
+ break;
+ case GFS_AUTO:
+ sched = GFS_STATIC;
+ *chunk_size = 0;
+ break;
+ default:
+ abort ();
+ }
+ return sched;
+ }
+ default:
+ abort ();
+ }
+}
+
+bool
+GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
+ gomp_ull incr, long sched, gomp_ull chunk_size,
+ gomp_ull *istart, gomp_ull *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (gomp_work_share_start (0))
+ {
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ sched, chunk_size);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ if (mem)
+ {
+ uintptr_t size = (uintptr_t) *mem;
+ if (size > (sizeof (struct gomp_work_share)
+ - offsetof (struct gomp_work_share,
+ inline_ordered_team_ids)))
+ thr->ts.work_share->ordered_team_ids
+ = gomp_malloc_cleared (size);
+ else
+ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ if (mem)
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+
+ return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
/* The *_ordered_*_start routines are similar. The only difference is that
this work-share construct is initialized to expect an ORDERED section. */
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_STATIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_DYNAMIC, chunk_size);
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (true))
+ if (gomp_work_share_start (1))
{
gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
GFS_GUIDED, chunk_size);
}
}
+bool
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
+ gomp_ull incr, long sched, gomp_ull chunk_size,
+ gomp_ull *istart, gomp_ull *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ size_t ordered = 1;
+ bool ret;
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (mem)
+ ordered += (uintptr_t) *mem;
+ if (gomp_work_share_start (ordered))
+ {
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+ sched, chunk_size);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ if (sched == GFS_STATIC)
+ gomp_ordered_static_init ();
+ else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ sched = thr->ts.work_share->sched;
+ if (sched != GFS_STATIC)
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ }
+
+ if (mem)
+ {
+ uintptr_t p
+ = (uintptr_t) (thr->ts.work_share->ordered_team_ids
+ + (thr->ts.team ? thr->ts.team->nthreads : 1));
+ p += __alignof__ (long long) - 1;
+ p &= ~(__alignof__ (long long) - 1);
+ *mem = (void *) p;
+ }
+
+ switch (sched)
+ {
+ case GFS_STATIC:
+ case GFS_AUTO:
+ return !gomp_iter_ull_static_next (istart, iend);
+ case GFS_DYNAMIC:
+ ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+ break;
+ case GFS_GUIDED:
+ ret = gomp_iter_ull_guided_next_locked (istart, iend);
+ break;
+ default:
+ abort ();
+ }
+
+ if (ret)
+ gomp_ordered_first ();
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+ return ret;
+}
+
/* The *_doacross_*_start routines are similar. The only difference is that
this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
struct gomp_thread *thr = gomp_thread ();
thr->ts.static_trip = 0;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
GFS_STATIC, chunk_size);
- gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
GFS_DYNAMIC, chunk_size);
- gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
struct gomp_thread *thr = gomp_thread ();
bool ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
GFS_GUIDED, chunk_size);
- gomp_doacross_ull_init (ncounts, counts, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
gomp_work_share_init_done ();
}
}
}
+bool
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
+ long sched, gomp_ull chunk_size,
+ gomp_ull *istart, gomp_ull *iend,
+ uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+
+ thr->ts.static_trip = 0;
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (gomp_work_share_start (0))
+ {
+ size_t extra = 0;
+ if (mem)
+ extra = (uintptr_t) *mem;
+ sched = gomp_adjust_sched (sched, &chunk_size);
+ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+ sched, chunk_size);
+ gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ sched = thr->ts.work_share->sched;
+ }
+
+ if (mem)
+ *mem = thr->ts.work_share->doacross->extra;
+
+ return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
/* The *_next routines are called when the thread completes processing of
the iteration block currently assigned to it. If the work-share
construct is bound directly to a parallel construct, then the iteration
#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
void
-gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
+gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size,
+ size_t extra)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
struct gomp_doacross_work_share *doacross;
if (team == NULL || team->nthreads == 1)
- return;
+ {
+ empty:
+ if (!extra)
+ ws->doacross = NULL;
+ else
+ {
+ doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+ doacross->extra = (void *) (doacross + 1);
+ ws->doacross = doacross;
+ }
+ return;
+ }
for (i = 0; i < ncounts; i++)
{
/* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
if (counts[i] == 0)
- return;
+ goto empty;
if (num_bits <= MAX_COLLAPSED_BITS)
{
elt_sz = (elt_sz + 63) & ~63UL;
doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
- + shift_sz);
+ + shift_sz + extra);
doacross->chunk_size = chunk_size;
doacross->elt_sz = elt_sz;
doacross->ncounts = ncounts;
doacross->array = (unsigned char *)
((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
& ~(uintptr_t) 63);
+ if (extra)
+ {
+ doacross->extra = doacross->array + num_ents * elt_sz;
+ memset (doacross->extra, '\0', extra);
+ }
+ else
+ doacross->extra = NULL;
if (num_bits <= MAX_COLLAPSED_BITS)
{
unsigned int shift_count = 0;
unsigned long ent;
unsigned int i;
- if (__builtin_expect (doacross == NULL, 0))
+ if (__builtin_expect (doacross == NULL, 0)
+ || __builtin_expect (doacross->array == NULL, 0))
{
__sync_synchronize ();
return;
unsigned long ent;
unsigned int i;
- if (__builtin_expect (doacross == NULL, 0))
+ if (__builtin_expect (doacross == NULL, 0)
+ || __builtin_expect (doacross->array == NULL, 0))
{
__sync_synchronize ();
return;
typedef unsigned long long gomp_ull;
void
-gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
+gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts,
+ gomp_ull chunk_size, size_t extra)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
struct gomp_doacross_work_share *doacross;
if (team == NULL || team->nthreads == 1)
- return;
+ {
+ empty:
+ if (!extra)
+ ws->doacross = NULL;
+ else
+ {
+ doacross = gomp_malloc_cleared (sizeof (*doacross) + extra);
+ doacross->extra = (void *) (doacross + 1);
+ ws->doacross = doacross;
+ }
+ return;
+ }
for (i = 0; i < ncounts; i++)
{
/* If any count is 0, GOMP_doacross_{post,wait} can't be called. */
if (counts[i] == 0)
- return;
+ goto empty;
if (num_bits <= MAX_COLLAPSED_BITS)
{
doacross->array = (unsigned char *)
((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
& ~(uintptr_t) 63);
+ if (extra)
+ {
+ doacross->extra = doacross->array + num_ents * elt_sz;
+ memset (doacross->extra, '\0', extra);
+ }
+ else
+ doacross->extra = NULL;
if (num_bits <= MAX_COLLAPSED_BITS)
{
unsigned int shift_count = 0;
unsigned long ent;
unsigned int i;
- if (__builtin_expect (doacross == NULL, 0))
+ if (__builtin_expect (doacross == NULL, 0)
+ || __builtin_expect (doacross->array == NULL, 0))
{
__sync_synchronize ();
return;
unsigned long ent;
unsigned int i;
- if (__builtin_expect (doacross == NULL, 0))
+ if (__builtin_expect (doacross == NULL, 0)
+ || __builtin_expect (doacross->array == NULL, 0))
{
__sync_synchronize ();
return;
}
else if (which & GOMP_CANCEL_TASKGROUP)
{
- if (thr->task->taskgroup && thr->task->taskgroup->cancelled)
- return true;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return true;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return true;
+ }
/* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
as #pragma omp cancel parallel also cancels all explicit
tasks. */
}
else if (which & GOMP_CANCEL_TASKGROUP)
{
- if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
+ if (thr->task->taskgroup)
{
- gomp_mutex_lock (&team->task_lock);
- thr->task->taskgroup->cancelled = true;
- gomp_mutex_unlock (&team->task_lock);
+ struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
+ if (taskgroup->workshare && taskgroup->prev)
+ taskgroup = taskgroup->prev;
+ if (!taskgroup->cancelled)
+ {
+ gomp_mutex_lock (&team->task_lock);
+ taskgroup->cancelled = true;
+ gomp_mutex_unlock (&team->task_lock);
+ }
}
return true;
}
/* This file handles the SECTIONS construct. */
#include "libgomp.h"
+#include <string.h>
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
/* Initialize the given work share construct from the given arguments. */
static inline void
struct gomp_thread *thr = gomp_thread ();
long s, e, ret;
- if (gomp_work_share_start (false))
+ if (gomp_work_share_start (0))
{
gomp_sections_init (thr->ts.work_share, count);
gomp_work_share_init_done ();
return ret;
}
+unsigned
+GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ long s, e, ret;
+
+ if (reductions)
+ gomp_workshare_taskgroup_start ();
+ if (gomp_work_share_start (0))
+ {
+ gomp_sections_init (thr->ts.work_share, count);
+ if (reductions)
+ {
+ GOMP_taskgroup_reduction_register (reductions);
+ thr->task->taskgroup->workshare = true;
+ thr->ts.work_share->task_reductions = reductions;
+ }
+ if (mem)
+ {
+ uintptr_t size = (uintptr_t) *mem;
+ if (size > (sizeof (struct gomp_work_share)
+ - offsetof (struct gomp_work_share,
+ inline_ordered_team_ids)))
+ thr->ts.work_share->ordered_team_ids
+ = gomp_malloc_cleared (size);
+ else
+ memset (thr->ts.work_share->ordered_team_ids, '\0', size);
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+ gomp_work_share_init_done ();
+ }
+ else
+ {
+ if (reductions)
+ {
+ uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+ gomp_workshare_task_reduction_register (reductions,
+ first_reductions);
+ }
+ if (mem)
+ *mem = (void *) thr->ts.work_share->ordered_team_ids;
+ }
+
+#ifdef HAVE_SYNC_BUILTINS
+ if (gomp_iter_dynamic_next (&s, &e))
+ ret = s;
+ else
+ ret = 0;
+#else
+ gomp_mutex_lock (&thr->ts.work_share->lock);
+ if (gomp_iter_dynamic_next_locked (&s, &e))
+ ret = s;
+ else
+ ret = 0;
+ gomp_mutex_unlock (&thr->ts.work_share->lock);
+#endif
+
+ return ret;
+}
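For illustration only, a source-level sketch of the construct GOMP_sections2_start is meant for when REDUCTIONS is non-NULL, again modeled on the reduction (task, ...) syntax used by the new testcases (hypothetical name):

  int
  sections_task_reduction_example (void)
  {
    int x = 0;
    #pragma omp parallel
    #pragma omp sections reduction (task, +: x)
    {
      #pragma omp section
      #pragma omp task in_reduction (+: x)
      x += 1;
      #pragma omp section
      #pragma omp task in_reduction (+: x)
      x += 2;
    }
    return x;  /* 3 once the parallel region has ended.  */
  }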
+
/* This routine is called when the thread completes processing of the
section currently assigned to it. If the work-share construct is
bound directly to a parallel construct, then the construct may have
return __sync_bool_compare_and_swap (&team->single_count, single_count,
single_count + 1L);
#else
- bool ret = gomp_work_share_start (false);
+ bool ret = gomp_work_share_start (0);
if (ret)
gomp_work_share_init_done ();
gomp_work_share_end_nowait ();
bool first;
void *ret;
- first = gomp_work_share_start (false);
+ first = gomp_work_share_start (0);
if (first)
{
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup
- && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
gomp_task_maybe_wait_for_dependencies (depend);
}
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup
- && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
gomp_task_maybe_wait_for_dependencies (depend);
}
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
size_t i;
if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
#endif
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
priority = 0;
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
- if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
- || (taskgroup && taskgroup->cancelled))
- && !task->copy_ctors_done, 0))
+ if (__builtin_expect (gomp_cancel_var, 0)
+ && !task->copy_ctors_done)
{
- gomp_mutex_unlock (&team->task_lock);
- gomp_finish_task (task);
- free (task);
- return;
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ {
+ do_cancel:
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_finish_task (task);
+ free (task);
+ return;
+ }
+ if (taskgroup)
+ {
+ if (taskgroup->cancelled)
+ goto do_cancel;
+ if (taskgroup->workshare
+ && taskgroup->prev
+ && taskgroup->prev->cancelled)
+ goto do_cancel;
+ }
}
if (taskgroup)
taskgroup->num_children++;
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return true;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return true;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return true;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return true;
+ }
+ }
struct gomp_target_task *ttask;
struct gomp_task *task;
task->final_task = 0;
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new tasks. */
- if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
- || (taskgroup && taskgroup->cancelled), 0))
+ if (__builtin_expect (gomp_cancel_var, 0))
{
- gomp_mutex_unlock (&team->task_lock);
- gomp_finish_task (task);
- free (task);
- return true;
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ {
+ do_cancel:
+ gomp_mutex_unlock (&team->task_lock);
+ gomp_finish_task (task);
+ free (task);
+ return true;
+ }
+ if (taskgroup)
+ {
+ if (taskgroup->cancelled)
+ goto do_cancel;
+ if (taskgroup->workshare
+ && taskgroup->prev
+ && taskgroup->prev->cancelled)
+ goto do_cancel;
+ }
}
if (depend_size)
{
if (--team->task_queued_count == 0)
gomp_team_barrier_clear_task_pending (&team->barrier);
- if ((gomp_team_barrier_cancelled (&team->barrier)
- || (taskgroup && taskgroup->cancelled))
+ if (__builtin_expect (gomp_cancel_var, 0)
&& !child_task->copy_ctors_done)
- return true;
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return true;
+ if (taskgroup)
+ {
+ if (taskgroup->cancelled)
+ return true;
+ if (taskgroup->workshare
+ && taskgroup->prev
+ && taskgroup->prev->cancelled)
+ return true;
+ }
+ }
return false;
}
struct gomp_team *team = thr->ts.team;
/* If parallel or taskgroup has been cancelled, return early. */
- if (team
- && (gomp_team_barrier_cancelled (&team->barrier)
- || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
- return;
+ if (__builtin_expect (gomp_cancel_var, 0) && team)
+ {
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ return;
+ if (thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
+ }
if (thr->task && thr->task->depend_hash)
gomp_task_maybe_wait_for_dependencies (depend);
= gomp_malloc (sizeof (struct gomp_taskgroup));
taskgroup->prev = prev;
priority_queue_init (&taskgroup->taskgroup_queue);
- taskgroup->in_taskgroup_wait = false;
taskgroup->reductions = prev ? prev->reductions : NULL;
+ taskgroup->in_taskgroup_wait = false;
taskgroup->cancelled = false;
+ taskgroup->workshare = false;
taskgroup->num_children = 0;
gomp_sem_init (&taskgroup->taskgroup_sem, 0);
return taskgroup;
free (taskgroup);
}
-static inline void
-gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads)
+static inline __attribute__((always_inline)) void
+gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
+ unsigned nthreads)
{
size_t total_cnt = 0;
uintptr_t *d = data;
struct htab *old_htab = NULL, *new_htab;
do
{
- size_t sz = d[1] * nthreads;
- /* Should use omp_alloc if d[3] is not -1. */
- void *ptr = gomp_aligned_alloc (d[2], sz);
- memset (ptr, '\0', sz);
- d[2] = (uintptr_t) ptr;
+ if (__builtin_expect (orig != NULL, 0))
+ {
+ /* For worksharing task reductions, memory has been allocated
+ already by some other thread that encountered the construct
+ earlier. */
+ d[2] = orig[2];
+ d[6] = orig[6];
+ orig = (uintptr_t *) orig[4];
+ }
+ else
+ {
+ size_t sz = d[1] * nthreads;
+ /* Should use omp_alloc if d[3] is not -1. */
+ void *ptr = gomp_aligned_alloc (d[2], sz);
+ memset (ptr, '\0', sz);
+ d[2] = (uintptr_t) ptr;
+ d[6] = d[2] + sz;
+ }
d[5] = 0;
- d[6] = d[2] + sz;
total_cnt += d[0];
if (d[4] == 0)
{
d[5] = (uintptr_t) new_htab;
}
+static void
+gomp_create_artificial_team (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_task_icv *icv;
+ struct gomp_team *team = gomp_new_team (1);
+ struct gomp_task *task = thr->task;
+ icv = task ? &task->icv : &gomp_global_icv;
+ team->prev_ts = thr->ts;
+ thr->ts.team = team;
+ thr->ts.team_id = 0;
+ thr->ts.work_share = &team->work_shares[0];
+ thr->ts.last_work_share = NULL;
+#ifdef HAVE_SYNC_BUILTINS
+ thr->ts.single_count = 0;
+#endif
+ thr->ts.static_trip = 0;
+ thr->task = &team->implicit_task[0];
+ gomp_init_task (thr->task, NULL, icv);
+ if (task)
+ {
+ thr->task = task;
+ gomp_end_task ();
+ free (task);
+ thr->task = &team->implicit_task[0];
+ }
+#ifdef LIBGOMP_USE_PTHREADS
+ else
+ pthread_setspecific (gomp_thread_destructor, thr);
+#endif
+}
+
/* The format of data is:
data[0] cnt
data[1] size
cnt times
ent[0] address
ent[1] offset
- ent[2] used internally (pointer to data[0]). */
+ ent[2] used internally (pointer to data[0])
+ The entries are sorted by increasing offset, so that a binary
+ search can be performed.  Normally, data[8] is 0; the exception is
+ worksharing construct task reductions in cancellable parallel,
+ where at offset 0 there should be space for a pointer and an integer
+ which are used internally. */
void
GOMP_taskgroup_reduction_register (uintptr_t *data)
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
struct gomp_task *task;
+ unsigned nthreads;
if (__builtin_expect (team == NULL, 0))
{
/* The task reduction code needs a team and task, so for
orphaned taskgroups just create the implicit team. */
- struct gomp_task_icv *icv;
- team = gomp_new_team (1);
- task = thr->task;
- icv = task ? &task->icv : &gomp_global_icv;
- team->prev_ts = thr->ts;
- thr->ts.team = team;
- thr->ts.team_id = 0;
- thr->ts.work_share = &team->work_shares[0];
- thr->ts.last_work_share = NULL;
-#ifdef HAVE_SYNC_BUILTINS
- thr->ts.single_count = 0;
-#endif
- thr->ts.static_trip = 0;
- thr->task = &team->implicit_task[0];
- gomp_init_task (thr->task, NULL, icv);
- if (task)
- {
- thr->task = task;
- gomp_end_task ();
- free (task);
- thr->task = &team->implicit_task[0];
- }
-#ifdef LIBGOMP_USE_PTHREADS
- else
- pthread_setspecific (gomp_thread_destructor, thr);
-#endif
- GOMP_taskgroup_start ();
+ gomp_create_artificial_team ();
+ ialias_call (GOMP_taskgroup_start) ();
+ team = thr->ts.team;
}
- unsigned nthreads = team->nthreads;
+ nthreads = team->nthreads;
task = thr->task;
- gomp_reduction_register (data, task->taskgroup->reductions, nthreads);
+ gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
task->taskgroup->reductions = data;
}
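For illustration only, a source-level sketch of a construct whose lowering registers such a DATA array; the team == NULL path above covers the orphaned case where the taskgroup is not nested in any parallel region (hypothetical name):

  int
  taskgroup_reduction_example (void)
  {
    int x = 0;
    /* Orphaned taskgroup: no enclosing parallel, so the runtime creates
       an artificial single-thread team when registering the reduction.  */
    #pragma omp taskgroup task_reduction (+: x)
    for (int i = 0; i < 4; i++)
      #pragma omp task in_reduction (+: x)
      x++;
    return x;  /* 4 after the taskgroup.  */
  }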
gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
{
struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
- gomp_reduction_register (data, NULL, nthreads);
+ gomp_reduction_register (data, NULL, NULL, nthreads);
taskgroup->reductions = data;
return taskgroup;
}
+void
+gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task = thr->task;
+ unsigned nthreads = team->nthreads;
+ gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
+ task->taskgroup->reductions = data;
+}
+
+void
+gomp_workshare_taskgroup_start (void)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_team *team = thr->ts.team;
+ struct gomp_task *task;
+
+ if (team == NULL)
+ {
+ gomp_create_artificial_team ();
+ team = thr->ts.team;
+ }
+ task = thr->task;
+ task->taskgroup = gomp_taskgroup_init (task->taskgroup);
+ task->taskgroup->workshare = true;
+}
+
+void
+GOMP_workshare_task_reduction_unregister (bool cancelled)
+{
+ struct gomp_thread *thr = gomp_thread ();
+ struct gomp_task *task = thr->task;
+ struct gomp_team *team = thr->ts.team;
+ uintptr_t *data = task->taskgroup->reductions;
+ ialias_call (GOMP_taskgroup_end) ();
+ if (thr->ts.team_id == 0)
+ ialias_call (GOMP_taskgroup_reduction_unregister) (data);
+ else
+ htab_free ((struct htab *) data[5]);
+
+ if (!cancelled)
+ gomp_team_barrier_wait (&team->barrier);
+}
+
int
omp_in_final (void)
{
if (flags & GOMP_TASK_FLAG_NOGROUP)
{
- if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
- return;
+ if (__builtin_expect (gomp_cancel_var, 0)
+ && thr->task
+ && thr->task->taskgroup)
+ {
+ if (thr->task->taskgroup->cancelled)
+ return;
+ if (thr->task->taskgroup->workshare
+ && thr->task->taskgroup->prev
+ && thr->task->taskgroup->prev->cancelled)
+ return;
+ }
}
else
{
gomp_mutex_lock (&team->task_lock);
/* If parallel or taskgroup has been cancelled, don't start new
tasks. */
- if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
- || (taskgroup && taskgroup->cancelled))
- && cpyfn == NULL, 0))
+ if (__builtin_expect (gomp_cancel_var, 0)
+ && cpyfn == NULL)
{
- gomp_mutex_unlock (&team->task_lock);
- for (i = 0; i < num_tasks; i++)
+ if (gomp_team_barrier_cancelled (&team->barrier))
+ {
+ do_cancel:
+ gomp_mutex_unlock (&team->task_lock);
+ for (i = 0; i < num_tasks; i++)
+ {
+ gomp_finish_task (tasks[i]);
+ free (tasks[i]);
+ }
+ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+ ialias_call (GOMP_taskgroup_end) ();
+ return;
+ }
+ if (taskgroup)
{
- gomp_finish_task (tasks[i]);
- free (tasks[i]);
+ if (taskgroup->cancelled)
+ goto do_cancel;
+ if (taskgroup->workshare
+ && taskgroup->prev
+ && taskgroup->prev->cancelled)
+ goto do_cancel;
}
- if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
- ialias_call (GOMP_taskgroup_end) ();
- return;
}
if (taskgroup)
taskgroup->num_children += num_tasks;
team->single_count = 0;
#endif
team->work_shares_to_free = &team->work_shares[0];
- gomp_init_work_share (&team->work_shares[0], false, nthreads);
+ gomp_init_work_share (&team->work_shares[0], 0, nthreads);
team->work_shares[0].next_alloc = NULL;
team->work_share_list_free = NULL;
team->work_share_list_alloc = &team->work_shares[1];
--- /dev/null
+#include <omp.h>
+#include <stdlib.h>
+
+struct A { A (); ~A (); A (const A &); static int cnt1, cnt2, cnt3; int a; };
+int A::cnt1;
+int A::cnt2;
+int A::cnt3;
+A::A () : a (0)
+{
+ #pragma omp atomic
+ cnt1++;
+}
+A::A (const A &x) : a (x.a)
+{
+ #pragma omp atomic
+ cnt2++;
+}
+A::~A ()
+{
+ #pragma omp atomic
+ cnt3++;
+}
+#pragma omp declare reduction (+: A: omp_out.a += omp_in.a)
+
+void
+foo (int x)
+{
+ A a, b[2];
+ int d = 1;
+ long int e[2] = { 1L, 1L };
+ int c = 0;
+ #pragma omp parallel
+ {
+ if (x && omp_get_thread_num () == 0)
+ {
+ for (int i = 0; i < 10000000; ++i)
+ asm volatile ("");
+ c = 1;
+ #pragma omp cancel parallel
+ }
+ #pragma omp for reduction (task, +: a, b) reduction (task, *: d, e)
+ for (int i = 0; i < 64; i++)
+ #pragma omp task in_reduction (+: a, b) in_reduction (*: d, e)
+ {
+ a.a++;
+ b[0].a += 2;
+ b[1].a += 3;
+ d *= ((i & 7) == 0) + 1;
+ e[0] *= ((i & 7) == 3) + 1;
+ e[1] *= ((i & 3) == 2) + 1;
+ }
+ if (x && omp_get_cancellation ())
+ abort ();
+ }
+ if (!c)
+ {
+ if (a.a != 64 || b[0].a != 128 || b[1].a != 192)
+ abort ();
+ if (d != 256 || e[0] != 256L || e[1] != 65536L)
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ int c1 = A::cnt1, c2 = A::cnt2, c3 = A::cnt3;
+ volatile int zero = 0;
+ foo (zero);
+ if (A::cnt1 + A::cnt2 - c1 - c2 != A::cnt3 - c3)
+ abort ();
+}
--- /dev/null
+extern "C" void abort ();
+
+int as;
+int &a = as;
+long int bs = 1;
+long int &b = bs;
+
+template <typename T, typename U>
+void
+foo (T &c, U &d)
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (*: d) in_reduction (+: c) \
+ in_reduction (+: a) in_reduction (*: b)
+ {
+ a += 7;
+ b *= 2;
+ c += 9;
+ d *= 3;
+ }
+}
+
+template <typename T, typename U>
+void
+bar ()
+{
+ T cs = 0;
+ T &c = cs;
+ U ds = 1;
+ #pragma omp parallel if (0)
+ {
+ U &d = ds;
+ #pragma omp parallel
+ {
+ T i;
+ #pragma omp for reduction (task, +: a, c) reduction (task, *: b, d)
+ for (i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+ {
+ T j;
+ a += 7;
+ b *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+ {
+ a += 7;
+ b *= 2;
+ c += 9;
+ d *= 3;
+ foo (c, d);
+ }
+ c += 9;
+ d *= 3;
+ }
+#define THREEP4 (3LL * 3LL * 3LL * 3LL)
+ if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+ * THREEP4))
+ abort ();
+ if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+ abort ();
+ }
+ }
+ if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+ abort ();
+ if (ds != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+ * THREEP4))
+ abort ();
+}
+
+int
+main ()
+{
+ bar<int, long long int> ();
+}
--- /dev/null
+extern "C" void abort ();
+
+struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long long int x, int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as = { 0LL, 7 };
+S &a = as;
+S bs (1LL, 5);
+S &b = bs;
+
+void
+foo (S &c, S &d)
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a)
+ {
+ a.s += 7;
+ b.s *= 2;
+ c.s += 9;
+ d.s *= 3;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+}
+
+void
+test ()
+{
+ S cs = { 0LL, 7 };
+ S &c = cs;
+ S ds (1LL, 5);
+ #pragma omp parallel if (0)
+ {
+ S &d = ds;
+ #pragma omp parallel shared (a, b, c, d)
+ {
+ #pragma omp for schedule (static, 1) reduction (task, +: a, c) reduction (task, *: b, d)
+ for (int i = 0; i < 4; i++)
+ #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c)
+ {
+ int j;
+ a.s += 7;
+ b.s *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a) in_reduction (*: b) \
+ in_reduction (+: c) in_reduction (*: d)
+ {
+ a.s += 7;
+ b.s *= 2;
+ c.s += 9;
+ d.s *= 3;
+ foo (c, d);
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+ c.s += 9;
+ d.s *= 3;
+ if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+ || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+ abort ();
+ }
+#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL)
+ if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5)
+ abort ();
+ if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+ || c.s != 28 * 9 || c.t != 7)
+ abort ();
+ }
+ }
+ if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+ || c.s != 28 * 9 || c.t != 7)
+ abort ();
+ if (ds.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || ds.t != 5)
+ abort ();
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ test ();
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+}
--- /dev/null
+extern "C" void abort ();
+
+int as[2];
+int (&a)[2] = as;
+long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 };
+long long int (&b)[7] = bs;
+int es[3] = { 5, 0, 5 };
+int (&e)[3] = es;
+int fs[5] = { 6, 7, 0, 0, 9 };
+int (&f)[5] = fs;
+int gs[4] = { 1, 0, 0, 2 };
+int (&g)[4] = gs;
+int hs[3] = { 0, 1, 4 };
+int (&h)[3] = hs;
+int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+int (&k)[4][2] = ks;
+long long *ss;
+long long *&s = ss;
+long long (*ts)[2];
+long long (*&t)[2] = ts;
+
+template <typename T>
+void
+foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2])
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0] += 7;
+ a[1] += 17;
+ b[2] *= 2;
+ b[4] *= 2;
+ c[0] += 6;
+ d[1] *= 2;
+ e[1] += 19;
+ f[2] += 21;
+ f[3] += 23;
+ g[1] += 25;
+ g[2] += 27;
+ h[0] += 29;
+ k[1][0] += 31;
+ k[2][1] += 33;
+ m[1] += 19;
+ r[2] += 21;
+ r[3] += 23;
+ o[1] += 25;
+ o[2] += 27;
+ p[0] += 29;
+ q[1][0] += 31;
+ q[2][1] += 33;
+ s[1] *= 2;
+ t[2][0] *= 2;
+ t[3][1] *= 2;
+ }
+}
+
+template <typename T, typename I>
+void
+test (T &n, I x, I y)
+{
+ T cs[2] = { 0, 0 };
+ T (&c)[2] = cs;
+ T ps[3] = { 0, 1, 4 };
+ T (&p)[3] = ps;
+ T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+ T (&q)[4][2] = qs;
+ long long sb[4] = { 5, 1, 1, 6 };
+ long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+ T ms[3] = { 5, 0, 5 };
+ T os[4] = { 1, 0, 0, 2 };
+ s = sb;
+ t = tb;
+ #pragma omp parallel if (0)
+ {
+ long long int ds[] = { 1, 1 };
+ long long int (&d)[2] = ds;
+ T (&m)[3] = ms;
+ T rs[5] = { 6, 7, 0, 0, 9 };
+ T (&r)[5] = rs;
+ T (&o)[4] = os;
+ #pragma omp parallel
+ {
+ #pragma omp for reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
+ reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task,*: t[2:2][:], s[1:n + 1]) schedule (dynamic)
+ for (I i = x; i != y; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ T j;
+ a[0] += 2;
+ a[1] += 3;
+ b[2] *= 2;
+ f[3] += 8;
+ g[1] += 9;
+ g[2] += 10;
+ h[0] += 11;
+ k[1][1] += 13;
+ k[2][1] += 15;
+ m[1] += 16;
+ r[2] += 8;
+ s[1] *= 2;
+ t[2][1] *= 2;
+ t[3][1] *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1] += 6;
+ r[2] += 7;
+ q[1][0] += 17;
+ q[2][0] += 19;
+ a[0] += 4;
+ a[1] += 5;
+ b[3] *= 2;
+ b[4] *= 2;
+ f[3] += 18;
+ g[1] += 29;
+ g[2] += 18;
+ h[0] += 19;
+ s[2] *= 2;
+ t[2][0] *= 2;
+ t[3][0] *= 2;
+ T *cp = c;
+ long long int *dp = d;
+ T *rp = r;
+ T *pp = p;
+ foo (n, cp, dp, m, rp, o, pp, q);
+ r[3] += 18;
+ o[1] += 29;
+ o[2] += 18;
+ p[0] += 19;
+ c[0] += 4;
+ c[1] += 5;
+ d[0] *= 2;
+ e[1] += 6;
+ f[2] += 7;
+ k[1][0] += 17;
+ k[2][0] += 19;
+ }
+ r[3] += 8;
+ o[1] += 9;
+ o[2] += 10;
+ p[0] += 11;
+ q[1][1] += 13;
+ q[2][1] += 15;
+ b[3] *= 2;
+ c[0] += 4;
+ c[1] += 9;
+ d[0] *= 2;
+ e[1] += 16;
+ f[2] += 8;
+ }
+ if (d[0] != 1LL << (8 + 4)
+ || d[1] != 1LL << 16
+ || m[0] != 5
+ || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2] != 5
+ || r[0] != 6
+ || r[1] != 7
+ || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4] != 9
+ || o[0] != 1
+ || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3] != 2)
+ abort ();
+ if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0] != 9 || b[1] != 11
+ || b[2] != 1LL << (16 + 4)
+ || b[3] != 1LL << (8 + 4)
+ || b[4] != 1LL << (16 + 8)
+ || b[5] != 13 || b[6] != 15
+ || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1] != 5 * 8 + 9 * 4
+ || e[0] != 5
+ || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2] != 5
+ || f[0] != 6
+ || f[1] != 7
+ || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4] != 9
+ || g[0] != 1
+ || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3] != 2
+ || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1] != 1 || h[2] != 4
+ || k[0][0] != 5 || k[0][1] != 6
+ || k[1][0] != 31 * 16 + 17 * 8
+ || k[1][1] != 13 * 4
+ || k[2][0] != 19 * 8
+ || k[2][1] != 33 * 16 + 15 * 4
+ || k[3][0] != 7 || k[3][1] != 8
+ || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1] != 1 || p[2] != 4
+ || q[0][0] != 5 || q[0][1] != 6
+ || q[1][0] != 31 * 16 + 17 * 8
+ || q[1][1] != 13 * 4
+ || q[2][0] != 19 * 8
+ || q[2][1] != 33 * 16 + 15 * 4
+ || q[3][0] != 7 || q[3][1] != 8
+ || sb[0] != 5
+ || sb[1] != 1LL << (16 + 4)
+ || sb[2] != 1LL << 8
+ || sb[3] != 6
+ || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+ || tb[2][0] != 1LL << (16 + 8)
+ || tb[2][1] != 1LL << 4
+ || tb[3][0] != 1LL << 8
+ || tb[3][1] != 1LL << (16 + 4)
+ || tb[4][0] != 13 || tb[4][1] != 14)
+ abort ();
+ }
+ if (d[0] != 1LL << (8 + 4)
+ || d[1] != 1LL << 16
+ || m[0] != 5
+ || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2] != 5
+ || r[0] != 6
+ || r[1] != 7
+ || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4] != 9
+ || o[0] != 1
+ || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3] != 2)
+ abort ();
+ }
+ if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0] != 9 || b[1] != 11
+ || b[2] != 1LL << (16 + 4)
+ || b[3] != 1LL << (8 + 4)
+ || b[4] != 1LL << (16 + 8)
+ || b[5] != 13 || b[6] != 15
+ || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1] != 5 * 8 + 9 * 4
+ || e[0] != 5
+ || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2] != 5
+ || f[0] != 6
+ || f[1] != 7
+ || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4] != 9
+ || g[0] != 1
+ || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3] != 2
+ || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1] != 1 || h[2] != 4
+ || k[0][0] != 5 || k[0][1] != 6
+ || k[1][0] != 31 * 16 + 17 * 8
+ || k[1][1] != 13 * 4
+ || k[2][0] != 19 * 8
+ || k[2][1] != 33 * 16 + 15 * 4
+ || k[3][0] != 7 || k[3][1] != 8
+ || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1] != 1 || p[2] != 4
+ || q[0][0] != 5 || q[0][1] != 6
+ || q[1][0] != 31 * 16 + 17 * 8
+ || q[1][1] != 13 * 4
+ || q[2][0] != 19 * 8
+ || q[2][1] != 33 * 16 + 15 * 4
+ || q[3][0] != 7 || q[3][1] != 8
+ || sb[0] != 5
+ || sb[1] != 1LL << (16 + 4)
+ || sb[2] != 1LL << 8
+ || sb[3] != 6
+ || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+ || tb[2][0] != 1LL << (16 + 8)
+ || tb[2][1] != 1LL << 4
+ || tb[3][0] != 1LL << 8
+ || tb[3][1] != 1LL << (16 + 4)
+ || tb[4][0] != 13 || tb[4][1] != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ int n = 1;
+ test (n, 0ULL, 4ULL);
+ return 0;
+}
--- /dev/null
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S a[2] = { { 0, 7 }, { 0, 7 } };
+S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S *s;
+S (*t)[2];
+
+template <int N>
+void
+foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0].s += 7;
+ a[1].s += 17;
+ b[2].s *= 2;
+ b[4].s *= 2;
+ c[0].s += 6;
+ d[1].s *= 2;
+ e[1].s += 19;
+ f[2].s += 21;
+ f[3].s += 23;
+ g[1].s += 25;
+ g[2].s += 27;
+ h[0].s += 29;
+ k[1][0].s += 31;
+ k[2][1].s += 33;
+ m[1].s += 19;
+ r[2].s += 21;
+ r[3].s += 23;
+ o[1].s += 25;
+ o[2].s += 27;
+ p[0].s += 29;
+ q[1][0].s += 31;
+ q[2][1].s += 33;
+ s[1].s *= 2;
+ t[2][0].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ }
+}
+
+template <int N>
+void
+test (int n)
+{
+ S c[2] = { { 0, 7 }, { 0, 7 } };
+ S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+ S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+ S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+ S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+ s = ss;
+ t = tt;
+ #pragma omp parallel num_threads (1) if (0)
+ {
+ S d[] = { { 1, 5 }, { 1, 5 } };
+ S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+ S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+ S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+ volatile unsigned long long x = 0;
+ volatile unsigned long long y = 4;
+ volatile unsigned long long z = 1;
+ #pragma omp parallel
+ {
+ #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1]) \
+ schedule (nonmonotonic: guided, 1)
+ for (unsigned long long i = x; i < y; i += z)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ int j;
+ a[0].s += 2;
+ a[1].s += 3;
+ b[2].s *= 2;
+ f[3].s += 8;
+ g[1].s += 9;
+ g[2].s += 10;
+ h[0].s += 11;
+ k[1][1].s += 13;
+ k[2][1].s += 15;
+ m[1].s += 16;
+ r[2].s += 8;
+ s[1].s *= 2;
+ t[2][1].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1].s += 6;
+ r[2].s += 7;
+ q[1][0].s += 17;
+ q[2][0].s += 19;
+ a[0].s += 4;
+ a[1].s += 5;
+ b[3].s *= 2;
+ b[4].s *= 2;
+ f[3].s += 18;
+ g[1].s += 29;
+ g[2].s += 18;
+ h[0].s += 19;
+ s[2].s *= 2;
+ t[2][0].s *= 2;
+ t[3][0].s *= 2;
+ foo<N> (n, c, d, m, r, o, p, q);
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ r[3].s += 18;
+ o[1].s += 29;
+ o[2].s += 18;
+ p[0].s += 19;
+ c[0].s += 4;
+ c[1].s += 5;
+ d[0].s *= 2;
+ e[1].s += 6;
+ f[2].s += 7;
+ k[1][0].s += 17;
+ k[2][0].s += 19;
+ }
+ r[3].s += 8;
+ o[1].s += 9;
+ o[2].s += 10;
+ p[0].s += 11;
+ q[1][1].s += 13;
+ q[2][1].s += 15;
+ b[3].s *= 2;
+ c[0].s += 4;
+ c[1].s += 9;
+ d[0].s *= 2;
+ e[1].s += 16;
+ f[2].s += 8;
+ }
+ if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0].s != 9 || b[1].s != 11
+ || b[2].s != 1LL << (16 + 4)
+ || b[3].s != 1LL << (8 + 4)
+ || b[4].s != 1LL << (16 + 8)
+ || b[5].s != 13 || b[6].s != 15
+ || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1].s != 5 * 8 + 9 * 4
+ || e[0].s != 5
+ || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2].s != 5
+ || f[0].s != 6
+ || f[1].s != 7
+ || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4].s != 9
+ || g[0].s != 1
+ || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3].s != 2
+ || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1].s != 1 || h[2].s != 4
+ || k[0][0].s != 5 || k[0][1].s != 6
+ || k[1][0].s != 31 * 16 + 17 * 8
+ || k[1][1].s != 13 * 4
+ || k[2][0].s != 19 * 8
+ || k[2][1].s != 33 * 16 + 15 * 4
+ || k[3][0].s != 7 || k[3][1].s != 8
+ || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1].s != 1 || p[2].s != 4
+ || q[0][0].s != 5 || q[0][1].s != 6
+ || q[1][0].s != 31 * 16 + 17 * 8
+ || q[1][1].s != 13 * 4
+ || q[2][0].s != 19 * 8
+ || q[2][1].s != 33 * 16 + 15 * 4
+ || q[3][0].s != 7 || q[3][1].s != 8
+ || ss[0].s != 5
+ || ss[1].s != 1LL << (16 + 4)
+ || ss[2].s != 1LL << 8
+ || ss[3].s != 6
+ || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12
+ || tt[2][0].s != 1LL << (16 + 8)
+ || tt[2][1].s != 1LL << 4
+ || tt[3][0].s != 1LL << 8
+ || tt[3][1].s != 1LL << (16 + 4)
+ || tt[4][0].s != 13 || tt[4][1].s != 14)
+ abort ();
+ }
+ if (d[0].s != 1LL << (8 + 4)
+ || d[1].s != 1LL << 16
+ || m[0].s != 5
+ || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2].s != 5
+ || r[0].s != 6
+ || r[1].s != 7
+ || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4].s != 9
+ || o[0].s != 1
+ || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3].s != 2)
+ abort ();
+ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+ abort ();
+ for (int z = 0; z < 2; z++)
+ if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+ || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+ || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+ || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+ abort ();
+ for (int z = 0; z < 3; z++)
+ if (b[z + 2].t != 5)
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ test<0> (1);
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+ return 0;
+}
--- /dev/null
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+ #pragma omp atomic
+ cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+ #pragma omp atomic update
+ ++cnt2;
+}
+
+S::~S ()
+{
+ #pragma omp atomic
+ cnt3 = cnt3 + 1;
+ if (t < 3 || t > 9 || (t & 1) == 0)
+ abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as[2] = { { 0, 7 }, { 0, 7 } };
+S (&a)[2] = as;
+S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S (&b)[7] = bs;
+S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S (&e)[3] = es;
+S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S (&f)[5] = fs;
+S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S (&g)[4] = gs;
+S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S (&h)[3] = hs;
+S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S (&k)[4][2] = ks;
+S *ss;
+S *&s = ss;
+S (*ts)[2];
+S (*&t)[2] = ts;
+
+template <typename S, typename T>
+void
+foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
+{
+ T i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0].s += 7;
+ a[1].s += 17;
+ b[2].s *= 2;
+ b[4].s *= 2;
+ c[0].s += 6;
+ d[1].s *= 2;
+ e[1].s += 19;
+ f[2].s += 21;
+ f[3].s += 23;
+ g[1].s += 25;
+ g[2].s += 27;
+ h[0].s += 29;
+ k[1][0].s += 31;
+ k[2][1].s += 33;
+ m[1].s += 19;
+ r[2].s += 21;
+ r[3].s += 23;
+ o[1].s += 25;
+ o[2].s += 27;
+ p[0].s += 29;
+ q[1][0].s += 31;
+ q[2][1].s += 33;
+ s[1].s *= 2;
+ t[2][0].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ }
+}
+
+template <typename S, typename T>
+void
+test (T &n)
+{
+ S cs[2] = { { 0, 7 }, { 0, 7 } };
+ S (&c)[2] = cs;
+ S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+ S (&p)[3] = ps;
+ S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+ S (&q)[4][2] = qs;
+ S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+ S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+ S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+ S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+ s = sb;
+ t = tb;
+ #pragma omp parallel if (0)
+ {
+ S ds[] = { { 1, 5 }, { 1, 5 } };
+ S (&d)[2] = ds;
+ S (&m)[3] = ms;
+ S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+ S (&r)[5] = rs;
+ S (&o)[4] = os;
+ #pragma omp parallel
+ {
+ #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1]) \
+ schedule (monotonic: runtime)
+ for (T i = 0; i < 4; i++)
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ T j;
+ a[0].s += 2;
+ a[1].s += 3;
+ b[2].s *= 2;
+ f[3].s += 8;
+ g[1].s += 9;
+ g[2].s += 10;
+ h[0].s += 11;
+ k[1][1].s += 13;
+ k[2][1].s += 15;
+ m[1].s += 16;
+ r[2].s += 8;
+ s[1].s *= 2;
+ t[2][1].s *= 2;
+ t[3][1].s *= 2;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1].s += 6;
+ r[2].s += 7;
+ q[1][0].s += 17;
+ q[2][0].s += 19;
+ a[0].s += 4;
+ a[1].s += 5;
+ b[3].s *= 2;
+ b[4].s *= 2;
+ f[3].s += 18;
+ g[1].s += 29;
+ g[2].s += 18;
+ h[0].s += 19;
+ s[2].s *= 2;
+ t[2][0].s *= 2;
+ t[3][0].s *= 2;
+ S *cp = c;
+ S *dp = d;
+ S *rp = r;
+ S *pp = p;
+ if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+ || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+ || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+ || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+ || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+ || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+ || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+ || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5 && b[z + 2].t != 9)
+ abort ();
+ foo (n, cp, dp, m, rp, o, pp, q);
+ r[3].s += 18;
+ o[1].s += 29;
+ o[2].s += 18;
+ p[0].s += 19;
+ c[0].s += 4;
+ c[1].s += 5;
+ d[0].s *= 2;
+ e[1].s += 6;
+ f[2].s += 7;
+ k[1][0].s += 17;
+ k[2][0].s += 19;
+ }
+ r[3].s += 8;
+ o[1].s += 9;
+ o[2].s += 10;
+ p[0].s += 11;
+ q[1][1].s += 13;
+ q[2][1].s += 15;
+ b[3].s *= 2;
+ c[0].s += 4;
+ c[1].s += 9;
+ d[0].s *= 2;
+ e[1].s += 16;
+ f[2].s += 8;
+ }
+ if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0].s != 9 || b[1].s != 11
+ || b[2].s != 1LL << (16 + 4)
+ || b[3].s != 1LL << (8 + 4)
+ || b[4].s != 1LL << (16 + 8)
+ || b[5].s != 13 || b[6].s != 15
+ || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1].s != 5 * 8 + 9 * 4
+ || e[0].s != 5
+ || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2].s != 5
+ || f[0].s != 6
+ || f[1].s != 7
+ || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4].s != 9
+ || g[0].s != 1
+ || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3].s != 2
+ || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1].s != 1 || h[2].s != 4
+ || k[0][0].s != 5 || k[0][1].s != 6
+ || k[1][0].s != 31 * 16 + 17 * 8
+ || k[1][1].s != 13 * 4
+ || k[2][0].s != 19 * 8
+ || k[2][1].s != 33 * 16 + 15 * 4
+ || k[3][0].s != 7 || k[3][1].s != 8
+ || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1].s != 1 || p[2].s != 4
+ || q[0][0].s != 5 || q[0][1].s != 6
+ || q[1][0].s != 31 * 16 + 17 * 8
+ || q[1][1].s != 13 * 4
+ || q[2][0].s != 19 * 8
+ || q[2][1].s != 33 * 16 + 15 * 4
+ || q[3][0].s != 7 || q[3][1].s != 8
+ || sb[0].s != 5
+ || sb[1].s != 1LL << (16 + 4)
+ || sb[2].s != 1LL << 8
+ || sb[3].s != 6
+ || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12
+ || tb[2][0].s != 1LL << (16 + 8)
+ || tb[2][1].s != 1LL << 4
+ || tb[3][0].s != 1LL << 8
+ || tb[3][1].s != 1LL << (16 + 4)
+ || tb[4][0].s != 13 || tb[4][1].s != 14)
+ abort ();
+ if (d[0].s != 1LL << (8 + 4)
+ || d[1].s != 1LL << 16
+ || m[0].s != 5
+ || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2].s != 5
+ || r[0].s != 6
+ || r[1].s != 7
+ || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4].s != 9
+ || o[0].s != 1
+ || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3].s != 2)
+ abort ();
+ if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+ abort ();
+ for (T z = 0; z < 2; z++)
+ if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+ || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+ || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+ || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+ abort ();
+ for (T z = 0; z < 3; z++)
+ if (b[z + 2].t != 5)
+ abort ();
+ }
+ }
+}
+
+int
+main ()
+{
+ int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+ int n = 1;
+ test<S, int> (n);
+ if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+ abort ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
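+/* Assumed cancellation semantics this test relies on: if OMP_CANCELLATION is
+   honored, the cancel parallel construct activates cancellation, the
+   encountering thread jumps to the end of the parallel region and the other
+   threads leave it at the next cancellation point (e.g. the implicit barrier
+   of the worksharing loop), so the omp_get_cancellation () checks below are
+   never reached.  If it is not honored, the checks are reached but return
+   false.  Either way no abort () should trigger.  */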
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+ int a[64];
+ #pragma omp parallel
+ {
+ #pragma omp barrier
+ if (omp_get_thread_num () == 0)
+ {
+ #pragma omp cancel parallel
+ }
+ #pragma omp for
+ for (int i = 0; i < 64; i++)
+ a[i] = i;
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ #pragma omp parallel
+ {
+ #pragma omp barrier
+ if (omp_get_thread_num () == 0)
+ {
+ #pragma omp cancel parallel
+ }
+ #pragma omp taskgroup
+ {
+ #pragma omp for
+ for (int i = 0; i < 64; i++)
+ #pragma omp task
+ a[i] += i;
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
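+/* Assumed semantics here: a cancel taskgroup construct encountered inside a
+   task makes that task jump to the end of its task region when
+   OMP_CANCELLATION is honored, so the abort () calls guarded by
+   omp_get_cancellation () are skipped; with cancellation disabled they are
+   reached but return false.  The a++ increments are performed in the
+   worksharing loop body itself and the in_reduction tasks only add zero, so
+   a is expected to reach 64 either way.  */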
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+ int a = 0, i;
+ #pragma omp parallel
+ #pragma omp taskgroup
+ {
+ #pragma omp task
+ {
+ #pragma omp cancel taskgroup
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ #pragma omp taskwait
+ #pragma omp for reduction (task, +: a)
+ for (i = 0; i < 64; ++i)
+ {
+ a++;
+ #pragma omp task in_reduction (+: a)
+ {
+ volatile int zero = 0;
+ a += zero;
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ }
+ if (a != 64)
+ abort ();
+ #pragma omp task
+ {
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ }
+ a = 0;
+ #pragma omp parallel
+ #pragma omp taskgroup
+ {
+ #pragma omp taskwait
+ #pragma omp for reduction (task, +: a)
+ for (i = 0; i < 64; ++i)
+ {
+ a++;
+ #pragma omp task in_reduction (+: a)
+ {
+ volatile int zero = 0;
+ a += zero;
+ #pragma omp cancel taskgroup
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ }
+ if (a != 64)
+ abort ();
+ #pragma omp task
+ {
+ if (omp_get_cancellation ())
+ abort ();
+ }
+ }
+ return 0;
+}
--- /dev/null
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+ #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+ in_reduction (+: a)
+ {
+ a += 4;
+ b[1] *= 4;
+ c[1] &= ~(1UL << (i + 16));
+ }
+}
+
+void
+foo (unsigned long long int x, unsigned long long int y, unsigned long long int z)
+{
+ unsigned long long int i;
+ #pragma omp for schedule(runtime) reduction (task, +: a) \
+ reduction (task, *: b) reduction (task, &: c[1:1])
+ for (i = x; i < y; i += z)
+ {
+ a++;
+ b[0] *= 2;
+ bar (i);
+ b[2] *= 3;
+ c[1] &= ~(1UL << i);
+ }
+}
+
+int
+main ()
+{
+ volatile int two = 2;
+ foo (two, 7 * two, two);
+ if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+ || c[0] != ~0UL || c[1] != ~0x15541554UL)
+ abort ();
+ a = 0;
+ b[0] = 1;
+ b[1] = 1;
+ b[2] = 1;
+ c[1] = ~0UL;
+ #pragma omp parallel
+ foo (two, 8 * two, two);
+ if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+ || c[0] != ~0UL || c[1] != ~0x55545554UL)
+ abort ();
+ return 0;
+}
--- /dev/null
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+ #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+ in_reduction (+: a)
+ {
+ a += 4;
+ b[1] *= 4;
+ c[1] &= ~(1UL << (i + 16));
+ }
+}
+
+void
+foo (int x)
+{
+ #pragma omp sections reduction (task, +: a) reduction (task, *: b) \
+ reduction (task, &: c[1:1])
+ {
+ {
+ a++; b[0] *= 2; bar (2); b[2] *= 3; c[1] &= ~(1UL << 2);
+ }
+ #pragma omp section
+ { b[0] *= 2; bar (4); b[2] *= 3; c[1] &= ~(1UL << 4); a++; }
+ #pragma omp section
+ { bar (6); b[2] *= 3; c[1] &= ~(1UL << 6); a++; b[0] *= 2; }
+ #pragma omp section
+ { b[2] *= 3; c[1] &= ~(1UL << 8); a++; b[0] *= 2; bar (8); }
+ #pragma omp section
+ { c[1] &= ~(1UL << 10); a++; b[0] *= 2; bar (10); b[2] *= 3; }
+ #pragma omp section
+ { a++; b[0] *= 2; b[2] *= 3; c[1] &= ~(1UL << 12); bar (12); }
+ #pragma omp section
+ if (x)
+ {
+ a++; b[0] *= 2; b[2] *= 3; bar (14); c[1] &= ~(1UL << 14);
+ }
+ }
+}
+
+int
+main ()
+{
+ volatile int one = 1;
+ foo (!one);
+ if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+ || c[0] != ~0UL || c[1] != ~0x15541554UL)
+ abort ();
+ a = 0;
+ b[0] = 1;
+ b[1] = 1;
+ b[2] = 1;
+ c[1] = ~0UL;
+ #pragma omp parallel
+ foo (one);
+ if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+ || c[0] != ~0UL || c[1] != ~0x55545554UL)
+ abort ();
+ return 0;
+}
#include <omp.h>
#include <stdlib.h>
-struct S { unsigned long int s, t; };
+struct S { unsigned long long int s, t; };
void
rbar (struct S *p, struct S *o)
abort ();
if (m.s != 63 * 64 * 4 || m.t != 7)
abort ();
+ if (r != t)
+ abort ();
return 0;
}
--- /dev/null
+#include <omp.h>
+#include <stdlib.h>
+
+struct S { unsigned long long int s, t; };
+
+void
+rbar (struct S *p, struct S *o)
+{
+ p->s = 1;
+ if (o->t != 5)
+ abort ();
+ p->t = 9;
+}
+
+static inline void
+rbaz (struct S *o, struct S *i)
+{
+ if (o->t != 5 || i->t != 9)
+ abort ();
+ o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \
+ initializer (omp_priv = { 0, 3 })
+#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \
+ initializer (rbar (&omp_priv, &omp_orig))
+
+struct S g = { 0, 7 };
+struct S h = { 1, 5 };
+
+int
+foo (int z, int *a, int *b)
+{
+ int x = 0;
+ #pragma omp taskloop reduction (+:x) in_reduction (+:b[0])
+ for (int i = z; i < z + 8; i++)
+ {
+ x += a[i];
+ *b += a[i] * 2;
+ }
+ return x;
+}
+
+unsigned long long int
+bar (int z, int *a, unsigned long long int *b, int *s)
+{
+ unsigned long long int x = 1;
+ #pragma omp taskloop reduction (*:x) in_reduction (*:b[0])
+ for (int i = z; i < z + 8; i++)
+ {
+ #pragma omp task in_reduction (*:x)
+ x *= a[i];
+ #pragma omp task in_reduction (*:b[0])
+ *b *= (3 - a[i]);
+ s[0]++;
+ }
+ return x;
+}
+
+void
+baz (int i, int *a, int *c)
+{
+ #pragma omp task in_reduction (*:h) in_reduction (+:g)
+ {
+ g.s += 7 * a[i];
+ h.s *= (3 - c[i]);
+ if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9))
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ int i, j = 0, a[64], b = 0, c[64], f = 0;
+ unsigned long long int d = 1, e = 1;
+ volatile int one = 1;
+ int r = 0, s = 0, t;
+ struct S m = { 0, 7 };
+ struct S n = { 1, 5 };
+ for (i = 0; i < 64; i++)
+ {
+ a[i] = 2 * i;
+ c[i] = 1 + ((i % 3) != 1);
+ }
+ #pragma omp parallel reduction (task, +:b) shared(t) reduction(+:r, s)
+ {
+ int z, q1, q2, q3;
+ #pragma omp master
+ t = omp_get_num_threads ();
+ #pragma omp for schedule(static) reduction (task, +: f) reduction (+: j)
+ for (z = 0; z < 64; z += 8)
+ {
+ f++;
+ j += foo (z, a, &b);
+ j += foo (z, a, &f);
+ }
+ if (j != 63 * 64 * 2 || f != 63 * 64 * 2 + 8)
+ abort ();
+ r++;
+ #pragma omp taskgroup task_reduction (+: s)
+ {
+ #pragma omp for schedule(static, 1) reduction(task, *: d) reduction (*: e)
+ for (z = 0; z < 64; z += 8)
+ e *= bar (z, c, &d, &s);
+ }
+ if (e != (1ULL << 43) || d != (1ULL << 21))
+ abort ();
+ #pragma omp for schedule(monotonic: dynamic, 1) reduction (task, +: g, m) \
+ reduction (task, *: h, n) collapse(3)
+ for (q1 = 0; q1 < one; q1++)
+ for (q2 = 0; q2 < 64; q2 += 8)
+ for (q3 = 0; q3 < one; ++q3)
+ #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) \
+ nogroup
+ for (i = q2; i < q2 + 8; ++i)
+ {
+ g.s += 3 * a[i];
+ h.s *= (3 - c[i]);
+ m.s += 4 * a[i];
+ n.s *= c[i];
+ if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)
+ || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9))
+ abort ();
+ baz (i, a, c);
+ }
+ if (n.s != (1ULL << 43) || n.t != 5)
+ abort ();
+ if (g.s != 63 * 64 * 10 || g.t != 7)
+ abort ();
+ if (h.s != (1ULL << 42) || h.t != 5)
+ abort ();
+ if (m.s != 63 * 64 * 4 || m.t != 7)
+ abort ();
+ }
+ if (b != 63 * 64 * 2)
+ abort ();
+ if (r != t || s != 64)
+ abort ();
+ return 0;
+}
--- /dev/null
+#ifdef __cplusplus
+extern "C"
+#endif
+void abort (void);
+
+int a[2];
+long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 };
+int e[3] = { 5, 0, 5 };
+int f[5] = { 6, 7, 0, 0, 9 };
+int g[4] = { 1, 0, 0, 2 };
+int h[3] = { 0, 1, 4 };
+int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+long long *s;
+long long (*t)[2];
+
+void
+foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2])
+{
+ int i;
+ for (i = 0; i < 2; i++)
+ #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+ in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ a[0] += 7;
+ a[1] += 17;
+ b[2] *= 2;
+ b[4] *= 2;
+ c[0] += 6;
+ d[1] *= 2;
+ e[1] += 19;
+ f[2] += 21;
+ f[3] += 23;
+ g[1] += 25;
+ g[2] += 27;
+ h[0] += 29;
+ k[1][0] += 31;
+ k[2][1] += 33;
+ m[1] += 19;
+ r[2] += 21;
+ r[3] += 23;
+ o[1] += 25;
+ o[2] += 27;
+ p[0] += 29;
+ q[1][0] += 31;
+ q[2][1] += 33;
+ s[1] *= 2;
+ t[2][0] *= 2;
+ t[3][1] *= 2;
+ }
+}
+
+void
+test (int n)
+{
+ int c[2] = { 0, 0 };
+ int p[3] = { 0, 1, 4 };
+ int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+ long long ss[4] = { 5, 1, 1, 6 };
+ long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+ long long int d[] = { 1, 1 };
+ int m[3] = { 5, 0, 5 };
+ int r[5] = { 6, 7, 0, 0, 9 };
+ int o[4] = { 1, 0, 0, 2 };
+ s = ss;
+ t = tt;
+ #pragma omp parallel num_threads(4)
+ {
+ int i;
+ #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+ reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+ reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+ reduction (task, *: t[2:2][:], s[1:n + 1]) \
+ schedule(nonmonotonic: runtime)
+ for (i = 0; i < 4; i++)
+ {
+ #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+ in_reduction (*: s[1:2], t[2:2][:])
+ {
+ int j;
+ a[0] += 2;
+ a[1] += 3;
+ b[2] *= 2;
+ f[3] += 8;
+ g[1] += 9;
+ g[2] += 10;
+ h[0] += 11;
+ k[1][1] += 13;
+ k[2][1] += 15;
+ m[1] += 16;
+ r[2] += 8;
+ s[1] *= 2;
+ t[2][1] *= 2;
+ t[3][1] *= 2;
+ for (j = 0; j < 2; j++)
+ #pragma omp task in_reduction (+: a, c[:2]) \
+ in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+ in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+ in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+ in_reduction (*: s[n:2], t[2:2][:])
+ {
+ m[1] += 6;
+ r[2] += 7;
+ q[1][0] += 17;
+ q[2][0] += 19;
+ a[0] += 4;
+ a[1] += 5;
+ b[3] *= 2;
+ b[4] *= 2;
+ f[3] += 18;
+ g[1] += 29;
+ g[2] += 18;
+ h[0] += 19;
+ s[2] *= 2;
+ t[2][0] *= 2;
+ t[3][0] *= 2;
+ foo (n, c, d, m, r, o, p, q);
+ r[3] += 18;
+ o[1] += 29;
+ o[2] += 18;
+ p[0] += 19;
+ c[0] += 4;
+ c[1] += 5;
+ d[0] *= 2;
+ e[1] += 6;
+ f[2] += 7;
+ k[1][0] += 17;
+ k[2][0] += 19;
+ }
+ r[3] += 8;
+ o[1] += 9;
+ o[2] += 10;
+ p[0] += 11;
+ q[1][1] += 13;
+ q[2][1] += 15;
+ b[3] *= 2;
+ c[0] += 4;
+ c[1] += 9;
+ d[0] *= 2;
+ e[1] += 16;
+ f[2] += 8;
+ }
+ }
+ }
+ if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+ || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+ || b[0] != 9 || b[1] != 11
+ || b[2] != 1LL << (16 + 4)
+ || b[3] != 1LL << (8 + 4)
+ || b[4] != 1LL << (16 + 8)
+ || b[5] != 13 || b[6] != 15
+ || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+ || c[1] != 5 * 8 + 9 * 4
+ || d[0] != 1LL << (8 + 4)
+ || d[1] != 1LL << 16
+ || e[0] != 5
+ || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || e[2] != 5
+ || f[0] != 6
+ || f[1] != 7
+ || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || f[4] != 9
+ || g[0] != 1
+ || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || g[3] != 2
+ || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || h[1] != 1 || h[2] != 4
+ || k[0][0] != 5 || k[0][1] != 6
+ || k[1][0] != 31 * 16 + 17 * 8
+ || k[1][1] != 13 * 4
+ || k[2][0] != 19 * 8
+ || k[2][1] != 33 * 16 + 15 * 4
+ || k[3][0] != 7 || k[3][1] != 8
+ || m[0] != 5
+ || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+ || m[2] != 5
+ || o[0] != 1
+ || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+ || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+ || o[3] != 2
+ || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+ || p[1] != 1 || p[2] != 4
+ || q[0][0] != 5 || q[0][1] != 6
+ || q[1][0] != 31 * 16 + 17 * 8
+ || q[1][1] != 13 * 4
+ || q[2][0] != 19 * 8
+ || q[2][1] != 33 * 16 + 15 * 4
+ || q[3][0] != 7 || q[3][1] != 8
+ || r[0] != 6
+ || r[1] != 7
+ || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+ || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+ || r[4] != 9
+ || ss[0] != 5
+ || ss[1] != 1LL << (16 + 4)
+ || ss[2] != 1LL << 8
+ || ss[3] != 6
+ || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12
+ || tt[2][0] != 1LL << (16 + 8)
+ || tt[2][1] != 1LL << 4
+ || tt[3][0] != 1LL << 8
+ || tt[3][1] != 1LL << (16 + 4)
+ || tt[4][0] != 13 || tt[4][1] != 14)
+ abort ();
+}
+
+int
+main ()
+{
+ test (1);
+ return 0;
+}
This shouldn't touch the next_alloc field. */
void
-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
unsigned nthreads)
{
gomp_mutex_init (&ws->lock);
if (__builtin_expect (ordered, 0))
{
-#define INLINE_ORDERED_TEAM_IDS_CNT \
- ((sizeof (struct gomp_work_share) \
- - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
- / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
-
- if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
- ws->ordered_team_ids
- = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+#define INLINE_ORDERED_TEAM_IDS_SIZE \
+ (sizeof (struct gomp_work_share) \
+ - offsetof (struct gomp_work_share, inline_ordered_team_ids))
+
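+      /* ORDERED is nonzero here: a value of 1 requests just the
+         ordered_team_ids array, while a larger value appears to encode 1
+         plus the number of extra bytes to be carved out of the same
+         allocation; the total is rounded up to long long alignment.  */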
+ if (__builtin_expect (ordered != 1, 0))
+ {
+ ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
+ ordered = ordered + __alignof__ (long long) - 1;
+ ordered &= ~(__alignof__ (long long) - 1);
+ }
+ else
+ ordered = nthreads * sizeof (*ws->ordered_team_ids);
+ if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
+ ws->ordered_team_ids = gomp_malloc (ordered);
else
ws->ordered_team_ids = ws->inline_ordered_team_ids;
- memset (ws->ordered_team_ids, '\0',
- nthreads * sizeof (*ws->ordered_team_ids));
+ memset (ws->ordered_team_ids, '\0', ordered);
ws->ordered_num_used = 0;
ws->ordered_owner = -1;
ws->ordered_cur = 0;
}
else
- ws->ordered_team_ids = NULL;
+ ws->ordered_team_ids = ws->inline_ordered_team_ids;
gomp_ptrlock_init (&ws->next_ws, NULL);
ws->threads_completed = 0;
}
if this was the first thread to reach this point. */
bool
-gomp_work_share_start (bool ordered)
+gomp_work_share_start (size_t ordered)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
ws = gomp_malloc (sizeof (*ws));
gomp_init_work_share (ws, ordered, 1);
thr->ts.work_share = ws;
- return ws;
+ return true;
}
ws = thr->ts.work_share;