From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 7 Nov 2018 19:21:43 +0000 (+0100)
Subject: builtin-types.def (BT_FN_VOID_BOOL, [...]): New.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0bb1abc55ad30f708911b66727140754424f9af3;p=thirdparty%2Fgcc.git

builtin-types.def (BT_FN_VOID_BOOL, [...]): New.

	* builtin-types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
	BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
	BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
	BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
	* omp-builtins.def (BUILT_IN_GOMP_LOOP_START,
	BUILT_IN_GOMP_LOOP_ORDERED_START, BUILT_IN_GOMP_LOOP_DOACROSS_START,
	BUILT_IN_GOMP_LOOP_ULL_START, BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
	BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START, BUILT_IN_GOMP_SECTIONS2_START,
	BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER): New.
	* omp-general.h (struct omp_for_data): Add have_reductemp member.
	* omp-general.c (omp_extract_for_data): Initialize it.
	* omp-low.c (build_outer_var_ref): Ignore taskgroup outer contexts.
	Fix up the condition when lookup_decl should be used.
	(scan_sharing_clauses): Call install_var_local for reductions with
	task modifier even in worksharing contexts.
	(lower_rec_input_clauses): Don't lookup_decl reductemp in worksharing
	contexts.  Handle reductions with task modifier in worksharing
	contexts.  Ignore _reductemp_ clause in worksharing contexts.
	(lower_reduction_clauses): Ignore reduction clause with task modifiers
	even in worksharing contexts.
	(lower_send_clauses): Likewise.
	(maybe_add_implicit_barrier_cancel): Add OMP_RETURN argument, don't
	rely that it is the last stmt in body so far.  Ignore outer taskgroup
	contexts.
	(omp_task_reductions_find_first): Move earlier.
	(lower_omp_task_reductions): Add forward declaration.  Handle
	OMP_FOR and OMP_SECTIONS, add support for parallel cancellation.
	(lower_omp_sections): Handle reduction clauses with taskgroup
	modifiers.  Adjust maybe_add_implicit_barrier_cancel caller.
	(lower_omp_single): Adjust maybe_add_implicit_barrier_cancel caller.
	(lower_omp_for): Likewise.  Handle reduction clauses with taskgroup
	modifiers.
	* omp-expand.c (omp_adjust_chunk_size): Don't adjust anything if
	chunk_size is zero.
	(determine_parallel_type): Don't combine parallel with worksharing
	which has _reductemp_ clause.
	(expand_omp_for_generic): Add SCHED_ARG argument.  Handle expansion
	of worksharing loops with task reductions.
	(expand_omp_for_static_nochunk): Handle expansion of worksharing
	loops with task reductions.
	(expand_omp_for_static_chunk): Likewise.
	(expand_omp_for): Adjust expand_omp_for_generic caller, use
	GOMP_loop{,_ull}{,_ordered,_doacross}_start builtins if there are
	task reductions.
	(expand_omp_sections): Handle expansion of sections with task
	reductions.

gcc/fortran/
	* types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
	BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
	BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
	BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
	BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
	BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.

libgomp/
	* libgomp_g.h (GOMP_loop_start, GOMP_loop_ordered_start,
	GOMP_loop_doacross_start, GOMP_loop_ull_start,
	GOMP_loop_ull_ordered_start, GOMP_loop_ull_doacross_start,
	GOMP_workshare_task_reduction_unregister, GOMP_sections2_start): New
	prototypes.
	* libgomp.h (struct gomp_doacross_work_share): Add extra field.
	(struct gomp_work_share): Add task_reductions field.
	(struct gomp_taskgroup): Add workshare flag.
	(gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument.
	(gomp_workshare_taskgroup_start,
	gomp_workshare_task_reduction_register): New prototypes.
	(gomp_init_work_share, gomp_work_share_start): Change bool argument
	to size_t.
	* libgomp.map (GOMP_5.0): Export GOMP_loop_start,
	GOMP_loop_ordered_start, GOMP_loop_doacross_start,
	GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
	GOMP_loop_ull_doacross_start,
	GOMP_workshare_task_reduction_unregister and GOMP_sections2_start.
	* loop.c: Include string.h.
	(GOMP_loop_runtime_next): Add ialias.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(gomp_loop_static_start, gomp_loop_dynamic_start,
	gomp_loop_guided_start, gomp_loop_ordered_static_start,
	gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start,
	gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start,
	gomp_loop_doacross_guided_start): Adjust gomp_work_share_start
	or gomp_doacross_init callers.
	(gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start,
	GOMP_loop_doacross_start): New functions.
	* loop_ull.c: Include string.h.
	(GOMP_loop_ull_runtime_next): Add ialias.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start,
	gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start,
	gomp_loop_ull_ordered_dynamic_start,
	gomp_loop_ull_ordered_guided_start,
	gomp_loop_ull_doacross_static_start,
	gomp_loop_ull_doacross_dynamic_start,
	gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start
	and gomp_doacross_ull_init callers.
	(gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start,
	GOMP_loop_ull_doacross_start): New functions.
	* sections.c: Include string.h.
	(GOMP_taskgroup_reduction_register): Add ialias_redirect.
	(GOMP_sections_start): Adjust gomp_work_share_start caller.
	(GOMP_sections2_start): New function.
	* ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add
	EXTRA argument.  If not needed to prepare array, if extra is 0,
	clear ws->doacross, otherwise allocate just doacross structure and
	extra payload.  If array is needed, allocate also extra payload.
	(GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post,
	GOMP_doacross_ull_wait): Handle doacross->array == NULL like
	doacross == NULL.
	* parallel.c (GOMP_cancellation_point): If taskgroup has workshare
	flag set, check cancelled of prev taskgroup if any.
	(GOMP_cancel): If taskgroup has workshare flag set, set cancelled
	on prev taskgroup if any.
	* single.c (GOMP_single_start, GOMP_single_copy_start): Adjust
	gomp_work_share_start callers.
	* target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data):
	If taskgroup has workshare flag set, check cancelled on prev
	taskgroup if any.  Guard all cancellation tests with
	gomp_cancel_var test.
	* taskloop.c (GOMP_taskloop): Likewise.
	* task.c (GOMP_task, gomp_create_target_task, gomp_task_run_pre,
	GOMP_taskwait_depend): Likewise.
	(gomp_taskgroup_init): Clear workshare flag, reorder initialization.
	(gomp_reduction_register): Add always_inline attribute.  Add ORIG
	argument, if non-NULL, don't allocate memory, but copy it from there.
	(gomp_create_artificial_team): New function.
	(GOMP_taskgroup_reduction_register): Extend function comment.
	Use gomp_create_artificial_team.  Adjust gomp_reduction_register
	caller.
	(gomp_parallel_reduction_register): Adjust gomp_reduction_register
	caller.
	(gomp_workshare_task_reduction_register,
	gomp_workshare_taskgroup_start,
	GOMP_workshare_task_reduction_unregister): New functions.
	* team.c (gomp_new_team): Adjust gomp_init_work_share caller.
	* work.c (gomp_init_work_share): Change ORDERED argument from
	bool to size_t, if more than 1 allocate also extra payload at the
	end of array.  Never keep ordered_team_ids NULL, set it to
	inline_ordered_team_ids instead.
	(gomp_work_share_start): Change ORDERED argument from bool to
	size_t, return true instead of ws.
	* testsuite/libgomp.c-c++-common/cancel-parallel-1.c: New test.
	* testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c: New test.
	* testsuite/libgomp.c-c++-common/task-reduction-6.c (struct S): Use
	unsigned long long int instead of unsigned long int.
	(main): Verify r == t.
	* testsuite/libgomp.c-c++-common/task-reduction-8.c: New test.
	* testsuite/libgomp.c-c++-common/task-reduction-9.c: New test.
	* testsuite/libgomp.c-c++-common/task-reduction-11.c: New test.
	* testsuite/libgomp.c-c++-common/task-reduction-12.c: New test.
	* testsuite/libgomp.c++/task-reduction-14.C: New test.
	* testsuite/libgomp.c++/task-reduction-15.C: New test.
	* testsuite/libgomp.c++/task-reduction-16.C: New test.
	* testsuite/libgomp.c++/task-reduction-17.C: New test.
	* testsuite/libgomp.c++/task-reduction-18.C: New test.
	* testsuite/libgomp.c++/task-reduction-19.C: New test.

From-SVN: r265885
---
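For background: the constructs these new entry points serve are OpenMP 5.0
reductions with the task modifier on worksharing constructs.  A rough,
illustrative example (not one of the new testcases; names and bounds are
arbitrary) of what now expands through GOMP_loop_start and friends:

  /* Compile with -fopenmp.  The task modifier on the reduction makes the
     compiler start the loop through GOMP_loop_start (or the
     _ordered/_doacross/_ull variants) so libgomp can register SUM as a
     task reduction for the implicit taskgroup; a sections construct with
     such a clause goes through GOMP_sections2_start the same way.  */
  #include <stdlib.h>

  int
  main ()
  {
    long long sum = 0;
  #pragma omp parallel
  #pragma omp for reduction (task, +: sum)
    for (int i = 0; i < 64; i++)
      {
  #pragma omp task in_reduction (+: sum)
	sum += i;
      }
    if (sum != 64 * 63 / 2)
      abort ();
    return 0;
  }

The workshare flag added to struct gomp_taskgroup, together with the
parallel.c/target.c/task.c changes, is what lets #pragma omp cancel parallel
interact with such an artificial taskgroup (see the new cancel-parallel-1.c
and cancel-taskgroup-3.c tests).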
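The shapes of the new libgomp entry points follow from the builtin type
strings registered below; as a reading aid they correspond roughly to the
following C declarations.  The parameter names are only guesses patterned on
the existing GOMP_loop_*_start functions; the last two pointers are the
task-reduction descriptor and the extra-memory argument, which the compiler
for now passes as a null pointer (see the "/* For now. */" comment in the
expand_omp_for_generic hunk):

  #include <stdbool.h>

  /* BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR;
     GOMP_loop_ordered_start has the same shape.  */
  bool GOMP_loop_start (long start, long end, long incr, long sched,
			long chunk, long *istart, long *iend,
			void *reductions, void *mem);
  /* BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR.  */
  bool GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
				 long chunk, long *istart, long *iend,
				 void *reductions, void *mem);
  /* BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR;
     GOMP_loop_ull_ordered_start likewise, and the _ull_doacross variant
     is the unsigned long long analogue of the doacross one.  */
  bool GOMP_loop_ull_start (bool up, unsigned long long start,
			    unsigned long long end, unsigned long long incr,
			    long sched, unsigned long long chunk,
			    unsigned long long *istart,
			    unsigned long long *iend,
			    void *reductions, void *mem);
  /* BT_FN_UINT_UINT_PTR_PTR.  */
  unsigned GOMP_sections2_start (unsigned count, void *reductions,
				 void *mem);
  /* BT_FN_VOID_BOOL.  */
  void GOMP_workshare_task_reduction_unregister (bool cancelled);

As read off the expand_omp_for hunk below, SCHED encodes the schedule kind in
the low bits (0 runtime, 1 static, 2 dynamic, 3 guided, with 4 used in one
nonmonotonic runtime case) and sets bit 31 for monotonic schedules; ordered
static passes (1 << 31) + 1, and a zero chunk is passed when none was given.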
diff --git a/gcc/ChangeLog.gomp b/gcc/ChangeLog.gomp
index 7940067c0a40..66fd79a4c4ff 100644
--- a/gcc/ChangeLog.gomp
+++ b/gcc/ChangeLog.gomp
@@ -1,3 +1,53 @@
+2018-11-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* builtin-types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+	BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+	BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+	* omp-builtins.def (BUILT_IN_GOMP_LOOP_START,
+	BUILT_IN_GOMP_LOOP_ORDERED_START, BUILT_IN_GOMP_LOOP_DOACROSS_START,
+	BUILT_IN_GOMP_LOOP_ULL_START, BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+	BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START, BUILT_IN_GOMP_SECTIONS2_START,
+	BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER): New.
+	* omp-general.h (struct omp_for_data): Add have_reductemp member.
+	* omp-general.c (omp_extract_for_data): Initialize it.
+	* omp-low.c (build_outer_var_ref): Ignore taskgroup outer contexts.
+	Fix up the condition when lookup_decl should be used.
+	(scan_sharing_clauses): Call install_var_local for reductions with
+	task modifier even in worksharing contexts.
+	(lower_rec_input_clauses): Don't lookup_decl reductemp in worksharing
+	contexts.  Handle reductions with task modifier in worksharing
+	contexts.  Ignore _reductemp_ clause in worksharing contexts.
+	(lower_reduction_clauses): Ignore reduction clause with task modifiers
+	even in worksharing contexts.
+	(lower_send_clauses): Likewise.
+	(maybe_add_implicit_barrier_cancel): Add OMP_RETURN argument, don't
+	rely that it is the last stmt in body so far.  Ignore outer taskgroup
+	contexts.
+	(omp_task_reductions_find_first): Move earlier.
+	(lower_omp_task_reductions): Add forward declaration.  Handle
+	OMP_FOR and OMP_SECTIONS, add support for parallel cancellation.
+	(lower_omp_sections): Handle reduction clauses with taskgroup
+	modifiers.  Adjust maybe_add_implicit_barrier_cancel caller.
+	(lower_omp_single): Adjust maybe_add_implicit_barrier_cancel caller.
+	(lower_omp_for): Likewise.  Handle reduction clauses with taskgroup
+	modifiers.
+	* omp-expand.c (omp_adjust_chunk_size): Don't adjust anything if
+	chunk_size is zero.
+	(determine_parallel_type): Don't combine parallel with worksharing
+	which has _reductemp_ clause.
+	(expand_omp_for_generic): Add SCHED_ARG argument.  Handle expansion
+	of worksharing loops with task reductions.
+	(expand_omp_for_static_nochunk): Handle expansion of worksharing
+	loops with task reductions.
+	(expand_omp_for_static_chunk): Likewise.
+	(expand_omp_for): Adjust expand_omp_for_generic caller, use
+	GOMP_loop{,_ull}{,_ordered,_doacross}_start builtins if there are
+	task reductions.
+	(expand_omp_sections): Handle expansion of sections with task
+	reductions.
+
 2018-10-25  Jakub Jelinek  <jakub@redhat.com>
 
 	* omp-builtins.def (BUILT_IN_GOMP_LOOP_NONMONOTONIC_RUNTIME_START,
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 6e8448486a1a..92c0b0bb779e 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -251,6 +251,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT_CONST_STRING, BT_INT, BT_CONST_STRING)
 DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VALIST_REF, BT_VOID, BT_VALIST_REF)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
 DEF_FUNCTION_TYPE_1 (BT_FN_FLOAT_CONST_STRING, BT_FLOAT, BT_CONST_STRING)
 DEF_FUNCTION_TYPE_1 (BT_FN_DOUBLE_CONST_STRING, BT_DOUBLE, BT_CONST_STRING)
 DEF_FUNCTION_TYPE_1 (BT_FN_LONGDOUBLE_CONST_STRING,
@@ -621,6 +622,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_UINT32_PTR,
 		     BT_VOID, BT_UINT32, BT_UINT32, BT_PTR)
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE,
 		     BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
 		     BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
@@ -731,6 +733,12 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
 DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_LONG,
 		     BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		     BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+		     BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		     BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+		     BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
@@ -739,6 +747,14 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
 		     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
 		     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		     BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+		     BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		      BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		      BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+		      BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/ChangeLog.gomp b/gcc/fortran/ChangeLog.gomp
index 999509f3af9f..5355ef1f35df 100644
--- a/gcc/fortran/ChangeLog.gomp
+++ b/gcc/fortran/ChangeLog.gomp
@@ -1,3 +1,12 @@
+2018-11-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* types.def (BT_FN_VOID_BOOL, BT_FN_UINT_UINT_PTR_PTR,
+	BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+	BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+	BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+	BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR): New.
+
 2018-10-23  Jakub Jelinek  <jakub@redhat.com>
 
 	* types.def (BT_FN_UINT_OMPFN_PTR_UINT_UINT): New.
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index ba12ede7d938..7ba23bac34c0 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -86,6 +86,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_BOOL, BT_VOID, BT_BOOL)
 DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
 
 DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
@@ -147,6 +148,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, BT_PTR)
+DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
@@ -221,14 +223,28 @@ DEF_FUNCTION_TYPE_7 (BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR,
 DEF_FUNCTION_TYPE_8 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_LONG,
 		     BT_LONG, BT_LONG, BT_LONG, BT_UINT)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		     BT_BOOL, BT_UINT, BT_PTR_LONG, BT_LONG, BT_LONG,
+		     BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		     BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_LONG, BT_ULONGLONG,
+		     BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
 DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
-	             BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-	             BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+		     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+		     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		     BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
+		     BT_PTR_LONG, BT_PTR_LONG, BT_PTR, BT_PTR)
+
+DEF_FUNCTION_TYPE_10 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		      BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
+		      BT_ULONGLONG, BT_LONG, BT_ULONGLONG, BT_PTR_ULONGLONG,
+		      BT_PTR_ULONGLONG, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 5698af7a6a21..70051635fa0a 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -164,6 +164,18 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DOACROSS_RUNTIME_START,
 		  "GOMP_loop_doacross_runtime_start",
 		  BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
 		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_START,
+		  "GOMP_loop_start",
+		  BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ORDERED_START,
+		  "GOMP_loop_ordered_start",
+		  BT_FN_BOOL_LONG_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DOACROSS_START,
+		  "GOMP_loop_doacross_start",
+		  BT_FN_BOOL_UINT_LONGPTR_LONG_LONG_LONGPTR_LONGPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_STATIC_NEXT, "GOMP_loop_static_next",
 		  BT_FN_BOOL_LONGPTR_LONGPTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, "GOMP_loop_dynamic_next",
@@ -260,6 +272,18 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START,
 		  "GOMP_loop_ull_doacross_runtime_start",
 		  BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
 		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_START,
+		  "GOMP_loop_ull_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_ORDERED_START,
+		  "GOMP_loop_ull_ordered_start",
+		  BT_FN_BOOL_BOOL_ULL_ULL_ULL_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_DOACROSS_START,
+		  "GOMP_loop_ull_doacross_start",
+		  BT_FN_BOOL_UINT_ULLPTR_LONG_ULL_ULLPTR_ULLPTR_PTR_PTR,
+		  ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT,
 		  "GOMP_loop_ull_static_next",
 		  BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, ATTR_NOTHROW_LEAF_LIST)
@@ -365,6 +389,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKLOOP_ULL, "GOMP_taskloop_ull",
 		  ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_START, "GOMP_sections_start",
 		  BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS2_START, "GOMP_sections2_start",
+		  BT_FN_UINT_UINT_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SECTIONS_NEXT, "GOMP_sections_next",
 		  BT_FN_UINT, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_PARALLEL_SECTIONS,
@@ -415,5 +441,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER,
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_REDUCTION_REMAP,
 		  "GOMP_task_reduction_remap",
 		  BT_FN_VOID_SIZE_SIZE_PTR, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER,
+		  "GOMP_workshare_task_reduction_unregister",
+		  BT_FN_VOID_BOOL, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare",
 		   BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 11790682857f..ccb94ba9de4a 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -204,7 +204,7 @@ workshare_safe_to_combine_p (basic_block ws_entry_bb)
 static tree
 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
 {
-  if (!simd_schedule)
+  if (!simd_schedule || integer_zerop (chunk_size))
     return chunk_size;
 
   poly_uint64 vf = omp_max_vf ();
@@ -345,13 +345,14 @@ determine_parallel_type (struct omp_region *region)
       if (c == NULL
 	  || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
 	      == OMP_CLAUSE_SCHEDULE_STATIC)
-	  || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
-	{
-	  region->is_combined_parallel = false;
-	  region->inner->is_combined_parallel = false;
-	  return;
-	}
+	  || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
+	  || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
+	return;
     }
+  else if (region->inner->type == GIMPLE_OMP_SECTIONS
+	   && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
+			       OMP_CLAUSE__REDUCTEMP_))
+    return;
 
   region->is_combined_parallel = true;
region->inner->is_combined_parallel = true; @@ -2618,6 +2619,7 @@ expand_omp_for_generic (struct omp_region *region, struct omp_for_data *fd, enum built_in_function start_fn, enum built_in_function next_fn, + tree sched_arg, gimple *inner_stmt) { tree type, istart0, iend0, iend; @@ -2665,6 +2667,30 @@ expand_omp_for_generic (struct omp_region *region, && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), OMP_CLAUSE_LASTPRIVATE)) ordered_lastprivate = false; + tree reductions = NULL_TREE; + tree mem = NULL_TREE; + if (sched_arg) + { + if (fd->have_reductemp) + { + tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), + OMP_CLAUSE__REDUCTEMP_); + reductions = OMP_CLAUSE_DECL (c); + gcc_assert (TREE_CODE (reductions) == SSA_NAME); + gimple *g = SSA_NAME_DEF_STMT (reductions); + reductions = gimple_assign_rhs1 (g); + OMP_CLAUSE_DECL (c) = reductions; + entry_bb = gimple_bb (g); + edge e = split_block (entry_bb, g); + if (region->entry == entry_bb) + region->entry = e->dest; + gsi = gsi_last_bb (entry_bb); + } + else + reductions = null_pointer_node; + /* For now. */ + mem = null_pointer_node; + } if (fd->collapse > 1 || fd->ordered) { int first_zero_iter1 = -1, first_zero_iter2 = -1; @@ -2851,7 +2877,18 @@ expand_omp_for_generic (struct omp_region *region, { t = fold_convert (fd->iter_type, fd->chunk_size); t = omp_adjust_chunk_size (t, fd->simd_schedule); - if (fd->ordered) + if (sched_arg) + { + if (fd->ordered) + t = build_call_expr (builtin_decl_explicit (start_fn), + 8, t0, t1, sched_arg, t, t3, t4, + reductions, mem); + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 9, t0, t1, t2, sched_arg, t, t3, t4, + reductions, mem); + } + else if (fd->ordered) t = build_call_expr (builtin_decl_explicit (start_fn), 5, t0, t1, t, t3, t4); else @@ -2884,7 +2921,11 @@ expand_omp_for_generic (struct omp_region *region, tree bfn_decl = builtin_decl_explicit (start_fn); t = fold_convert (fd->iter_type, fd->chunk_size); t = omp_adjust_chunk_size (t, fd->simd_schedule); - t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); + if (sched_arg) + t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, + t, t3, t4, reductions, mem); + else + t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); } else t = build_call_expr (builtin_decl_explicit (start_fn), @@ -2903,6 +2944,17 @@ expand_omp_for_generic (struct omp_region *region, gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), GSI_SAME_STMT); } + if (fd->have_reductemp) + { + gimple *g = gsi_stmt (gsi); + gsi_remove (&gsi, true); + release_ssa_name (gimple_assign_lhs (g)); + + entry_bb = region->entry; + gsi = gsi_last_nondebug_bb (entry_bb); + + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + } gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); /* Remove the GIMPLE_OMP_FOR statement. 
*/ @@ -3201,9 +3253,6 @@ expand_omp_for_generic (struct omp_region *region, else t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); gcall *call_stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (gsi))) - gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); - gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); if (fd->ordered) { tree arr = counts[fd->ordered]; @@ -3212,6 +3261,17 @@ expand_omp_for_generic (struct omp_region *region, gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), GSI_SAME_STMT); } + if (gimple_omp_return_lhs (gsi_stmt (gsi))) + { + gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); + if (fd->have_reductemp) + { + gimple *g = gimple_build_assign (reductions, NOP_EXPR, + gimple_call_lhs (call_stmt)); + gsi_insert_after (&gsi, g, GSI_SAME_STMT); + } + } + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); gsi_remove (&gsi, true); /* Connect the new blocks. */ @@ -3394,6 +3454,7 @@ expand_omp_for_static_nochunk (struct omp_region *region, bool broken_loop = region->cont == NULL; tree *counts = NULL; tree n1, n2, step; + tree reductions = NULL_TREE; itype = type = TREE_TYPE (fd->loop.v); if (POINTER_TYPE_P (type)) @@ -3477,6 +3538,29 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi = gsi_last_bb (entry_bb); } + if (fd->have_reductemp) + { + tree t1 = build_int_cst (long_integer_type_node, 0); + tree t2 = build_int_cst (long_integer_type_node, 1); + tree t3 = build_int_cstu (long_integer_type_node, + (HOST_WIDE_INT_1U << 31) + 1); + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + reductions = OMP_CLAUSE_DECL (clauses); + gcc_assert (TREE_CODE (reductions) == SSA_NAME); + gimple *g = SSA_NAME_DEF_STMT (reductions); + reductions = gimple_assign_rhs1 (g); + OMP_CLAUSE_DECL (clauses) = reductions; + gimple_stmt_iterator gsi2 = gsi_for_stmt (g); + tree t + = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), + 9, t1, t2, t2, t3, t1, null_pointer_node, + null_pointer_node, reductions, null_pointer_node); + force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + gsi_remove (&gsi2, true); + release_ssa_name (gimple_assign_lhs (g)); + } switch (gimple_omp_for_kind (fd->for_stmt)) { case GF_OMP_FOR_KIND_FOR: @@ -3747,7 +3831,25 @@ expand_omp_for_static_nochunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + if (fd->have_reductemp) + { + tree fn; + if (t) + fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *g = gimple_build_call (fn, 0); + if (t) + { + gimple_call_set_lhs (g, t); + gsi_insert_after (&gsi, gimple_build_assign (reductions, + NOP_EXPR, t), + GSI_SAME_STMT); + } + gsi_insert_after (&gsi, g, GSI_SAME_STMT); + } + else + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); } gsi_remove (&gsi, true); @@ -3884,6 +3986,7 @@ expand_omp_for_static_chunk (struct omp_region *region, bool broken_loop = region->cont == NULL; tree *counts = NULL; tree n1, n2, step; + tree reductions = NULL_TREE; itype = type = TREE_TYPE (fd->loop.v); if (POINTER_TYPE_P (type)) @@ -3971,6 +4074,29 @@ expand_omp_for_static_chunk (struct omp_region *region, gsi = gsi_last_bb (entry_bb); } + if (fd->have_reductemp) + { + tree t1 = build_int_cst (long_integer_type_node, 0); + tree t2 = 
build_int_cst (long_integer_type_node, 1); + tree t3 = build_int_cstu (long_integer_type_node, + (HOST_WIDE_INT_1U << 31) + 1); + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + reductions = OMP_CLAUSE_DECL (clauses); + gcc_assert (TREE_CODE (reductions) == SSA_NAME); + gimple *g = SSA_NAME_DEF_STMT (reductions); + reductions = gimple_assign_rhs1 (g); + OMP_CLAUSE_DECL (clauses) = reductions; + gimple_stmt_iterator gsi2 = gsi_for_stmt (g); + tree t + = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), + 9, t1, t2, t2, t3, t1, null_pointer_node, + null_pointer_node, reductions, null_pointer_node); + force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + gsi_remove (&gsi2, true); + release_ssa_name (gimple_assign_lhs (g)); + } switch (gimple_omp_for_kind (fd->for_stmt)) { case GF_OMP_FOR_KIND_FOR: @@ -4274,7 +4400,25 @@ expand_omp_for_static_chunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + if (fd->have_reductemp) + { + tree fn; + if (t) + fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *g = gimple_build_call (fn, 0); + if (t) + { + gimple_call_set_lhs (g, t); + gsi_insert_after (&gsi, gimple_build_assign (reductions, + NOP_EXPR, t), + GSI_SAME_STMT); + } + gsi_insert_after (&gsi, g, GSI_SAME_STMT); + } + else + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); } gsi_remove (&gsi, true); @@ -5809,6 +5953,8 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) else { int fn_index, start_ix, next_ix; + unsigned HOST_WIDE_INT sched = 0; + tree sched_arg = NULL_TREE; gcc_assert (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_FOR); @@ -5822,12 +5968,16 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) { gcc_assert (!fd.have_ordered); fn_index = 6; + sched = 4; } else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 && !fd.have_ordered) fn_index = 7; else - fn_index = 3; + { + fn_index = 3; + sched = (HOST_WIDE_INT_1U << 31); + } break; case OMP_CLAUSE_SCHEDULE_DYNAMIC: case OMP_CLAUSE_SCHEDULE_GUIDED: @@ -5835,13 +5985,17 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) && !fd.have_ordered) { fn_index = 3 + fd.sched_kind; + sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; break; } fn_index = fd.sched_kind; + sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; + sched += (HOST_WIDE_INT_1U << 31); break; case OMP_CLAUSE_SCHEDULE_STATIC: gcc_assert (fd.have_ordered); fn_index = 0; + sched = (HOST_WIDE_INT_1U << 31) + 1; break; default: gcc_unreachable (); @@ -5853,6 +6007,18 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) else start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; + if (fd.have_reductemp) + { + if (fd.ordered) + start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START; + else if (fd.have_ordered) + start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START; + else + start_ix = (int)BUILT_IN_GOMP_LOOP_START; + sched_arg = build_int_cstu (long_integer_type_node, sched); + if (!fd.chunk_size) + fd.chunk_size = integer_zero_node; + } if (fd.iter_type == long_long_unsigned_type_node) { start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START @@ -5861,7 +6027,8 @@ expand_omp_for (struct omp_region 
*region, gimple *inner_stmt) - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); } expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, - (enum built_in_function) next_ix, inner_stmt); + (enum built_in_function) next_ix, sched_arg, + inner_stmt); } if (gimple_in_ssa_p (cfun)) @@ -5961,7 +6128,25 @@ expand_omp_sections (struct omp_region *region) sections_stmt = as_a (gsi_stmt (si)); gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); vin = gimple_omp_sections_control (sections_stmt); - if (!is_combined_parallel (region)) + tree clauses = gimple_omp_sections_clauses (sections_stmt); + tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + if (reductmp) + { + tree reductions = OMP_CLAUSE_DECL (reductmp); + gcc_assert (TREE_CODE (reductions) == SSA_NAME); + gimple *g = SSA_NAME_DEF_STMT (reductions); + reductions = gimple_assign_rhs1 (g); + OMP_CLAUSE_DECL (reductmp) = reductions; + gimple_stmt_iterator gsi = gsi_for_stmt (g); + t = build_int_cst (unsigned_type_node, len - 1); + u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START); + stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node); + gimple_call_set_lhs (stmt, vin); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + release_ssa_name (gimple_assign_lhs (g)); + } + else if (!is_combined_parallel (region)) { /* If we are not inside a combined parallel+sections region, call GOMP_sections_start. */ @@ -5975,8 +6160,11 @@ expand_omp_sections (struct omp_region *region) u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); stmt = gimple_build_call (u, 0); } - gimple_call_set_lhs (stmt, vin); - gsi_insert_after (&si, stmt, GSI_SAME_STMT); + if (!reductmp) + { + gimple_call_set_lhs (stmt, vin); + gsi_insert_after (&si, stmt, GSI_SAME_STMT); + } gsi_remove (&si, true); /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in diff --git a/gcc/omp-general.c b/gcc/omp-general.c index 2d53e105ec39..99d8226ef213 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -138,6 +138,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->pre = NULL; fd->have_nowait = distribute || simd; fd->have_ordered = false; + fd->have_reductemp = false; fd->tiling = NULL_TREE; fd->collapse = 1; fd->ordered = 0; @@ -188,6 +189,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t); collapse_count = &OMP_CLAUSE_TILE_COUNT (t); break; + case OMP_CLAUSE__REDUCTEMP_: + fd->have_reductemp = true; default: break; } diff --git a/gcc/omp-general.h b/gcc/omp-general.h index b5af0e07ebc0..b847506d4528 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -62,7 +62,7 @@ struct omp_for_data tree tiling; /* Tiling values (if non null). */ int collapse; /* Collapsed loops, 1 for a non-collapsed loop. 
*/ int ordered; - bool have_nowait, have_ordered, simd_schedule; + bool have_nowait, have_ordered, simd_schedule, have_reductemp; unsigned char sched_modifiers; enum omp_clause_schedule_kind sched_kind; struct omp_for_data_loop *loops; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 040795950e64..0679904c7edb 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -542,6 +542,9 @@ build_outer_var_ref (tree var, omp_context *ctx, enum omp_clause_code code = OMP_CLAUSE_ERROR) { tree x; + omp_context *outer = ctx->outer; + while (outer && gimple_code (outer->stmt) == GIMPLE_OMP_TASKGROUP) + outer = outer->outer; if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))) x = var; @@ -568,44 +571,43 @@ build_outer_var_ref (tree var, omp_context *ctx, Similarly for OMP_CLAUSE_PRIVATE with outer ref, that can refer to private vars in all worksharing constructs. */ x = NULL_TREE; - if (ctx->outer && is_taskreg_ctx (ctx)) - x = lookup_decl (var, ctx->outer); - else if (ctx->outer) + if (outer && is_taskreg_ctx (outer)) + x = lookup_decl (var, outer); + else if (outer) x = maybe_lookup_decl_in_outer_ctx (var, ctx); if (x == NULL_TREE) x = var; } else if (code == OMP_CLAUSE_LASTPRIVATE && is_taskloop_ctx (ctx)) { - gcc_assert (ctx->outer); + gcc_assert (outer); splay_tree_node n - = splay_tree_lookup (ctx->outer->field_map, + = splay_tree_lookup (outer->field_map, (splay_tree_key) &DECL_UID (var)); if (n == NULL) { - if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer))) + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, outer))) x = var; else - x = lookup_decl (var, ctx->outer); + x = lookup_decl (var, outer); } else { tree field = (tree) n->value; /* If the receiver record type was remapped in the child function, remap the field into the new record type. 
*/ - x = maybe_lookup_field (field, ctx->outer); + x = maybe_lookup_field (field, outer); if (x != NULL) field = x; - x = build_simple_mem_ref (ctx->outer->receiver_decl); + x = build_simple_mem_ref (outer->receiver_decl); x = omp_build_component_ref (x, field); - if (use_pointer_for_field (var, ctx->outer)) + if (use_pointer_for_field (var, outer)) x = build_simple_mem_ref (x); } } - else if (ctx->outer) + else if (outer) { - omp_context *outer = ctx->outer; if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY) { outer = outer->outer; @@ -1130,6 +1132,12 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_REDUCTION_TASK (c)) + { + install_var_local (decl, ctx); + break; + } goto do_private; case OMP_CLAUSE_LASTPRIVATE: @@ -3833,7 +3841,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple_call_set_lhs (g, v); gimple_seq_add_stmt (ilist, g); c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); - tskred_temp = lookup_decl (OMP_CLAUSE_DECL (c), ctx); + tskred_temp = OMP_CLAUSE_DECL (c); + if (is_taskreg_ctx (ctx)) + tskred_temp = lookup_decl (tskred_temp, ctx); tree v2 = create_tmp_var (sizetype); g = gimple_build_assign (v2, NOP_EXPR, v); gimple_seq_add_stmt (ilist, g); @@ -3890,8 +3900,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, break; case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_IN_REDUCTION: - if (is_task_ctx (ctx) - || (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx))) + if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c)) { task_reduction_p = true; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) @@ -3923,8 +3932,11 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) reduction_omp_orig_ref = true; break; - case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__REDUCTEMP_: + if (!is_taskreg_ctx (ctx)) + continue; + /* FALLTHRU */ + case OMP_CLAUSE__LOOPTEMP_: /* Handle _looptemp_/_reductemp_ clauses only on parallel/task. */ if (fd) @@ -5761,8 +5773,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) update in that case, otherwise use a lock. */ for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION - && (!OMP_CLAUSE_REDUCTION_TASK (c) - || !is_parallel_ctx (ctx))) + && !OMP_CLAUSE_REDUCTION_TASK (c)) { if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF) @@ -5784,8 +5795,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) location_t clause_loc = OMP_CLAUSE_LOCATION (c); if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION - || (OMP_CLAUSE_REDUCTION_TASK (c) - && is_parallel_ctx (ctx))) + || OMP_CLAUSE_REDUCTION_TASK (c)) continue; enum omp_clause_code ccode = OMP_CLAUSE_REDUCTION; @@ -6078,9 +6088,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE__REDUCTEMP_: break; case OMP_CLAUSE_REDUCTION: - if (is_task_ctx (ctx)) - continue; - if (OMP_CLAUSE_REDUCTION_TASK (c) && is_parallel_ctx (ctx)) + if (is_task_ctx (ctx) || OMP_CLAUSE_REDUCTION_TASK (c)) continue; break; case OMP_CLAUSE_SHARED: @@ -6511,30 +6519,55 @@ maybe_catch_exception (gimple_seq body) cancellation in the implicit barrier. 
*/ static void -maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body) +maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple *omp_return, + gimple_seq *body) { - gimple *omp_return = gimple_seq_last_stmt (*body); gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN); if (gimple_omp_return_nowait_p (omp_return)) return; - if (ctx->outer - && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL - && ctx->outer->cancellable) - { - tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL); - tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl)); - tree lhs = create_tmp_var (c_bool_type); - gimple_omp_return_set_lhs (omp_return, lhs); - tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION); - gimple *g = gimple_build_cond (NE_EXPR, lhs, - fold_convert (c_bool_type, - boolean_false_node), - ctx->outer->cancel_label, fallthru_label); - gimple_seq_add_stmt (body, g); - gimple_seq_add_stmt (body, gimple_build_label (fallthru_label)); + for (omp_context *outer = ctx->outer; outer; outer = outer->outer) + if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL + && outer->cancellable) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL); + tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl)); + tree lhs = create_tmp_var (c_bool_type); + gimple_omp_return_set_lhs (omp_return, lhs); + tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION); + gimple *g = gimple_build_cond (NE_EXPR, lhs, + fold_convert (c_bool_type, + boolean_false_node), + outer->cancel_label, fallthru_label); + gimple_seq_add_stmt (body, g); + gimple_seq_add_stmt (body, gimple_build_label (fallthru_label)); + } + else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP) + return; +} + +/* Find the first task_reduction or reduction clause or return NULL + if there are none. */ + +static inline tree +omp_task_reductions_find_first (tree clauses, enum tree_code code, + enum omp_clause_code ccode) +{ + while (1) + { + clauses = omp_find_clause (clauses, ccode); + if (clauses == NULL_TREE) + return NULL_TREE; + if (ccode != OMP_CLAUSE_REDUCTION + || code == OMP_TASKLOOP + || OMP_CLAUSE_REDUCTION_TASK (clauses)) + return clauses; + clauses = OMP_CLAUSE_CHAIN (clauses); } } +static void lower_omp_task_reductions (omp_context *, enum tree_code, tree, + gimple_seq *, gimple_seq *); + /* Lower the OpenMP sections directive in the current statement in GSI_P. CTX is the enclosing OMP context for the current statement. 
*/ @@ -6546,7 +6579,7 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx) gomp_sections *stmt; gimple *t; gbind *new_stmt, *bind; - gimple_seq ilist, dlist, olist, new_body; + gimple_seq ilist, dlist, olist, tred_dlist = NULL, new_body; stmt = as_a (gsi_stmt (*gsi_p)); @@ -6554,6 +6587,27 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx) dlist = NULL; ilist = NULL; + + tree rclauses + = omp_task_reductions_find_first (gimple_omp_sections_clauses (stmt), + OMP_SECTIONS, OMP_CLAUSE_REDUCTION); + tree rtmp = NULL_TREE; + if (rclauses) + { + tree type = build_pointer_type (pointer_sized_int_node); + tree temp = create_tmp_var (type); + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_); + OMP_CLAUSE_DECL (c) = temp; + OMP_CLAUSE_CHAIN (c) = gimple_omp_sections_clauses (stmt); + gimple_omp_sections_set_clauses (stmt, c); + lower_omp_task_reductions (ctx, OMP_SECTIONS, + gimple_omp_sections_clauses (stmt), + &ilist, &tred_dlist); + rclauses = c; + rtmp = make_ssa_name (type); + gimple_seq_add_stmt (&ilist, gimple_build_assign (rtmp, temp)); + } + lower_rec_input_clauses (gimple_omp_sections_clauses (stmt), &ilist, &dlist, ctx, NULL); @@ -6625,7 +6679,11 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx) OMP_CLAUSE_NOWAIT) != NULL_TREE; t = gimple_build_omp_return (nowait); gimple_seq_add_stmt (&new_body, t); - maybe_add_implicit_barrier_cancel (ctx, &new_body); + gimple_seq_add_seq (&new_body, tred_dlist); + maybe_add_implicit_barrier_cancel (ctx, t, &new_body); + + if (rclauses) + OMP_CLAUSE_DECL (rclauses) = rtmp; gimple_bind_set_body (new_stmt, new_body); } @@ -6787,7 +6845,7 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx) OMP_CLAUSE_NOWAIT) != NULL_TREE; gimple *g = gimple_build_omp_return (nowait); gimple_seq_add_stmt (&bind_body_tail, g); - maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail); + maybe_add_implicit_barrier_cancel (ctx, g, &bind_body_tail); if (ctx->record_type) { gimple_stmt_iterator gsi = gsi_start (bind_body_tail); @@ -6849,26 +6907,6 @@ lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx) BLOCK_VARS (block) = ctx->block_vars; } -/* Find the first task_reduction or reduction clause or return NULL - if there are none. */ - -static inline tree -omp_task_reductions_find_first (tree clauses, enum tree_code code, - enum omp_clause_code ccode) -{ - while (1) - { - clauses = omp_find_clause (clauses, ccode); - if (clauses == NULL_TREE) - return NULL_TREE; - if (ccode != OMP_CLAUSE_REDUCTION - || code == OMP_TASKLOOP - || OMP_CLAUSE_REDUCTION_TASK (clauses)) - return clauses; - clauses = OMP_CLAUSE_CHAIN (clauses); - } -} - /* Helper function for lower_omp_task_reductions. For a specific PASS find out the current clause it should be processed, or return false if all have been processed already. */ @@ -6918,12 +6956,35 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, enum omp_clause_code ccode = (code == OMP_TASKGROUP ? 
OMP_CLAUSE_TASK_REDUCTION : OMP_CLAUSE_REDUCTION); + tree cancellable = NULL_TREE; clauses = omp_task_reductions_find_first (clauses, code, ccode); if (clauses == NULL_TREE) return; + if (code == OMP_FOR || code == OMP_SECTIONS) + { + for (omp_context *outer = ctx->outer; outer; outer = outer->outer) + if (gimple_code (outer->stmt) == GIMPLE_OMP_PARALLEL + && outer->cancellable) + { + cancellable = error_mark_node; + break; + } + else if (gimple_code (outer->stmt) != GIMPLE_OMP_TASKGROUP) + break; + } tree record_type = lang_hooks.types.make_type (RECORD_TYPE); tree *last = &TYPE_FIELDS (record_type); unsigned cnt = 0; + if (cancellable) + { + tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, + ptr_type_node); + tree ifield = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, + integer_type_node); + *last = field; + DECL_CHAIN (field) = ifield; + last = &DECL_CHAIN (ifield); + } for (int pass = 0; pass < 2; pass++) { tree decl, type, next; @@ -7010,7 +7071,49 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, tree idx = create_tmp_var (size_type_node); gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node)); tree num_thr_sz = create_tmp_var (size_type_node); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + tree lab2 = create_artificial_label (UNKNOWN_LOCATION); + tree lab3 = NULL_TREE; gimple *g; + if (code == OMP_FOR || code == OMP_SECTIONS) + { + /* For worksharing constructs, only perform it in the master thread, + with the exception of cancelled implicit barriers - then only handle + the current thread. */ + tree lab4 = create_artificial_label (UNKNOWN_LOCATION); + t = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + tree thr_num = create_tmp_var (integer_type_node); + g = gimple_build_call (t, 0); + gimple_call_set_lhs (g, thr_num); + gimple_seq_add_stmt (end, g); + if (cancellable) + { + tree c; + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); + tree lab6 = create_artificial_label (UNKNOWN_LOCATION); + lab3 = create_artificial_label (UNKNOWN_LOCATION); + if (code == OMP_FOR) + c = gimple_omp_for_clauses (ctx->stmt); + else if (code == OMP_SECTIONS) + c = gimple_omp_sections_clauses (ctx->stmt); + c = OMP_CLAUSE_DECL (omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_)); + cancellable = c; + g = gimple_build_cond (NE_EXPR, c, build_zero_cst (TREE_TYPE (c)), + lab5, lab6); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab5)); + g = gimple_build_assign (idx, NOP_EXPR, thr_num); + gimple_seq_add_stmt (end, g); + g = gimple_build_assign (num_thr_sz, PLUS_EXPR, idx, + build_one_cst (TREE_TYPE (idx))); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_goto (lab3)); + gimple_seq_add_stmt (end, gimple_build_label (lab6)); + } + g = gimple_build_cond (NE_EXPR, thr_num, integer_zero_node, lab2, lab4); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab4)); + } if (code != OMP_PARALLEL) { t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); @@ -7020,6 +7123,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, gimple_seq_add_stmt (end, g); g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr); gimple_seq_add_stmt (end, g); + if (cancellable) + gimple_seq_add_stmt (end, gimple_build_label (lab3)); } else { @@ -7033,8 +7138,6 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, NULL_TREE, NULL_TREE); tree data = create_tmp_var (pointer_sized_int_node); 
gimple_seq_add_stmt (end, gimple_build_assign (data, t)); - tree lab1 = create_artificial_label (UNKNOWN_LOCATION); - tree lab2 = create_artificial_label (UNKNOWN_LOCATION); gimple_seq_add_stmt (end, gimple_build_label (lab1)); tree ptr; if (TREE_CODE (TYPE_SIZE_UNIT (record_type)) == INTEGER_CST) @@ -7045,6 +7148,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, tree field = TYPE_FIELDS (record_type); cnt = 0; + if (cancellable) + field = DECL_CHAIN (DECL_CHAIN (field)); for (int pass = 0; pass < 2; pass++) { tree decl, type, next; @@ -7117,9 +7222,9 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, tree bfield = DECL_CHAIN (field); tree cond; - if (code == OMP_PARALLEL) - /* In parallel all threads unconditionally initialize all their - task reduction private variables. */ + if (code == OMP_PARALLEL || code == OMP_FOR || code == OMP_SECTIONS) + /* In parallel or worksharing all threads unconditionally + initialize all their task reduction private variables. */ cond = boolean_true_node; else if (TREE_TYPE (ptr) == ptr_type_node) { @@ -7143,6 +7248,18 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, lab3, lab4); gimple_seq_add_stmt (end, g); gimple_seq_add_stmt (end, gimple_build_label (lab3)); + if (cancellable && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE) + { + /* If this reduction doesn't need destruction and parallel + has been cancelled, there is nothing to do for this + reduction, so jump around the merge operation. */ + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, cancellable, + build_zero_cst (TREE_TYPE (cancellable)), + lab4, lab5); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab5)); + } tree new_var; if (TREE_TYPE (ptr) == ptr_type_node) @@ -7202,6 +7319,20 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); tree decl_placeholder = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c); + tree lab6 = NULL_TREE; + if (cancellable) + { + /* If this reduction needs destruction and parallel + has been cancelled, jump around the merge operation + to the destruction. */ + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); + lab6 = create_artificial_label (UNKNOWN_LOCATION); + tree zero = build_zero_cst (TREE_TYPE (cancellable)); + g = gimple_build_cond (NE_EXPR, cancellable, zero, + lab6, lab5); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab5)); + } SET_DECL_VALUE_EXPR (placeholder, out); DECL_HAS_VALUE_EXPR_P (placeholder) = 1; SET_DECL_VALUE_EXPR (decl_placeholder, priv); @@ -7215,6 +7346,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL; } + if (cancellable) + gimple_seq_add_stmt (end, gimple_build_label (lab6)); tree x = lang_hooks.decls.omp_clause_dtor (c, priv); if (x) { @@ -7247,7 +7380,20 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); tree oldv = NULL_TREE; - + tree lab6 = NULL_TREE; + if (cancellable) + { + /* If this reduction needs destruction and parallel + has been cancelled, jump around the merge operation + to the destruction. 
*/ + tree lab5 = create_artificial_label (UNKNOWN_LOCATION); + lab6 = create_artificial_label (UNKNOWN_LOCATION); + tree zero = build_zero_cst (TREE_TYPE (cancellable)); + g = gimple_build_cond (NE_EXPR, cancellable, zero, + lab6, lab5); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab5)); + } if (omp_is_reference (decl) && !useless_type_conversion_p (TREE_TYPE (placeholder), TREE_TYPE (ref))) @@ -7283,6 +7429,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION) OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; + if (cancellable) + gimple_seq_add_stmt (end, gimple_build_label (lab6)); tree x = lang_hooks.decls.omp_clause_dtor (c, new_var); if (x) { @@ -7309,10 +7457,16 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); gimple_seq_add_stmt (start, g); } - else if (code == OMP_TASKLOOP || code == OMP_PARALLEL) + else { - tree c = omp_find_clause (gimple_omp_taskreg_clauses (ctx->stmt), - OMP_CLAUSE__REDUCTEMP_); + tree c; + if (code == OMP_FOR) + c = gimple_omp_for_clauses (ctx->stmt); + else if (code == OMP_SECTIONS) + c = gimple_omp_sections_clauses (ctx->stmt); + else + c = gimple_omp_taskreg_clauses (ctx->stmt); + c = omp_find_clause (c, OMP_CLAUSE__REDUCTEMP_); t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)), build_fold_addr_expr (avar)); gimplify_assign (OMP_CLAUSE_DECL (c), t, start); @@ -7324,8 +7478,28 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, g = gimple_build_cond (NE_EXPR, idx, num_thr_sz, lab1, lab2); gimple_seq_add_stmt (end, g); gimple_seq_add_stmt (end, gimple_build_label (lab2)); - t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER); - g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); + if (code == OMP_FOR || code == OMP_SECTIONS) + { + enum built_in_function bfn + = BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER; + t = builtin_decl_explicit (bfn); + tree c_bool_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (t))); + tree arg; + if (cancellable) + { + arg = create_tmp_var (c_bool_type); + gimple_seq_add_stmt (end, gimple_build_assign (arg, NOP_EXPR, + cancellable)); + } + else + arg = build_int_cst (c_bool_type, 0); + g = gimple_build_call (t, 1, arg); + } + else + { + t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER); + g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); + } gimple_seq_add_stmt (end, g); t = build_constructor (atype, NULL); TREE_THIS_VOLATILE (t) = 1; @@ -7953,7 +8127,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) struct omp_for_data fd, *fdp = NULL; gomp_for *stmt = as_a (gsi_stmt (*gsi_p)); gbind *new_stmt; - gimple_seq omp_for_body, body, dlist; + gimple_seq omp_for_body, body, dlist, tred_ilist = NULL, tred_dlist = NULL; + gimple_seq cnt_list = NULL; gimple_seq oacc_head = NULL, oacc_tail = NULL; size_t i; @@ -8046,9 +8221,30 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. 
*/ dlist = NULL; body = NULL; + tree rclauses + = omp_task_reductions_find_first (gimple_omp_for_clauses (stmt), OMP_FOR, + OMP_CLAUSE_REDUCTION); + tree rtmp = NULL_TREE; + if (rclauses) + { + tree type = build_pointer_type (pointer_sized_int_node); + tree temp = create_tmp_var (type); + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_); + OMP_CLAUSE_DECL (c) = temp; + OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (stmt); + gimple_omp_for_set_clauses (stmt, c); + lower_omp_task_reductions (ctx, OMP_FOR, + gimple_omp_for_clauses (stmt), + &tred_ilist, &tred_dlist); + rclauses = c; + rtmp = make_ssa_name (type); + gimple_seq_add_stmt (&body, gimple_build_assign (rtmp, temp)); + } + lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx, fdp); - gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt)); + gimple_seq_add_seq (rclauses ? &tred_ilist : &body, + gimple_omp_for_pre_body (stmt)); lower_omp (gimple_omp_body_ptr (stmt), ctx); @@ -8063,20 +8259,24 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) { rhs_p = gimple_omp_for_initial_ptr (stmt, i); if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, &body); + *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list); else if (TREE_CODE (*rhs_p) == ADDR_EXPR) recompute_tree_invariant_for_addr_expr (*rhs_p); rhs_p = gimple_omp_for_final_ptr (stmt, i); if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, &body); + *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list); else if (TREE_CODE (*rhs_p) == ADDR_EXPR) recompute_tree_invariant_for_addr_expr (*rhs_p); rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1); if (!is_gimple_min_invariant (*rhs_p)) - *rhs_p = get_formal_tmp_var (*rhs_p, &body); + *rhs_p = get_formal_tmp_var (*rhs_p, &cnt_list); } + if (rclauses) + gimple_seq_add_seq (&tred_ilist, cnt_list); + else + gimple_seq_add_seq (&body, cnt_list); /* Once lowered, extract the bounds and clauses. */ omp_extract_for_data (stmt, &fd, NULL); @@ -8123,13 +8323,26 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_seq (&body, dlist); + if (rclauses) + { + gimple_seq_add_seq (&tred_ilist, body); + body = tred_ilist; + } + body = maybe_catch_exception (body); if (!phony_loop) { /* Region exit marker goes at the end of the loop body. */ - gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait)); - maybe_add_implicit_barrier_cancel (ctx, &body); + gimple *g = gimple_build_omp_return (fd.have_nowait); + gimple_seq_add_stmt (&body, g); + + gimple_seq_add_seq (&body, tred_dlist); + + maybe_add_implicit_barrier_cancel (ctx, g, &body); + + if (rclauses) + OMP_CLAUSE_DECL (rclauses) = rtmp; } /* Add OpenACC joining and reduction markers just after the loop. */ diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp index cddc1365d05d..0f459346aeea 100644 --- a/libgomp/ChangeLog.gomp +++ b/libgomp/ChangeLog.gomp @@ -1,3 +1,107 @@ +2018-11-07 Jakub Jelinek + + * libgomp_g.h (GOMP_loop_start, GOMP_loop_ordered_start, + GOMP_loop_doacross_start, GOMP_loop_ull_start, + GOMP_loop_ull_ordered_start, GOMP_loop_ull_doacross_start, + GOMP_workshare_task_reduction_unregister, GOMP_sections2_start): New + prototypes. + * libgomp.h (struct gomp_doacross_work_share): Add extra field. + (struct gomp_work_share): Add task_reductions field. + (struct gomp_taskgroup): Add workshare flag. + (gomp_doacross_init, gomp_doacross_ull_init): Add size_t argument. 
+ (gomp_workshare_taskgroup_start, + gomp_workshare_task_reduction_register): New prototypes. + (gomp_init_work_share, gomp_work_share_start): Change bool argument + to size_t. + * libgomp.map (GOMP_5.0): Export GOMP_loop_start, + GOMP_loop_ordered_start, GOMP_loop_doacross_start, + GOMP_loop_ull_start, GOMP_loop_ull_ordered_start, + GOMP_loop_ull_doacross_start, + GOMP_workshare_task_reduction_unregister and GOMP_sections2_start. + * loop.c: Include string.h. + (GOMP_loop_runtime_next): Add ialias. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (gomp_loop_static_start, gomp_loop_dynamic_start, + gomp_loop_guided_start, gomp_loop_ordered_static_start, + gomp_loop_ordered_dynamic_start, gomp_loop_ordered_guided_start, + gomp_loop_doacross_static_start, gomp_loop_doacross_dynamic_start, + gomp_loop_doacross_guided_start): Adjust gomp_work_share_start + or gomp_doacross_init callers. + (gomp_adjust_sched, GOMP_loop_start, GOMP_loop_ordered_start, + GOMP_loop_doacross_start): New functions. + * loop_ull.c: Include string.h. + (GOMP_loop_ull_runtime_next): Add ialias. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (gomp_loop_ull_static_start, gomp_loop_ull_dynamic_start, + gomp_loop_ull_guided_start, gomp_loop_ull_ordered_static_start, + gomp_loop_ull_ordered_dynamic_start, + gomp_loop_ull_ordered_guided_start, + gomp_loop_ull_doacross_static_start, + gomp_loop_ull_doacross_dynamic_start, + gomp_loop_ull_doacross_guided_start): Adjust gomp_work_share_start + and gomp_doacross_ull_init callers. + (gomp_adjust_sched, GOMP_loop_ull_start, GOMP_loop_ull_ordered_start, + GOMP_loop_ull_doacross_start): New functions. + * sections.c: Include string.h. + (GOMP_taskgroup_reduction_register): Add ialias_redirect. + (GOMP_sections_start): Adjust gomp_work_share_start caller. + (GOMP_sections2_start): New function. + * ordered.c (gomp_doacross_init, gomp_doacross_ull_init): Add + EXTRA argument. If not needed to prepare array, if extra is 0, + clear ws->doacross, otherwise allocate just doacross structure and + extra payload. If array is needed, allocate also extra payload. + (GOMP_doacross_post, GOMP_doacross_wait, GOMP_doacross_ull_post, + GOMP_doacross_ull_wait): Handle doacross->array == NULL like + doacross == NULL. + * parallel.c (GOMP_cancellation_point): If taskgroup has workshare + flag set, check cancelled of prev taskgroup if any. + (GOMP_cancel): If taskgroup has workshare flag set, set cancelled + on prev taskgroup if any. + * single.c (GOMP_single_start, GOMP_single_copy_start): Adjust + gomp_work_share_start callers. + * target.c (GOMP_target_update_ext, GOMP_target_enter_exit_data): + If taskgroup has workshare flag set, check cancelled on prev + taskgroup if any. Guard all cancellation tests with + gomp_cancel_var test. + * taskloop.c (GOMP_taskloop): Likewise. + * task.c (GOMP_task, gomp_create_target_task, gomp_task_run_pre, + GOMP_taskwait_depend): Likewise. + (gomp_taskgroup_init): Clear workshare flag, reorder initialization. + (gomp_reduction_register): Add always_inline attribute. Add + ORIG argument, if non-NULL, don't allocate memory, but copy it + from there. + (gomp_create_artificial_team): New function. + (GOMP_taskgroup_reduction_register): Extend function comment. + Use gomp_create_artificial_team. Adjust gomp_reduction_register + caller. + (gomp_parallel_reduction_register): Adjust gomp_reduction_register + caller. 
+ (gomp_workshare_task_reduction_register, + gomp_workshare_taskgroup_start, + GOMP_workshare_task_reduction_unregister): New functions. + * team.c (gomp_new_team): Adjust gomp_init_work_share caller. + * work.c (gomp_init_work_share): Change ORDERED argument from + bool to size_t, if more than 1 allocate also extra payload at the + end of array. Never keep ordered_team_ids NULL, set it + to inline_ordered_team_ids instead. + (gomp_work_share_start): Change ORDERED argument from bool to size_t, + return true instead of ws. + * testsuite/libgomp.c-c++-common/cancel-parallel-1.c: New test. + * testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-6.c (struct S): + Use unsigned long long int instead of unsigned long int. + (main): Verify r == t. + * testsuite/libgomp.c-c++-common/task-reduction-8.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-9.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-11.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-12.c: New test. + * testsuite/libgomp.c++/task-reduction-14.C: New test. + * testsuite/libgomp.c++/task-reduction-15.C: New test. + * testsuite/libgomp.c++/task-reduction-16.C: New test. + * testsuite/libgomp.c++/task-reduction-17.C: New test. + * testsuite/libgomp.c++/task-reduction-18.C: New test. + * testsuite/libgomp.c++/task-reduction-19.C: New test. + 2018-10-26 Jakub Jelinek * libgomp.h (GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC): Define unless diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 9728c8e5f687..828e9b0095b3 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -188,6 +188,8 @@ struct gomp_doacross_work_share /* Likewise, but for the ull implementation. */ unsigned long long boundary_ull; }; + /* Pointer to extra memory if needed for lastprivate(conditional). */ + void *extra; /* Array of shift counts for each dimension if they can be flattened. */ unsigned int shift_counts[]; }; @@ -289,6 +291,9 @@ struct gomp_work_share struct gomp_work_share *next_free; }; + /* Task reductions for this work-sharing construct. */ + uintptr_t *task_reductions; + /* If only few threads are in the team, ordered_team_ids can point to this array which fills the padding at the end of this struct. 
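The GOMP_loop_start/GOMP_sections2_start entry points added in this patch reuse this same inline space for the extra memory the compiler can request through their MEM argument: a request of SIZE bytes is served from the padding whenever SIZE <= sizeof (struct gomp_work_share) - offsetof (struct gomp_work_share, inline_ordered_team_ids), and is gomp_malloc_cleared'ed separately otherwise. With illustrative numbers only: if the struct were 512 bytes and the flexible array started at offset 424, requests of up to 88 bytes would need no extra allocation.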
*/ unsigned inline_ordered_team_ids[0]; @@ -490,6 +495,7 @@ struct gomp_taskgroup uintptr_t *reductions; bool in_taskgroup_wait; bool cancelled; + bool workshare; gomp_sem_t taskgroup_sem; size_t num_children; }; @@ -795,9 +801,9 @@ extern void gomp_ordered_next (void); extern void gomp_ordered_static_init (void); extern void gomp_ordered_static_next (void); extern void gomp_ordered_sync (void); -extern void gomp_doacross_init (unsigned, long *, long); +extern void gomp_doacross_init (unsigned, long *, long, size_t); extern void gomp_doacross_ull_init (unsigned, unsigned long long *, - unsigned long long); + unsigned long long, size_t); /* parallel.c */ @@ -822,6 +828,8 @@ extern bool gomp_create_target_task (struct gomp_device_descr *, enum gomp_target_task_state); extern struct gomp_taskgroup *gomp_parallel_reduction_register (uintptr_t *, unsigned); +extern void gomp_workshare_taskgroup_start (void); +extern void gomp_workshare_task_reduction_register (uintptr_t *, uintptr_t *); static void inline gomp_finish_task (struct gomp_task *task) @@ -1061,9 +1069,9 @@ extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); /* work.c */ -extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); +extern void gomp_init_work_share (struct gomp_work_share *, size_t, unsigned); extern void gomp_fini_work_share (struct gomp_work_share *); -extern bool gomp_work_share_start (bool); +extern bool gomp_work_share_start (size_t); extern void gomp_work_share_end (void); extern bool gomp_work_share_end_cancel (void); extern void gomp_work_share_end_nowait (void); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 0ea7578e027b..4c19a259eaa6 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -316,22 +316,30 @@ GOMP_4.5 { GOMP_5.0 { global: + GOMP_loop_doacross_start; GOMP_loop_maybe_nonmonotonic_runtime_next; GOMP_loop_maybe_nonmonotonic_runtime_start; GOMP_loop_nonmonotonic_runtime_next; GOMP_loop_nonmonotonic_runtime_start; + GOMP_loop_ordered_start; + GOMP_loop_start; + GOMP_loop_ull_doacross_start; GOMP_loop_ull_maybe_nonmonotonic_runtime_next; GOMP_loop_ull_maybe_nonmonotonic_runtime_start; GOMP_loop_ull_nonmonotonic_runtime_next; GOMP_loop_ull_nonmonotonic_runtime_start; + GOMP_loop_ull_ordered_start; + GOMP_loop_ull_start; GOMP_parallel_loop_maybe_nonmonotonic_runtime; GOMP_parallel_loop_nonmonotonic_runtime; GOMP_parallel_reductions; + GOMP_sections2_start; GOMP_taskgroup_reduction_register; GOMP_taskgroup_reduction_unregister; GOMP_task_reduction_remap; GOMP_taskwait_depend; GOMP_teams_reg; + GOMP_workshare_task_reduction_unregister; } GOMP_4.5; OACC_2.0 { diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 6d24a4f0192a..5b54839b29e5 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -61,6 +61,8 @@ extern bool GOMP_loop_nonmonotonic_runtime_start (long, long, long, long *, long *); extern bool GOMP_loop_maybe_nonmonotonic_runtime_start (long, long, long, long *, long *); +extern bool GOMP_loop_start (long, long, long, long, long, long *, long *, + uintptr_t *, void **); extern bool GOMP_loop_ordered_static_start (long, long, long, long, long *, long *); @@ -69,6 +71,8 @@ extern bool GOMP_loop_ordered_dynamic_start (long, long, long, long, extern bool GOMP_loop_ordered_guided_start (long, long, long, long, long *, long *); extern bool GOMP_loop_ordered_runtime_start (long, long, long, long *, long *); +extern bool GOMP_loop_ordered_start (long, long, long, long, long, long *, + long *, uintptr_t *, void **); extern bool 
GOMP_loop_static_next (long *, long *); extern bool GOMP_loop_dynamic_next (long *, long *); @@ -92,6 +96,8 @@ extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, long *); extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, long *); +extern bool GOMP_loop_doacross_start (unsigned, long *, long, long, long *, + long *, uintptr_t *, void **); extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, unsigned, long, long, long, long); @@ -179,6 +185,10 @@ extern bool GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool, unsigned long long, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_start (bool, unsigned long long, unsigned long long, + unsigned long long, long, unsigned long long, + unsigned long long *, unsigned long long *, + uintptr_t *, void **); extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, unsigned long long, @@ -203,6 +213,13 @@ extern bool GOMP_loop_ull_ordered_runtime_start (bool, unsigned long long, unsigned long long, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_ordered_start (bool, unsigned long long, + unsigned long long, + unsigned long long, long, + unsigned long long, + unsigned long long *, + unsigned long long *, + uintptr_t *, void **); extern bool GOMP_loop_ull_static_next (unsigned long long *, unsigned long long *); @@ -249,6 +266,11 @@ extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, unsigned long long *, unsigned long long *, unsigned long long *); +extern bool GOMP_loop_ull_doacross_start (unsigned, unsigned long long *, + long, unsigned long long, + unsigned long long *, + unsigned long long *, + uintptr_t *, void **); /* ordered.c */ @@ -289,10 +311,12 @@ extern void GOMP_taskgroup_end (void); extern void GOMP_taskgroup_reduction_register (uintptr_t *); extern void GOMP_taskgroup_reduction_unregister (uintptr_t *); extern void GOMP_task_reduction_remap (size_t, size_t, void **); +extern void GOMP_workshare_task_reduction_unregister (bool); /* sections.c */ extern unsigned GOMP_sections_start (unsigned); +extern unsigned GOMP_sections2_start (unsigned, uintptr_t *, void **); extern unsigned GOMP_sections_next (void); extern void GOMP_parallel_sections_start (void (*) (void *), void *, unsigned, unsigned); diff --git a/libgomp/loop.c b/libgomp/loop.c index bafd89a0c20b..4e0683ba675a 100644 --- a/libgomp/loop.c +++ b/libgomp/loop.c @@ -27,9 +27,13 @@ #include <limits.h> #include <stdlib.h> +#include <string.h> #include "libgomp.h" +ialias (GOMP_loop_runtime_next) +ialias_redirect (GOMP_taskgroup_reduction_register) + /* Initialize the given work share construct from the given arguments.
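Note that with the combined GOMP_loop_start entry point added below, the SCHED argument may still be GFS_RUNTIME or GFS_AUTO when a thread arrives, so it is first normalized by gomp_adjust_sched. Roughly, per the comments in that function:

   schedule clause           SCHED value           resolved to
   static/dynamic/guided     GFS_* (possibly       itself, with the
                             | GFS_MONOTONIC)      GFS_MONOTONIC bit stripped
   runtime [monotonic]       GFS_RUNTIME           ICV run_sched_var
   runtime nonmonotonic      GFS_AUTO              ICV run_sched_var
                                                   (auto becomes static, 0)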
*/ static inline void @@ -101,7 +105,7 @@ gomp_loop_static_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_STATIC, chunk_size); @@ -123,7 +127,7 @@ gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -151,7 +155,7 @@ gomp_loop_guided_start (long start, long end, long incr, long chunk_size, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_GUIDED, chunk_size); @@ -197,6 +201,100 @@ GOMP_loop_runtime_start (long start, long end, long incr, } } +static long +gomp_adjust_sched (long sched, long *chunk_size) +{ + sched &= ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + return sched; + /* GFS_RUNTIME is used for runtime schedule without monotonic + or nonmonotonic modifiers on the clause. + GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic + modifier. */ + case GFS_RUNTIME: + /* GFS_AUTO is used for runtime schedule with nonmonotonic + modifier. */ + case GFS_AUTO: + { + struct gomp_task_icv *icv = gomp_icv (false); + sched = icv->run_sched_var & ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + *chunk_size = icv->run_sched_chunk_size; + break; + case GFS_AUTO: + sched = GFS_STATIC; + *chunk_size = 0; + break; + default: + abort (); + } + return sched; + } + default: + abort (); + } +} + +bool +GOMP_loop_start (long start, long end, long incr, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset (thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + + if (!istart) + return true; + return ialias_call (GOMP_loop_runtime_next) (istart, iend); +} + /* The *_ordered_*_start routines are similar. The only difference is that this work-share construct is initialized to expect an ORDERED section. 
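GOMP_loop_ordered_start below follows the same first-thread/latecomer protocol as GOMP_loop_start above; a sketch of the split, using the names from this file:

   first thread to arrive:               every later thread:
     gomp_workshare_taskgroup_start ()     gomp_workshare_taskgroup_start ()
     GOMP_taskgroup_reduction_register     gomp_workshare_task_reduction_register
     taskgroup->workshare = true             (copies the buffers the first
     ws->task_reductions = reductions         thread already allocated)

In addition, a MEM request is folded into the ORDERED count passed to gomp_work_share_start, and the pointer handed back through *MEM is aligned to __alignof__ (long long) just past the ordered_team_ids array.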
*/ @@ -207,7 +305,7 @@ gomp_loop_ordered_static_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_STATIC, chunk_size); @@ -225,7 +323,7 @@ gomp_loop_ordered_dynamic_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -250,7 +348,7 @@ gomp_loop_ordered_guided_start (long start, long end, long incr, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_init (thr->ts.work_share, start, end, incr, GFS_GUIDED, chunk_size); @@ -297,6 +395,81 @@ GOMP_loop_ordered_runtime_start (long start, long end, long incr, } } +bool +GOMP_loop_ordered_start (long start, long end, long incr, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + size_t ordered = 1; + bool ret; + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (mem) + ordered += (uintptr_t) *mem; + if (gomp_work_share_start (ordered)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (sched == GFS_STATIC) + gomp_ordered_static_init (); + else + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + if (sched != GFS_STATIC) + gomp_mutex_lock (&thr->ts.work_share->lock); + } + + if (mem) + { + uintptr_t p + = (uintptr_t) (thr->ts.work_share->ordered_team_ids + + (thr->ts.team ? thr->ts.team->nthreads : 1)); + p += __alignof__ (long long) - 1; + p &= ~(__alignof__ (long long) - 1); + *mem = (void *) p; + } + + switch (sched) + { + case GFS_STATIC: + case GFS_AUTO: + return !gomp_iter_static_next (istart, iend); + case GFS_DYNAMIC: + ret = gomp_iter_dynamic_next_locked (istart, iend); + break; + case GFS_GUIDED: + ret = gomp_iter_guided_next_locked (istart, iend); + break; + default: + abort (); + } + + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + return ret; +} + /* The *_doacross_*_start routines are similar. 
The only difference is that this work-share construct is initialized to expect an ORDERED(N) - DOACROSS section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 @@ -310,11 +483,11 @@ gomp_loop_doacross_static_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_STATIC, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -328,11 +501,11 @@ gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_DYNAMIC, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -354,11 +527,11 @@ gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, GFS_GUIDED, chunk_size); - gomp_doacross_init (ncounts, counts, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -402,6 +575,50 @@ GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, } } +bool +GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched, + long chunk_size, long *istart, long *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + size_t extra = 0; + if (mem) + extra = (uintptr_t) *mem; + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, + sched, chunk_size); + gomp_doacross_init (ncounts, counts, chunk_size, extra); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + } + + if (mem) + *mem = thr->ts.work_share->doacross->extra; + + return ialias_call (GOMP_loop_runtime_next) (istart, iend); +} + /* The *_next routines are called when the thread completes processing of the iteration block currently assigned to it. If the work-share construct is bound directly to a parallel construct, then the iteration diff --git a/libgomp/loop_ull.c b/libgomp/loop_ull.c index 7b2dfe6e26b2..ac658023e13b 100644 --- a/libgomp/loop_ull.c +++ b/libgomp/loop_ull.c @@ -27,8 +27,12 @@ #include <limits.h> #include <stdlib.h> +#include <string.h> #include "libgomp.h" +ialias (GOMP_loop_ull_runtime_next) +ialias_redirect (GOMP_taskgroup_reduction_register) + typedef unsigned long long gomp_ull; /* Initialize the given work share construct from the given arguments.
*/ @@ -104,7 +108,7 @@ gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_STATIC, chunk_size); @@ -122,7 +126,7 @@ gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -148,7 +152,7 @@ gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_GUIDED, chunk_size); @@ -195,6 +199,99 @@ GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, } } +static long +gomp_adjust_sched (long sched, gomp_ull *chunk_size) +{ + sched &= ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + return sched; + /* GFS_RUNTIME is used for runtime schedule without monotonic + or nonmonotonic modifiers on the clause. + GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic + modifier. */ + case GFS_RUNTIME: + /* GFS_AUTO is used for runtime schedule with nonmonotonic + modifier. */ + case GFS_AUTO: + { + struct gomp_task_icv *icv = gomp_icv (false); + sched = icv->run_sched_var & ~GFS_MONOTONIC; + switch (sched) + { + case GFS_STATIC: + case GFS_DYNAMIC: + case GFS_GUIDED: + *chunk_size = icv->run_sched_chunk_size; + break; + case GFS_AUTO: + sched = GFS_STATIC; + *chunk_size = 0; + break; + default: + abort (); + } + return sched; + } + default: + abort (); + } +} + +bool +GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset (thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + + return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); +} + /* The *_ordered_*_start routines are similar. The only difference is that this work-share construct is initialized to expect an ORDERED section. 
*/ @@ -206,7 +303,7 @@ gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_STATIC, chunk_size); @@ -225,7 +322,7 @@ gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_DYNAMIC, chunk_size); @@ -251,7 +348,7 @@ gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (true)) + if (gomp_work_share_start (1)) { gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, GFS_GUIDED, chunk_size); @@ -299,6 +396,82 @@ GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, } } +bool +GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + size_t ordered = 1; + bool ret; + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (mem) + ordered += (uintptr_t) *mem; + if (gomp_work_share_start (ordered)) + { + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + sched, chunk_size); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (sched == GFS_STATIC) + gomp_ordered_static_init (); + else + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + if (sched != GFS_STATIC) + gomp_mutex_lock (&thr->ts.work_share->lock); + } + + if (mem) + { + uintptr_t p + = (uintptr_t) (thr->ts.work_share->ordered_team_ids + + (thr->ts.team ? thr->ts.team->nthreads : 1)); + p += __alignof__ (long long) - 1; + p &= ~(__alignof__ (long long) - 1); + *mem = (void *) p; + } + + switch (sched) + { + case GFS_STATIC: + case GFS_AUTO: + return !gomp_iter_ull_static_next (istart, iend); + case GFS_DYNAMIC: + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + break; + case GFS_GUIDED: + ret = gomp_iter_ull_guided_next_locked (istart, iend); + break; + default: + abort (); + } + + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + return ret; +} + /* The *_doacross_*_start routines are similar. 
The only difference is that this work-share construct is initialized to expect an ORDERED(N) - DOACROSS section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 @@ -313,11 +486,11 @@ gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); thr->ts.static_trip = 0; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_STATIC, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -332,11 +505,11 @@ gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_DYNAMIC, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -359,11 +532,11 @@ gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, struct gomp_thread *thr = gomp_thread (); bool ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, GFS_GUIDED, chunk_size); - gomp_doacross_ull_init (ncounts, counts, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); gomp_work_share_init_done (); } @@ -407,6 +580,51 @@ GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, } } +bool +GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts, + long sched, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend, + uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + size_t extra = 0; + if (mem) + extra = (uintptr_t) *mem; + sched = gomp_adjust_sched (sched, &chunk_size); + gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, + sched, chunk_size); + gomp_doacross_ull_init (ncounts, counts, chunk_size, extra); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + sched = thr->ts.work_share->sched; + } + + if (mem) + *mem = thr->ts.work_share->doacross->extra; + + return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); +} + /* The *_next routines are called when the thread completes processing of the iteration block currently assigned to it. 
If the work-share construct is bound directly to a parallel construct, then the iteration diff --git a/libgomp/ordered.c b/libgomp/ordered.c index 1bdd5b2f25bc..521e9122d908 100644 --- a/libgomp/ordered.c +++ b/libgomp/ordered.c @@ -259,7 +259,8 @@ GOMP_ordered_end (void) #define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) void -gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) +gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size, + size_t extra) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -269,13 +270,24 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) struct gomp_doacross_work_share *doacross; if (team == NULL || team->nthreads == 1) - return; + { + empty: + if (!extra) + ws->doacross = NULL; + else + { + doacross = gomp_malloc_cleared (sizeof (*doacross) + extra); + doacross->extra = (void *) (doacross + 1); + ws->doacross = doacross; + } + return; + } for (i = 0; i < ncounts; i++) { /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ if (counts[i] == 0) - return; + goto empty; if (num_bits <= MAX_COLLAPSED_BITS) { @@ -314,7 +326,7 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) elt_sz = (elt_sz + 63) & ~63UL; doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz - + shift_sz); + + shift_sz + extra); doacross->chunk_size = chunk_size; doacross->elt_sz = elt_sz; doacross->ncounts = ncounts; @@ -322,6 +334,13 @@ gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) doacross->array = (unsigned char *) ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) & ~(uintptr_t) 63); + if (extra) + { + doacross->extra = doacross->array + num_ents * elt_sz; + memset (doacross->extra, '\0', extra); + } + else + doacross->extra = NULL; if (num_bits <= MAX_COLLAPSED_BITS) { unsigned int shift_count = 0; @@ -360,7 +379,8 @@ GOMP_doacross_post (long *counts) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -411,7 +431,8 @@ GOMP_doacross_wait (long first, ...) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -488,7 +509,8 @@ GOMP_doacross_wait (long first, ...) typedef unsigned long long gomp_ull; void -gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) +gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, + gomp_ull chunk_size, size_t extra) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -498,13 +520,24 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) struct gomp_doacross_work_share *doacross; if (team == NULL || team->nthreads == 1) - return; + { + empty: + if (!extra) + ws->doacross = NULL; + else + { + doacross = gomp_malloc_cleared (sizeof (*doacross) + extra); + doacross->extra = (void *) (doacross + 1); + ws->doacross = doacross; + } + return; + } for (i = 0; i < ncounts; i++) { /* If any count is 0, GOMP_doacross_{post,wait} can't be called. 
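As in gomp_doacross_init above, this can no longer simply return: even when no doacross synchronization can ever happen, the caller may still need the EXTRA payload (GOMP_loop_ull_doacross_start hands it out through *MEM), so we jump to the empty: path instead, which allocates just

   [ struct gomp_doacross_work_share | extra bytes ]
                                       ^ doacross->extra

and leaves doacross->array NULL; the post/wait entry points below treat a NULL array the same as a missing doacross structure.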
*/ if (counts[i] == 0) - return; + goto empty; if (num_bits <= MAX_COLLAPSED_BITS) { @@ -557,6 +590,13 @@ gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) doacross->array = (unsigned char *) ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) & ~(uintptr_t) 63); + if (extra) + { + doacross->extra = doacross->array + num_ents * elt_sz; + memset (doacross->extra, '\0', extra); + } + else + doacross->extra = NULL; if (num_bits <= MAX_COLLAPSED_BITS) { unsigned int shift_count = 0; @@ -595,7 +635,8 @@ GOMP_doacross_ull_post (gomp_ull *counts) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; @@ -667,7 +708,8 @@ GOMP_doacross_ull_wait (gomp_ull first, ...) unsigned long ent; unsigned int i; - if (__builtin_expect (doacross == NULL, 0)) + if (__builtin_expect (doacross == NULL, 0) + || __builtin_expect (doacross->array == NULL, 0)) { __sync_synchronize (); return; diff --git a/libgomp/parallel.c b/libgomp/parallel.c index ead1394aaa3a..c7a8c788a3bc 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -205,8 +205,15 @@ GOMP_cancellation_point (int which) } else if (which & GOMP_CANCEL_TASKGROUP) { - if (thr->task->taskgroup && thr->task->taskgroup->cancelled) - return true; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return true; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return true; + } /* FALLTHRU into the GOMP_CANCEL_PARALLEL case, as #pragma omp cancel parallel also cancels all explicit tasks. */ @@ -238,11 +245,17 @@ GOMP_cancel (int which, bool do_cancel) } else if (which & GOMP_CANCEL_TASKGROUP) { - if (thr->task->taskgroup && !thr->task->taskgroup->cancelled) + if (thr->task->taskgroup) { - gomp_mutex_lock (&team->task_lock); - thr->task->taskgroup->cancelled = true; - gomp_mutex_unlock (&team->task_lock); + struct gomp_taskgroup *taskgroup = thr->task->taskgroup; + if (taskgroup->workshare && taskgroup->prev) + taskgroup = taskgroup->prev; + if (!taskgroup->cancelled) + { + gomp_mutex_lock (&team->task_lock); + taskgroup->cancelled = true; + gomp_mutex_unlock (&team->task_lock); + } } return true; } diff --git a/libgomp/sections.c b/libgomp/sections.c index 2a6e6ec1c9d4..3449e0067ddc 100644 --- a/libgomp/sections.c +++ b/libgomp/sections.c @@ -26,8 +26,11 @@ /* This file handles the SECTIONS construct. */ #include "libgomp.h" +#include <string.h> +ialias_redirect (GOMP_taskgroup_reduction_register) + /* Initialize the given work share construct from the given arguments.
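GOMP_sections2_start below is the reductions- and mem-aware counterpart of GOMP_sections_start. A minimal user-level sketch of the construct it services (hypothetical example, not from the testsuite; compile with -fopenmp):

   #include <stdio.h>

   int
   main ()
   {
     int r = 0;
     #pragma omp parallel
     #pragma omp sections reduction (task, +: r)
     {
       #pragma omp section
       {
         #pragma omp task in_reduction (+: r)
         r += 1;
       }
       #pragma omp section
       {
         #pragma omp task in_reduction (+: r)
         r += 2;
       }
     }
     printf ("%d\n", r); /* Prints 3.  */
     return 0;
   }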
*/ static inline void @@ -72,7 +75,7 @@ GOMP_sections_start (unsigned count) struct gomp_thread *thr = gomp_thread (); long s, e, ret; - if (gomp_work_share_start (false)) + if (gomp_work_share_start (0)) { gomp_sections_init (thr->ts.work_share, count); gomp_work_share_init_done (); @@ -95,6 +98,66 @@ GOMP_sections_start (unsigned count) return ret; } +unsigned +GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem) +{ + struct gomp_thread *thr = gomp_thread (); + long s, e, ret; + + if (reductions) + gomp_workshare_taskgroup_start (); + if (gomp_work_share_start (0)) + { + gomp_sections_init (thr->ts.work_share, count); + if (reductions) + { + GOMP_taskgroup_reduction_register (reductions); + thr->task->taskgroup->workshare = true; + thr->ts.work_share->task_reductions = reductions; + } + if (mem) + { + uintptr_t size = (uintptr_t) *mem; + if (size > (sizeof (struct gomp_work_share) + - offsetof (struct gomp_work_share, + inline_ordered_team_ids))) + thr->ts.work_share->ordered_team_ids + = gomp_malloc_cleared (size); + else + memset (thr->ts.work_share->ordered_team_ids, '\0', size); + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + gomp_work_share_init_done (); + } + else + { + if (reductions) + { + uintptr_t *first_reductions = thr->ts.work_share->task_reductions; + gomp_workshare_task_reduction_register (reductions, + first_reductions); + } + if (mem) + *mem = (void *) thr->ts.work_share->ordered_team_ids; + } + +#ifdef HAVE_SYNC_BUILTINS + if (gomp_iter_dynamic_next (&s, &e)) + ret = s; + else + ret = 0; +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + if (gomp_iter_dynamic_next_locked (&s, &e)) + ret = s; + else + ret = 0; + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + /* This routine is called when the thread completes processing of the section currently assigned to it. If the work-share construct is bound directly to a parallel construct, then the construct may have diff --git a/libgomp/single.c b/libgomp/single.c index 24a7780ad93a..d5093c6730c3 100644 --- a/libgomp/single.c +++ b/libgomp/single.c @@ -47,7 +47,7 @@ GOMP_single_start (void) return __sync_bool_compare_and_swap (&team->single_count, single_count, single_count + 1L); #else - bool ret = gomp_work_share_start (false); + bool ret = gomp_work_share_start (0); if (ret) gomp_work_share_init_done (); gomp_work_share_end_nowait (); @@ -68,7 +68,7 @@ GOMP_single_copy_start (void) bool first; void *ret; - first = gomp_work_share_start (false); + first = gomp_work_share_start (0); if (first) { diff --git a/libgomp/target.c b/libgomp/target.c index d9288274243d..8ebc2a370a16 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1854,11 +1854,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. 
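Two refinements recur in the cancellation checks below (the same pattern repeats in the task.c and taskloop.c hunks that follow): the whole test is now skipped unless cancellation is enabled at all (gomp_cancel_var, i.e. OMP_CANCELLATION=true), and because a worksharing construct with task reductions runs inside an artificial taskgroup whose workshare flag is set, a cancellation is recorded on the enclosing taskgroup, so the check has to look one level up as well:

   if (tg->cancelled)
     return;
   if (tg->workshare && tg->prev && tg->prev->cancelled)
     return;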
*/ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup - && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_task_maybe_wait_for_dependencies (depend); } @@ -1873,10 +1882,20 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); } @@ -1985,11 +2004,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup - && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } gomp_task_maybe_wait_for_dependencies (depend); } @@ -2004,10 +2032,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } size_t i; if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) diff --git a/libgomp/task.c b/libgomp/task.c index 77e12c404ac5..0c78b3c939cd 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -363,10 +363,20 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), #endif /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) priority = 0; @@ -464,14 +474,26 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) - && !task->copy_ctors_done, 0)) + if (__builtin_expect (gomp_cancel_var, 0) + && !task->copy_ctors_done) { - gomp_mutex_unlock (&team->task_lock); - gomp_finish_task (task); - free (task); - return; + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return; + } + if (taskgroup) + { + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; + } } if (taskgroup) taskgroup->num_children++; @@ -662,10 +684,20 @@ gomp_create_target_task (struct gomp_device_descr *devicep, struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, don't start new tasks. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return true; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return true; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return true; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return true; + } + } struct gomp_target_task *ttask; struct gomp_task *task; @@ -748,13 +780,25 @@ gomp_create_target_task (struct gomp_device_descr *devicep, task->final_task = 0; gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. 
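This re-check runs with team->task_lock held because a cancellation may have arrived since the test at function entry; every cancelled case funnels through a single do_cancel label, a sketch of which (matching the expansion below) is:

   do_cancel:
     gomp_mutex_unlock (&team->task_lock);
     gomp_finish_task (task);
     free (task);
     return true;

The copy_ctors_done guard in the corresponding GOMP_task hunk above presumably exists so that tasks whose firstprivate copy constructors have already run are still executed rather than silently dropped.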
*/ - if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled), 0)) + if (__builtin_expect (gomp_cancel_var, 0)) { - gomp_mutex_unlock (&team->task_lock); - gomp_finish_task (task); - free (task); - return true; + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return true; + } + if (taskgroup) + { + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; + } } if (depend_size) { @@ -1047,10 +1091,21 @@ gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, if (--team->task_queued_count == 0) gomp_team_barrier_clear_task_pending (&team->barrier); - if ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) + if (__builtin_expect (gomp_cancel_var, 0) && !child_task->copy_ctors_done) - return true; + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return true; + if (taskgroup) + { + if (taskgroup->cancelled) + return true; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + return true; + } + } return false; } @@ -1527,10 +1582,20 @@ GOMP_taskwait_depend (void **depend) struct gomp_team *team = thr->ts.team; /* If parallel or taskgroup has been cancelled, return early. */ - if (team - && (gomp_team_barrier_cancelled (&team->barrier) - || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) - return; + if (__builtin_expect (gomp_cancel_var, 0) && team) + { + if (gomp_team_barrier_cancelled (&team->barrier)) + return; + if (thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } + } if (thr->task && thr->task->depend_hash) gomp_task_maybe_wait_for_dependencies (depend); @@ -1770,9 +1835,10 @@ gomp_taskgroup_init (struct gomp_taskgroup *prev) = gomp_malloc (sizeof (struct gomp_taskgroup)); taskgroup->prev = prev; priority_queue_init (&taskgroup->taskgroup_queue); - taskgroup->in_taskgroup_wait = false; taskgroup->reductions = prev ? prev->reductions : NULL; + taskgroup->in_taskgroup_wait = false; taskgroup->cancelled = false; + taskgroup->workshare = false; taskgroup->num_children = 0; gomp_sem_init (&taskgroup->taskgroup_sem, 0); return taskgroup; @@ -1956,21 +2022,34 @@ GOMP_taskgroup_end (void) free (taskgroup); } -static inline void -gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads) +static inline __attribute__((always_inline)) void +gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig, + unsigned nthreads) { size_t total_cnt = 0; uintptr_t *d = data; struct htab *old_htab = NULL, *new_htab; do { - size_t sz = d[1] * nthreads; - /* Should use omp_alloc if d[3] is not -1. */ - void *ptr = gomp_aligned_alloc (d[2], sz); - memset (ptr, '\0', sz); - d[2] = (uintptr_t) ptr; + if (__builtin_expect (orig != NULL, 0)) + { + /* For worksharing task reductions, memory has been allocated + already by some other thread that encountered the construct + earlier. */ + d[2] = orig[2]; + d[6] = orig[6]; + orig = (uintptr_t *) orig[4]; + } + else + { + size_t sz = d[1] * nthreads; + /* Should use omp_alloc if d[3] is not -1. 
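A worked example with illustrative numbers: registering a single long long reduction for a team of 4 threads gives d[1] == 8, hence sz == 32; on entry d[2] holds the requested alignment for gomp_aligned_alloc, on exit it holds the pointer to the 32 cleared bytes, and d[6] records one past the end of that block. In the ORIG path above, latecomer threads of a worksharing construct instead copy d[2] and d[6] from the array registered by the first thread, so all threads share one set of buffers.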
*/ + void *ptr = gomp_aligned_alloc (d[2], sz); + memset (ptr, '\0', sz); + d[2] = (uintptr_t) ptr; + d[6] = d[2] + sz; + } d[5] = 0; - d[6] = d[2] + sz; total_cnt += d[0]; if (d[4] == 0) { @@ -2028,6 +2107,38 @@ gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads) d[5] = (uintptr_t) new_htab; } +static void +gomp_create_artificial_team (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task_icv *icv; + struct gomp_team *team = gomp_new_team (1); + struct gomp_task *task = thr->task; + icv = task ? &task->icv : &gomp_global_icv; + team->prev_ts = thr->ts; + thr->ts.team = team; + thr->ts.team_id = 0; + thr->ts.work_share = &team->work_shares[0]; + thr->ts.last_work_share = NULL; +#ifdef HAVE_SYNC_BUILTINS + thr->ts.single_count = 0; +#endif + thr->ts.static_trip = 0; + thr->task = &team->implicit_task[0]; + gomp_init_task (thr->task, NULL, icv); + if (task) + { + thr->task = task; + gomp_end_task (); + free (task); + thr->task = &team->implicit_task[0]; + } +#ifdef LIBGOMP_USE_PTHREADS + else + pthread_setspecific (gomp_thread_destructor, thr); +#endif +} + /* The format of data is: data[0] cnt data[1] size @@ -2039,7 +2150,12 @@ gomp_reduction_register (uintptr_t *data, uintptr_t *old, unsigned nthreads) cnt times ent[0] address ent[1] offset - ent[2] used internally (pointer to data[0]). */ + ent[2] used internally (pointer to data[0]) + The entries are sorted by increasing offset, so that a binary + search can be performed. Normally, data[8] is 0, exception is + for worksharing construct task reductions in cancellable parallel, + where at offset 0 there should be space for a pointer and an integer + which are used internally. */ void GOMP_taskgroup_reduction_register (uintptr_t *data) @@ -2047,41 +2163,18 @@ GOMP_taskgroup_reduction_register (uintptr_t *data) struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; struct gomp_task *task; + unsigned nthreads; if (__builtin_expect (team == NULL, 0)) { /* The task reduction code needs a team and task, so for orphaned taskgroups just create the implicit team. */ - struct gomp_task_icv *icv; - team = gomp_new_team (1); - task = thr->task; - icv = task ? 
&task->icv : &gomp_global_icv; - team->prev_ts = thr->ts; - thr->ts.team = team; - thr->ts.team_id = 0; - thr->ts.work_share = &team->work_shares[0]; - thr->ts.last_work_share = NULL; -#ifdef HAVE_SYNC_BUILTINS - thr->ts.single_count = 0; -#endif - thr->ts.static_trip = 0; - thr->task = &team->implicit_task[0]; - gomp_init_task (thr->task, NULL, icv); - if (task) - { - thr->task = task; - gomp_end_task (); - free (task); - thr->task = &team->implicit_task[0]; - } -#ifdef LIBGOMP_USE_PTHREADS - else - pthread_setspecific (gomp_thread_destructor, thr); -#endif - GOMP_taskgroup_start (); + gomp_create_artificial_team (); + ialias_call (GOMP_taskgroup_start) (); + team = thr->ts.team; } - unsigned nthreads = team->nthreads; + nthreads = team->nthreads; task = thr->task; - gomp_reduction_register (data, task->taskgroup->reductions, nthreads); + gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); task->taskgroup->reductions = data; } @@ -2175,11 +2268,56 @@ struct gomp_taskgroup * gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) { struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); - gomp_reduction_register (data, NULL, nthreads); + gomp_reduction_register (data, NULL, NULL, nthreads); taskgroup->reductions = data; return taskgroup; } +void +gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task = thr->task; + unsigned nthreads = team->nthreads; + gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); + task->taskgroup->reductions = data; +} + +void +gomp_workshare_taskgroup_start (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task; + + if (team == NULL) + { + gomp_create_artificial_team (); + team = thr->ts.team; + } + task = thr->task; + task->taskgroup = gomp_taskgroup_init (task->taskgroup); + task->taskgroup->workshare = true; +} + +void +GOMP_workshare_task_reduction_unregister (bool cancelled) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task *task = thr->task; + struct gomp_team *team = thr->ts.team; + uintptr_t *data = task->taskgroup->reductions; + ialias_call (GOMP_taskgroup_end) (); + if (thr->ts.team_id == 0) + ialias_call (GOMP_taskgroup_reduction_unregister) (data); + else + htab_free ((struct htab *) data[5]); + + if (!cancelled) + gomp_team_barrier_wait (&team->barrier); +} + int omp_in_final (void) { diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index d20af399d385..4621405aa58e 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -149,8 +149,17 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (flags & GOMP_TASK_FLAG_NOGROUP) { - if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) - return; + if (__builtin_expect (gomp_cancel_var, 0) + && thr->task + && thr->task->taskgroup) + { + if (thr->task->taskgroup->cancelled) + return; + if (thr->task->taskgroup->workshare + && thr->task->taskgroup->prev + && thr->task->taskgroup->prev->cancelled) + return; + } } else { @@ -292,19 +301,31 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ - if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) - || (taskgroup && taskgroup->cancelled)) - && cpyfn == NULL, 0)) + if (__builtin_expect (gomp_cancel_var, 0) + && cpyfn == NULL) { - gomp_mutex_unlock (&team->task_lock); - for (i = 0; i < num_tasks; i++) + if (gomp_team_barrier_cancelled (&team->barrier)) + { + do_cancel: + gomp_mutex_unlock (&team->task_lock); + for (i = 0; i < num_tasks; i++) + { + gomp_finish_task (tasks[i]); + free (tasks[i]); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call (GOMP_taskgroup_end) (); + return; + } + if (taskgroup) { - gomp_finish_task (tasks[i]); - free (tasks[i]); + if (taskgroup->cancelled) + goto do_cancel; + if (taskgroup->workshare + && taskgroup->prev + && taskgroup->prev->cancelled) + goto do_cancel; } - if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) - ialias_call (GOMP_taskgroup_end) (); - return; } if (taskgroup) taskgroup->num_children += num_tasks; diff --git a/libgomp/team.c b/libgomp/team.c index 0956a1f8f1ff..e3e4c4d1ef27 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -187,7 +187,7 @@ gomp_new_team (unsigned nthreads) team->single_count = 0; #endif team->work_shares_to_free = &team->work_shares[0]; - gomp_init_work_share (&team->work_shares[0], false, nthreads); + gomp_init_work_share (&team->work_shares[0], 0, nthreads); team->work_shares[0].next_alloc = NULL; team->work_share_list_free = NULL; team->work_share_list_alloc = &team->work_shares[1]; diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-14.C b/libgomp/testsuite/libgomp.c++/task-reduction-14.C new file mode 100644 index 000000000000..3f4e79b16c59 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-14.C @@ -0,0 +1,72 @@ +#include <stdlib.h> +#include <omp.h> + +struct A { A (); ~A (); A (const A &); static int cnt1, cnt2, cnt3; int a; }; +int A::cnt1; +int A::cnt2; +int A::cnt3; +A::A () : a (0) +{ + #pragma omp atomic + cnt1++; +} +A::A (const A &x) : a (x.a) +{ + #pragma omp atomic + cnt2++; +} +A::~A () +{ + #pragma omp atomic + cnt3++; +} +#pragma omp declare reduction (+: A: omp_out.a += omp_in.a) + +void +foo (int x) +{ + A a, b[2]; + int d = 1; + long int e[2] = { 1L, 1L }; + int c = 0; + #pragma omp parallel + { + if (x && omp_get_thread_num () == 0) + { + for (int i = 0; i < 10000000; ++i) + asm volatile (""); + c = 1; + #pragma omp cancel parallel + } + #pragma omp for reduction (task, +: a, b) reduction (task, *: d, e) + for (int i = 0; i < 64; i++) + #pragma omp task in_reduction (+: a, b) in_reduction (*: d, e) + { + a.a++; + b[0].a += 2; + b[1].a += 3; + d *= ((i & 7) == 0) + 1; + e[0] *= ((i & 7) == 3) + 1; + e[1] *= ((i & 3) == 2) + 1; + } + if (x && omp_get_cancellation ()) + abort (); + } + if (!c) + { + if (a.a != 64 || b[0].a != 128 || b[1].a != 192) + abort (); + if (d != 256 || e[0] != 256L || e[1] != 65536L) + abort (); + } +} + +int +main () +{ + int c1 = A::cnt1, c2 = A::cnt2, c3 = A::cnt3; + volatile int zero = 0; + foo (zero); + if (A::cnt1 + A::cnt2 - c1 - c2 != A::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-15.C b/libgomp/testsuite/libgomp.c++/task-reduction-15.C new file mode 100644 index 000000000000..8a01e6b240a1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-15.C @@ -0,0 +1,75 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; + +template <typename T, typename U> +void +foo (T &c, U &d) +{ + T i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7;
+      b *= 2;
+      c += 9;
+      d *= 3;
+    }
+}
+
+template <typename T, typename U>
+void
+bar ()
+{
+  T cs = 0;
+  T &c = cs;
+  U ds = 1;
+  #pragma omp parallel if (0)
+  {
+    U &d = ds;
+    #pragma omp parallel
+    {
+      T i;
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b, d)
+      for (i = 0; i < 4; i++)
+	#pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+	{
+	  T j;
+	  a += 7;
+	  b *= 2;
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d)
+	    {
+	      a += 7;
+	      b *= 2;
+	      c += 9;
+	      d *= 3;
+	      foo (c, d);
+	    }
+	  c += 9;
+	  d *= 3;
+	}
+#define THREEP4 (3LL * 3LL * 3LL * 3LL)
+      if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+		* THREEP4))
+	abort ();
+      if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+	abort ();
+    }
+  }
+  if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9)
+    abort ();
+  if (ds != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4
+	     * THREEP4))
+    abort ();
+}
+
+int
+main ()
+{
+  bar <int, long long int> ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-16.C b/libgomp/testsuite/libgomp.c++/task-reduction-16.C
new file mode 100644
index 000000000000..5835edcbd5b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-16.C
@@ -0,0 +1,130 @@
+extern "C" void abort ();
+
+struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long long int x, int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as = { 0LL, 7 };
+S &a = as;
+S bs (1LL, 5);
+S &b = bs;
+
+void
+foo (S &c, S &d)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a)
+    {
+      a.s += 7;
+      b.s *= 2;
+      c.s += 9;
+      d.s *= 3;
+      if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+	  || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+	abort ();
+    }
+}
+
+void
+test ()
+{
+  S cs = { 0LL, 7 };
+  S &c = cs;
+  S ds (1LL, 5);
+  #pragma omp parallel if (0)
+  {
+    S &d = ds;
+    #pragma omp parallel shared (a, b, c, d)
+    {
+      #pragma omp for schedule (static, 1) reduction (task, +: a, c) reduction (task, *: b, d)
+      for (int i = 0; i < 4; i++)
+	#pragma omp task in_reduction (*: b, d) in_reduction (+: a, c)
+	{
+	  int j;
+	  a.s += 7;
+	  b.s *= 2;
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a) in_reduction (*: b) \
+			     in_reduction (+: c) in_reduction (*: d)
+	    {
+	      a.s += 7;
+	      b.s *= 2;
+	      c.s += 9;
+	      d.s *= 3;
+	      foo (c, d);
+	      if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+		  || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+		abort ();
+	    }
+	  c.s += 9;
+	  d.s *= 3;
+	  if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)
+	      || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9))
+	    abort ();
+	}
+#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL)
+      if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5)
+	abort ();
+      if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5
+	  || c.s != 28 * 9 || c.t != 7)
+	abort ();
+    }
+  }
+  if (a.s != 28 * 7
+      || a.t != 7 || b.s != (1L << 28) || b.t != 5
+      || c.s != 28 * 9 || c.t != 7)
+    abort ();
+  if (ds.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || ds.t != 5)
+    abort ();
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  test ();
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-17.C b/libgomp/testsuite/libgomp.c++/task-reduction-17.C
new file mode 100644
index 000000000000..c00c8e46542e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-17.C
@@ -0,0 +1,300 @@
+extern "C" void abort ();
+
+int as[2];
+int (&a)[2] = as;
+long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 };
+long long int (&b)[7] = bs;
+int es[3] = { 5, 0, 5 };
+int (&e)[3] = es;
+int fs[5] = { 6, 7, 0, 0, 9 };
+int (&f)[5] = fs;
+int gs[4] = { 1, 0, 0, 2 };
+int (&g)[4] = gs;
+int hs[3] = { 0, 1, 4 };
+int (&h)[3] = hs;
+int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+int (&k)[4][2] = ks;
+long long *ss;
+long long *&s = ss;
+long long (*ts)[2];
+long long (*&t)[2] = ts;
+
+template <typename T>
+void
+foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2])
+{
+  T i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0] += 7;
+      a[1] += 17;
+      b[2] *= 2;
+      b[4] *= 2;
+      c[0] += 6;
+      d[1] *= 2;
+      e[1] += 19;
+      f[2] += 21;
+      f[3] += 23;
+      g[1] += 25;
+      g[2] += 27;
+      h[0] += 29;
+      k[1][0] += 31;
+      k[2][1] += 33;
+      m[1] += 19;
+      r[2] += 21;
+      r[3] += 23;
+      o[1] += 25;
+      o[2] += 27;
+      p[0] += 29;
+      q[1][0] += 31;
+      q[2][1] += 33;
+      s[1] *= 2;
+      t[2][0] *= 2;
+      t[3][1] *= 2;
+    }
+}
+
+template <typename T, typename I>
+void
+test (T &n, I x, I y)
+{
+  T cs[2] = { 0, 0 };
+  T (&c)[2] = cs;
+  T ps[3] = { 0, 1, 4 };
+  T (&p)[3] = ps;
+  T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+  T (&q)[4][2] = qs;
+  long long sb[4] = { 5, 1, 1, 6 };
+  long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+  T ms[3] = { 5, 0, 5 };
+  T os[4] = { 1, 0, 0, 2 };
+  s = sb;
+  t = tb;
+  #pragma omp parallel if (0)
+  {
+    long long int ds[] = { 1, 1 };
+    long long int (&d)[2] = ds;
+    T (&m)[3] = ms;
+    T rs[5] = { 6, 7, 0, 0, 9 };
+    T (&r)[5] = rs;
+    T (&o)[4] = os;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task,+: a, c) reduction (task,*: b[2 * n:3 * n], d) \
+		      reduction (task,+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+		      reduction (task,+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+		      reduction (task,*: t[2:2][:], s[1:n + 1]) schedule (dynamic)
+      for (I i = x; i != y; i++)
+	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][:])
+	{
+	  T j;
+	  a[0] += 2;
+	  a[1] += 3;
+	  b[2] *= 2;
+	  f[3] += 8;
+	  g[1] += 9;
+	  g[2] += 10;
+	  h[0] += 11;
+	  k[1][1] += 13;
+	  k[2][1] += 15;
+	  m[1] += 16;
+	  r[2] += 8;
+	  s[1] *= 2;
+	  t[2][1] *= 2;
+	  t[3][1] *= 2;
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a, c[:2]) \
+			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+			     in_reduction (*: s[n:2], t[2:2][:])
+	    {
+	      m[1] += 6;
+	      r[2] += 7;
+	      q[1][0] += 17;
+	      q[2][0] += 19;
+	      a[0] += 4;
+	      a[1] += 5;
+	      b[3] *= 2;
+	      b[4] *= 2;
+	      f[3] += 18;
+	      g[1] += 29;
+	      g[2] += 18;
+	      h[0] += 19;
+	      s[2] *= 2;
+	      t[2][0] *= 2;
+	      t[3][0] *= 2;
+	      T *cp = c;
+	      long long int *dp = d;
+	      T *rp = r;
+	      T *pp = p;
+	      foo (n, cp, dp, m, rp, o, pp, q);
+	      r[3] += 18;
+	      o[1] += 29;
+	      o[2] += 18;
+	      p[0] += 19;
+	      c[0] += 4;
+	      c[1] += 5;
+	      d[0] *= 2;
+	      e[1] += 6;
+	      f[2] += 7;
+	      k[1][0] += 17;
+	      k[2][0] += 19;
+	    }
+	  r[3] += 8;
+	  o[1] += 9;
+	  o[2] += 10;
+	  p[0] += 11;
+	  q[1][1] += 13;
+	  q[2][1] += 15;
+	  b[3] *= 2;
+	  c[0] += 4;
+	  c[1] += 9;
+	  d[0] *= 2;
+	  e[1] += 16;
+	  f[2] += 8;
+	}
+      if (d[0] != 1LL << (8 + 4)
+	  || d[1] != 1LL << 16
+	  || m[0] != 5
+	  || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+	  || m[2] != 5
+	  || r[0] != 6
+	  || r[1] != 7
+	  || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+	  || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+	  || r[4] != 9
+	  || o[0] != 1
+	  || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+	  || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+	  || o[3] != 2)
+	abort ();
+      if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+	  || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+	  || b[0] != 9 || b[1] != 11
+	  || b[2] != 1LL << (16 + 4)
+	  || b[3] != 1LL << (8 + 4)
+	  || b[4] != 1LL << (16 + 8)
+	  || b[5] != 13 || b[6] != 15
+	  || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+	  || c[1] != 5 * 8 + 9 * 4
+	  || e[0] != 5
+	  || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+	  || e[2] != 5
+	  || f[0] != 6
+	  || f[1] != 7
+	  || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+	  || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+	  || f[4] != 9
+	  || g[0] != 1
+	  || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+	  || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+	  || g[3] != 2
+	  || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+	  || h[1] != 1 || h[2] != 4
+	  || k[0][0] != 5 || k[0][1] != 6
+	  || k[1][0] != 31 * 16 + 17 * 8
+	  || k[1][1] != 13 * 4
+	  || k[2][0] != 19 * 8
+	  || k[2][1] != 33 * 16 + 15 * 4
+	  || k[3][0] != 7 || k[3][1] != 8
+	  || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+	  || p[1] != 1 || p[2] != 4
+	  || q[0][0] != 5 || q[0][1] != 6
+	  || q[1][0] != 31 * 16 + 17 * 8
+	  || q[1][1] != 13 * 4
+	  || q[2][0] != 19 * 8
+	  || q[2][1] != 33 * 16 + 15 * 4
+	  || q[3][0] != 7 || q[3][1] != 8
+	  || sb[0] != 5
+	  || sb[1] != 1LL << (16 + 4)
+	  || sb[2] != 1LL << 8
+	  || sb[3] != 6
+	  || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+	  || tb[2][0] != 1LL << (16 + 8)
+	  || tb[2][1] != 1LL << 4
+	  || tb[3][0] != 1LL << 8
+	  || tb[3][1] != 1LL << (16 + 4)
+	  || tb[4][0] != 13 || tb[4][1] != 14)
+	abort ();
+    }
+    if (d[0] != 1LL << (8 + 4)
+	|| d[1] != 1LL << 16
+	|| m[0] != 5
+	|| m[1] != 19 * 16 + 6 * 8 + 16 * 4
+	|| m[2] != 5
+	|| r[0] != 6
+	|| r[1] != 7
+	|| r[2] != 21 * 16 + 7 * 8 + 8 * 4
+	|| r[3] != 23 * 16 + 18 * 8 + 8 * 4
+	|| r[4] != 9
+	|| o[0] != 1
+	|| o[1] != 25 * 16 + 29 * 8 + 9 * 4
+	|| o[2] != 27 * 16 + 18 * 8 + 10 * 4
+	|| o[3] != 2)
+      abort ();
+  }
+  if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+      || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+      || b[0] != 9 || b[1] != 11
+      || b[2] != 1LL << (16 + 4)
+      || b[3] != 1LL << (8 + 4)
+      || b[4] != 1LL << (16 + 8)
+      || b[5] != 13 || b[6] != 15
+      || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+      || c[1] != 5 * 8 + 9 * 4
+      || e[0] != 5
+      || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || e[2] != 5
+      || f[0] != 6
+      || f[1] != 7
+      || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || f[4] != 9
+      || g[0] != 1
+      || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || g[3] != 2
+      || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || h[1] != 1 || h[2] != 4
+      || k[0][0] != 5 || k[0][1] != 6
+      || k[1][0] != 31 * 16 + 17 * 8
+      || k[1][1] != 13 * 4
+      || k[2][0] != 19 * 8
+      || k[2][1] != 33 * 16 + 15 * 4
+      || k[3][0] != 7 || k[3][1] != 8
+      || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || p[1] != 1 || p[2] != 4
+      || q[0][0] != 5 || q[0][1] != 6
+      || q[1][0] != 31 * 16 + 17 * 8
+      || q[1][1] != 13 * 4
+      || q[2][0] != 19 * 8
+      || q[2][1] != 33 * 16 + 15 * 4
+      || q[3][0] != 7 || q[3][1] != 8
+      || sb[0] != 5
+      || sb[1] != 1LL << (16 + 4)
+      || sb[2] != 1LL << 8
+      || sb[3] != 6
+      || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12
+      || tb[2][0] != 1LL << (16 + 8)
+      || tb[2][1] != 1LL << 4
+      || tb[3][0] != 1LL << 8
+      || tb[3][1] != 1LL << (16 + 4)
+      || tb[4][0] != 13 || tb[4][1] != 14)
+    abort ();
+}
+
+int
+main ()
+{
+  int n = 1;
+  test (n, 0ULL, 4ULL);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-18.C b/libgomp/testsuite/libgomp.c++/task-reduction-18.C
new file mode 100644
index 000000000000..99c0e3727d43
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-18.C
@@ -0,0 +1,325 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S a[2] = { { 0, 7 }, { 0, 7 } };
+S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S *s;
+S (*t)[2];
+
+template <int N>
+void
+foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2])
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0].s += 7;
+      a[1].s += 17;
+      b[2].s *= 2;
+      b[4].s *= 2;
+      c[0].s += 6;
+      d[1].s *= 2;
+      e[1].s += 19;
+      f[2].s += 21;
+      f[3].s += 23;
+      g[1].s += 25;
+      g[2].s += 27;
+      h[0].s += 29;
+      k[1][0].s += 31;
+      k[2][1].s += 33;
+      m[1].s += 19;
+      r[2].s += 21;
+      r[3].s += 23;
+      o[1].s += 25;
+      o[2].s += 27;
+      p[0].s += 29;
+      q[1][0].s += 31;
+      q[2][1].s += 33;
+      s[1].s *= 2;
+      t[2][0].s *= 2;
+      t[3][1].s *= 2;
+      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+	  || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+	abort ();
+      for (int z = 0; z < 2; z++)
+	if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+	    || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+	    || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+	    || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+	    || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+	    || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+	    || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+	  abort ();
+      for (int z = 0; z < 3; z++)
+	if (b[z + 2].t != 5 && b[z + 2].t != 9)
+	  abort ();
+    }
+}
+
+template <int N>
+void
+test (int n)
+{
+  S c[2] = { { 0, 7 }, { 0, 7 } };
+  S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+  S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+  S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+  S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+  s = ss;
+  t = tt;
+  #pragma omp parallel num_threads (1) if (0)
+  {
+    S d[] = { { 1, 5 }, { 1, 5 } };
+    S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+    S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+    S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+    volatile unsigned long long x = 0;
+    volatile unsigned long long y = 4;
+    volatile unsigned long long z = 1;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+		      reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+		      reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+		      reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		      schedule (nonmonotonic: guided, 1)
+      for (unsigned long long i = x; i < y; i += z)
+	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][:])
+	{
+	  int j;
+	  a[0].s += 2;
+	  a[1].s += 3;
+	  b[2].s *= 2;
+	  f[3].s += 8;
+	  g[1].s += 9;
+	  g[2].s += 10;
+	  h[0].s += 11;
+	  k[1][1].s += 13;
+	  k[2][1].s += 15;
+	  m[1].s += 16;
+	  r[2].s += 8;
+	  s[1].s *= 2;
+	  t[2][1].s *= 2;
+	  t[3][1].s *= 2;
+	  if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+	      || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+	    abort ();
+	  for (int z = 0; z < 2; z++)
+	    if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+		|| (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+		|| (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+		|| (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+		|| (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+		|| (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+		|| (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+	      abort ();
+	  for (int z = 0; z < 3; z++)
+	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
+	      abort ();
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a, c[:2]) \
+			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+			     in_reduction (*: s[n:2], t[2:2][:])
+	    {
+	      m[1].s += 6;
+	      r[2].s += 7;
+	      q[1][0].s += 17;
+	      q[2][0].s += 19;
+	      a[0].s += 4;
+	      a[1].s += 5;
+	      b[3].s *= 2;
+	      b[4].s *= 2;
+	      f[3].s += 18;
+	      g[1].s += 29;
+	      g[2].s += 18;
+	      h[0].s += 19;
+	      s[2].s *= 2;
+	      t[2][0].s *= 2;
+	      t[3][0].s *= 2;
+	      foo<N> (n, c, d, m, r, o, p, q);
+	      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+		  || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+		abort ();
+	      for (int z = 0; z < 2; z++)
+		if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+		    || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+		    || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+		    || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+		    || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+		    || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+		    || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+		  abort ();
+	      for (int z = 0; z < 3; z++)
+		if (b[z + 2].t != 5 && b[z + 2].t != 9)
+		  abort ();
+	      r[3].s += 18;
+	      o[1].s += 29;
+	      o[2].s += 18;
+	      p[0].s += 19;
+	      c[0].s += 4;
+	      c[1].s += 5;
+	      d[0].s *= 2;
+	      e[1].s += 6;
+	      f[2].s += 7;
+	      k[1][0].s += 17;
+	      k[2][0].s += 19;
+	    }
+	  r[3].s += 8;
+	  o[1].s += 9;
+	  o[2].s += 10;
+	  p[0].s += 11;
+	  q[1][1].s += 13;
+	  q[2][1].s += 15;
+	  b[3].s *= 2;
+	  c[0].s += 4;
+	  c[1].s += 9;
+	  d[0].s *= 2;
+	  e[1].s += 16;
+	  f[2].s += 8;
+	}
+      if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+	  || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+	  || b[0].s != 9 || b[1].s != 11
+	  || b[2].s != 1LL << (16 + 4)
+	  || b[3].s != 1LL << (8 + 4)
+	  || b[4].s != 1LL << (16 + 8)
+	  || b[5].s != 13 || b[6].s != 15
+	  || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+	  || c[1].s != 5 * 8 + 9 * 4
+	  || e[0].s != 5
+	  || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+	  || e[2].s != 5
+	  || f[0].s != 6
+	  || f[1].s != 7
+	  || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+	  || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+	  || f[4].s != 9
+	  || g[0].s != 1
+	  || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+	  || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+	  || g[3].s != 2
+	  || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+	  || h[1].s != 1 || h[2].s != 4
+	  || k[0][0].s != 5 || k[0][1].s != 6
+	  || k[1][0].s != 31 * 16 + 17 * 8
+	  || k[1][1].s != 13 * 4
+	  || k[2][0].s != 19 * 8
+	  || k[2][1].s != 33 * 16 + 15 * 4
+	  || k[3][0].s != 7 || k[3][1].s != 8
+	  || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+	  || p[1].s != 1 || p[2].s != 4
+	  || q[0][0].s != 5 || q[0][1].s != 6
+	  || q[1][0].s != 31 * 16 + 17 * 8
+	  || q[1][1].s != 13 * 4
+	  || q[2][0].s != 19 * 8
+	  || q[2][1].s != 33 * 16 + 15 * 4
+	  || q[3][0].s != 7 || q[3][1].s != 8
+	  || ss[0].s != 5
+	  || ss[1].s != 1LL << (16 + 4)
+	  || ss[2].s != 1LL << 8
+	  || ss[3].s != 6
+	  || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12
+	  || tt[2][0].s != 1LL << (16 + 8)
+	  || tt[2][1].s != 1LL << 4
+	  || tt[3][0].s != 1LL << 8
+	  || tt[3][1].s != 1LL << (16 + 4)
+	  || tt[4][0].s != 13 || tt[4][1].s != 14)
+	abort ();
+    }
+    if (d[0].s != 1LL << (8 + 4)
+	|| d[1].s != 1LL << 16
+	|| m[0].s != 5
+	|| m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+	|| m[2].s != 5
+	|| r[0].s != 6
+	|| r[1].s != 7
+	|| r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+	|| r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+	|| r[4].s != 9
+	|| o[0].s != 1
+	|| o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+	|| o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+	|| o[3].s != 2)
+      abort ();
+    if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+      abort ();
+    for (int z = 0; z < 2; z++)
+      if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+	  || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+	  || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+	  || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+	abort ();
+    for (int z = 0; z < 3; z++)
+      if (b[z + 2].t != 5)
+	abort ();
+  }
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  test<0> (1);
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-19.C b/libgomp/testsuite/libgomp.c++/task-reduction-19.C
new file mode 100644
index 000000000000..15945c57cc24
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/task-reduction-19.C
@@ -0,0 +1,343 @@
+extern "C" void abort ();
+
+struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; };
+
+int S::cnt1;
+int S::cnt2;
+int S::cnt3;
+
+S::S ()
+{
+  #pragma omp atomic
+  cnt1++;
+}
+
+S::S (long int x, long int y) : s (x), t (y)
+{
+  #pragma omp atomic update
+  ++cnt2;
+}
+
+S::~S ()
+{
+  #pragma omp atomic
+  cnt3 = cnt3 + 1;
+  if (t < 3 || t > 9 || (t & 1) == 0)
+    abort ();
+}
+
+void
+bar (S *p, S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+baz (S *o, S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3))
+#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig))
+
+S as[2] = { { 0, 7 }, { 0, 7 } };
+S (&a)[2] = as;
+S bs[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } };
+S (&b)[7] = bs;
+S es[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+S (&e)[3] = es;
+S fs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+S (&f)[5] = fs;
+S gs[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+S (&g)[4] = gs;
+S hs[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+S (&h)[3] = hs;
+S ks[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+S (&k)[4][2] = ks;
+S *ss;
+S *&s = ss;
+S (*ts)[2];
+S (*&t)[2] = ts;
+
+template <typename T>
+void
+foo (T &n, S *&c, S *&d, S (&m)[3], S *&r, S (&o)[4], S *&p, S (&q)[4][2])
+{
+  T i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0].s += 7;
+      a[1].s += 17;
+      b[2].s *= 2;
+      b[4].s *= 2;
+      c[0].s += 6;
+      d[1].s *= 2;
+      e[1].s += 19;
+      f[2].s += 21;
+      f[3].s += 23;
+      g[1].s += 25;
+      g[2].s += 27;
+      h[0].s += 29;
+      k[1][0].s += 31;
+      k[2][1].s += 33;
+      m[1].s += 19;
+      r[2].s += 21;
+      r[3].s += 23;
+      o[1].s += 25;
+      o[2].s += 27;
+      p[0].s += 29;
+      q[1][0].s += 31;
+      q[2][1].s += 33;
+      s[1].s *= 2;
+      t[2][0].s *= 2;
+      t[3][1].s *= 2;
+      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+	  || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+	abort ();
+      for (T z = 0; z < 2; z++)
+	if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+	    || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+	    || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+	    || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+	    || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+	    || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+	    || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+	  abort ();
+      for (T z = 0; z < 3; z++)
+	if (b[z + 2].t != 5 && b[z + 2].t != 9)
+	  abort ();
+    }
+}
+
+template <typename T>
+void
+test (T &n)
+{
+  S cs[2] = { { 0, 7 }, { 0, 7 } };
+  S (&c)[2] = cs;
+  S ps[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } };
+  S (&p)[3] = ps;
+  S qs[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } };
+  S (&q)[4][2] = qs;
+  S sb[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } };
+  S tb[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } };
+  S ms[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } };
+  S os[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } };
+  s = sb;
+  t = tb;
+  #pragma omp parallel if (0)
+  {
+    S ds[] = { { 1, 5 }, { 1, 5 } };
+    S (&d)[2] = ds;
+    S (&m)[3] = ms;
+    S rs[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } };
+    S (&r)[5] = rs;
+    S (&o)[4] = os;
+    #pragma omp parallel
+    {
+      #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+		      reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+		      reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+		      reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		      schedule (monotonic: runtime)
+      for (T i = 0; i < 4; i++)
+	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][:])
+	{
+	  T j;
+	  a[0].s += 2;
+	  a[1].s += 3;
+	  b[2].s *= 2;
+	  f[3].s += 8;
+	  g[1].s += 9;
+	  g[2].s += 10;
+	  h[0].s += 11;
+	  k[1][1].s += 13;
+	  k[2][1].s += 15;
+	  m[1].s += 16;
+	  r[2].s += 8;
+	  s[1].s *= 2;
+	  t[2][1].s *= 2;
+	  t[3][1].s *= 2;
+	  if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+	      || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+	    abort ();
+	  for (T z = 0; z < 2; z++)
+	    if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+		|| (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+		|| (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+		|| (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+		|| (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+		|| (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+		|| (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+	      abort ();
+	  for (T z = 0; z < 3; z++)
+	    if (b[z + 2].t != 5 && b[z + 2].t != 9)
+	      abort ();
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a, c[:2]) \
+			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+			     in_reduction (*: s[n:2], t[2:2][:])
+	    {
+	      m[1].s += 6;
+	      r[2].s += 7;
+	      q[1][0].s += 17;
+	      q[2][0].s += 19;
+	      a[0].s += 4;
+	      a[1].s += 5;
+	      b[3].s *= 2;
+	      b[4].s *= 2;
+	      f[3].s += 18;
+	      g[1].s += 29;
+	      g[2].s += 18;
+	      h[0].s += 19;
+	      s[2].s *= 2;
+	      t[2][0].s *= 2;
+	      t[3][0].s *= 2;
+	      S *cp = c;
+	      S *dp = d;
+	      S *rp = r;
+	      S *pp = p;
+	      if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3)
+		  || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3))
+		abort ();
+	      for (T z = 0; z < 2; z++)
+		if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3)
+		    || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3)
+		    || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3)
+		    || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3)
+		    || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3)
+		    || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3)
+		    || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9))
+		  abort ();
+	      for (T z = 0; z < 3; z++)
+		if (b[z + 2].t != 5 && b[z + 2].t != 9)
+		  abort ();
+	      foo (n, cp, dp, m, rp, o, pp, q);
+	      r[3].s += 18;
+	      o[1].s += 29;
+	      o[2].s += 18;
+	      p[0].s += 19;
+	      c[0].s += 4;
+	      c[1].s += 5;
+	      d[0].s *= 2;
+	      e[1].s += 6;
+	      f[2].s += 7;
+	      k[1][0].s += 17;
+	      k[2][0].s += 19;
+	    }
+	  r[3].s += 8;
+	  o[1].s += 9;
+	  o[2].s += 10;
+	  p[0].s += 11;
+	  q[1][1].s += 13;
+	  q[2][1].s += 15;
+	  b[3].s *= 2;
+	  c[0].s += 4;
+	  c[1].s += 9;
+	  d[0].s *= 2;
+	  e[1].s += 16;
+	  f[2].s += 8;
+	}
+      if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4
+	  || a[1].s != 17 * 16 + 5 * 8 + 3 * 4
+	  || b[0].s != 9 || b[1].s != 11
+	  || b[2].s != 1LL << (16 + 4)
+	  || b[3].s != 1LL << (8 + 4)
+	  || b[4].s != 1LL << (16 + 8)
+	  || b[5].s != 13 || b[6].s != 15
+	  || c[0].s != 6 * 16 + 4 * 8 + 4 * 4
+	  || c[1].s != 5 * 8 + 9 * 4
+	  || e[0].s != 5
+	  || e[1].s != 19 * 16 + 6 * 8 + 16 * 4
+	  || e[2].s != 5
+	  || f[0].s != 6
+	  || f[1].s != 7
+	  || f[2].s != 21 * 16 + 7 * 8 + 8 * 4
+	  || f[3].s != 23 * 16 + 18 * 8 + 8 * 4
+	  || f[4].s != 9
+	  || g[0].s != 1
+	  || g[1].s != 25 * 16 + 29 * 8 + 9 * 4
+	  || g[2].s != 27 * 16 + 18 * 8 + 10 * 4
+	  || g[3].s != 2
+	  || h[0].s != 29 * 16 + 19 * 8 + 11 * 4
+	  || h[1].s != 1 || h[2].s != 4
+	  || k[0][0].s != 5 || k[0][1].s != 6
+	  || k[1][0].s != 31 * 16 + 17 * 8
+	  || k[1][1].s != 13 * 4
+	  || k[2][0].s != 19 * 8
+	  || k[2][1].s != 33 * 16 + 15 * 4
+	  || k[3][0].s != 7 || k[3][1].s != 8
+	  || p[0].s != 29 * 16 + 19 * 8 + 11 * 4
+	  || p[1].s != 1 || p[2].s != 4
+	  || q[0][0].s != 5 || q[0][1].s != 6
+	  || q[1][0].s != 31 * 16 + 17 * 8
+	  || q[1][1].s != 13 * 4
+	  || q[2][0].s != 19 * 8
+	  || q[2][1].s != 33 * 16 + 15 * 4
+	  || q[3][0].s != 7 || q[3][1].s != 8
+	  || sb[0].s != 5
+	  || sb[1].s != 1LL << (16 + 4)
+	  || sb[2].s != 1LL << 8
+	  || sb[3].s != 6
+	  || tb[0][0].s != 9 || tb[0][1].s != 10 || tb[1][0].s != 11 || tb[1][1].s != 12
+	  || tb[2][0].s != 1LL << (16 + 8)
+	  || tb[2][1].s != 1LL << 4
+	  || tb[3][0].s != 1LL << 8
+	  || tb[3][1].s != 1LL << (16 + 4)
+	  || tb[4][0].s != 13 || tb[4][1].s != 14)
+	abort ();
+      if (d[0].s != 1LL << (8 + 4)
+	  || d[1].s != 1LL << 16
+	  || m[0].s != 5
+	  || m[1].s != 19 * 16 + 6 * 8 + 16 * 4
+	  || m[2].s != 5
+	  || r[0].s != 6
+	  || r[1].s != 7
+	  || r[2].s != 21 * 16 + 7 * 8 + 8 * 4
+	  || r[3].s != 23 * 16 + 18 * 8 + 8 * 4
+	  || r[4].s != 9
+	  || o[0].s != 1
+	  || o[1].s != 25 * 16 + 29 * 8 + 9 * 4
+	  || o[2].s != 27 * 16 + 18 * 8 + 10 * 4
+	  || o[3].s != 2)
+	abort ();
+      if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7)
+	abort ();
+      for (T z = 0; z < 2; z++)
+	if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7
+	    || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7
+	    || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7
+	    || t[z + 2][0].t != 5 || t[z + 2][1].t != 5)
+	  abort ();
+      for (T z = 0; z < 3; z++)
+	if (b[z + 2].t != 5)
+	  abort ();
+    }
+  }
+}
+
+int
+main ()
+{
+  int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3;
+  int n = 1;
+  test (n);
+  if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c
new file mode 100644
index 000000000000..77395e2b0f3e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-parallel-1.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+  int a[64];
+  #pragma omp parallel
+  {
+    #pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+	#pragma omp cancel parallel
+      }
+    #pragma omp for
+    for (int i = 0; i < 64; i++)
+      a[i] = i;
+    if (omp_get_cancellation ())
+      abort ();
+  }
+  #pragma omp parallel
+  {
+    #pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+	#pragma omp cancel parallel
+      }
+    #pragma omp taskgroup
+    {
+      #pragma omp for
+      for (int i = 0; i < 64; i++)
+	#pragma omp task
+	a[i] += i;
+      if (omp_get_cancellation ())
+	abort ();
+    }
+  }
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c
new file mode 100644
index 000000000000..b9af83595b0e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/cancel-taskgroup-3.c
@@ -0,0 +1,68 @@
+/* { dg-do run } */
+/* { dg-set-target-env-var OMP_CANCELLATION "true" } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+int
+main ()
+{
+  int a = 0, i;
+  #pragma omp parallel
+  #pragma omp taskgroup
+  {
+    #pragma omp task
+    {
+      #pragma omp cancel taskgroup
+      if (omp_get_cancellation ())
+	abort ();
+    }
+    #pragma omp taskwait
+    #pragma omp for reduction (task, +: a)
+    for (i = 0; i < 64; ++i)
+      {
+	a++;
+	#pragma omp task in_reduction (+: a)
+	{
+	  volatile int zero = 0;
+	  a += zero;
+	  if (omp_get_cancellation ())
+	    abort ();
+	}
+      }
+    if (a != 64)
+      abort ();
+    #pragma omp task
+    {
+      if (omp_get_cancellation ())
+	abort ();
+    }
+  }
+  a = 0;
+  #pragma omp parallel
+  #pragma omp taskgroup
+  {
+    #pragma omp taskwait
+    #pragma omp for reduction (task, +: a)
+    for (i = 0; i < 64; ++i)
+      {
+	a++;
+	#pragma omp task in_reduction (+: a)
+	{
+	  volatile int zero = 0;
+	  a += zero;
+	  #pragma omp cancel taskgroup
+	  if (omp_get_cancellation ())
+	    abort ();
+	}
+      }
+    if (a != 64)
+      abort ();
+    #pragma omp task
+    {
+      if (omp_get_cancellation ())
+	abort ();
+    }
+  }
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
new file mode 100644
index 000000000000..038b0e269e76
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-11.c
@@ -0,0 +1,56 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+		   in_reduction (+: a)
+  {
+    a += 4;
+    b[1] *= 4;
+    c[1] &= ~(1UL << (i + 16));
+  }
+}
+
+void
+foo (unsigned long long int x, unsigned long long int y, unsigned long long int z)
+{
+  unsigned long long int i;
+  #pragma omp for schedule(runtime) reduction (task, +: a) \
+		  reduction (task, *: b) reduction (task, &: c[1:1])
+  for (i = x; i < y; i += z)
+    {
+      a++;
+      b[0] *= 2;
+      bar (i);
+      b[2] *= 3;
+      c[1] &= ~(1UL << i);
+    }
+}
+
+int
+main ()
+{
+  volatile int two = 2;
+  foo (two, 7 * two, two);
+  if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x15541554UL)
+    abort ();
+  a = 0;
+  b[0] = 1;
+  b[1] = 1;
+  b[2] = 1;
+  c[1] = ~0UL;
+  #pragma omp parallel
+  foo (two, 8 * two, two);
+  if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x55545554UL)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
new file mode 100644
index 000000000000..0ad92735ca74
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-12.c
@@ -0,0 +1,67 @@
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+int a, b[3] = { 1, 1, 1 };
+unsigned long int c[2] = { ~0UL, ~0UL };
+
+void
+bar (int i)
+{
+  #pragma omp task in_reduction (*: b[:3]) in_reduction (&: c[1:]) \
+		   in_reduction (+: a)
+  {
+    a += 4;
+    b[1] *= 4;
+    c[1] &= ~(1UL << (i + 16));
+  }
+}
+
+void
+foo (int x)
+{
+  #pragma omp sections reduction (task, +: a) reduction (task, *: b) \
+		       reduction (task, &: c[1:1])
+  {
+    {
+      a++; b[0] *= 2; bar (2); b[2] *= 3; c[1] &= ~(1UL << 2);
+    }
+    #pragma omp section
+    { b[0] *= 2; bar (4); b[2] *= 3; c[1] &= ~(1UL << 4); a++; }
+    #pragma omp section
+    { bar (6); b[2] *= 3; c[1] &= ~(1UL << 6); a++; b[0] *= 2; }
+    #pragma omp section
+    { b[2] *= 3; c[1] &= ~(1UL << 8); a++; b[0] *= 2; bar (8); }
+    #pragma omp section
+    { c[1] &= ~(1UL << 10); a++; b[0] *= 2; bar (10); b[2] *= 3; }
+    #pragma omp section
+    { a++; b[0] *= 2; b[2] *= 3; c[1] &= ~(1UL << 12); bar (12); }
+    #pragma omp section
+    if (x)
+      {
+	a++; b[0] *= 2; b[2] *= 3; bar (14); c[1] &= ~(1UL << 14);
+      }
+  }
+}
+
+int
+main ()
+{
+  volatile int one = 1;
+  foo (!one);
+  if (a != 30 || b[0] != 64 || b[1] != (1 << 12) || b[2] != 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x15541554UL)
+    abort ();
+  a = 0;
+  b[0] = 1;
+  b[1] = 1;
+  b[2] = 1;
+  c[1] = ~0UL;
+  #pragma omp parallel
+  foo (one);
+  if (a != 35 || b[0] != 128 || b[1] != (1 << 14) || b[2] != 3 * 3 * 3 * 3 * 3 * 3 * 3
+      || c[0] != ~0UL || c[1] != ~0x55545554UL)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c
index b7a29f043250..e0a946efba17 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-6.c
@@ -1,7 +1,7 @@
 #include <stdlib.h>
 #include <omp.h>
 
-struct S { unsigned long int s, t; };
+struct S { unsigned long long int s, t; };
 
 void
 rbar (struct S *p, struct S *o)
@@ -119,5 +119,7 @@ main ()
     abort ();
   if (m.s != 63 * 64 * 4 || m.t != 7)
     abort ();
+  if (r != t)
+    abort ();
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c
new file mode 100644
index 000000000000..7b0859db6f01
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-8.c
@@ -0,0 +1,141 @@
+#include <stdlib.h>
+#include <omp.h>
+
+struct S { unsigned long long int s, t; };
+
+void
+rbar (struct S *p, struct S *o)
+{
+  p->s = 1;
+  if (o->t != 5)
+    abort ();
+  p->t = 9;
+}
+
+static inline void
+rbaz (struct S *o, struct S *i)
+{
+  if (o->t != 5 || i->t != 9)
+    abort ();
+  o->s *= i->s;
+}
+
+#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \
+  initializer (omp_priv = { 0, 3 })
+#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \
+  initializer (rbar (&omp_priv, &omp_orig))
+
+struct S g = { 0, 7 };
+struct S h = { 1, 5 };
+
+int
+foo (int z, int *a, int *b)
+{
+  int x = 0;
+  #pragma omp taskloop reduction (+:x) in_reduction (+:b[0])
+  for (int i = z; i < z + 8; i++)
+    {
+      x += a[i];
+      *b += a[i] * 2;
+    }
+  return x;
+}
+
+unsigned long long int
+bar (int z, int *a, unsigned long long int *b, int *s)
+{
+  unsigned long long int x = 1;
+  #pragma omp taskloop reduction (*:x) in_reduction (*:b[0])
+  for (int i = z; i < z + 8; i++)
+    {
+      #pragma omp task in_reduction (*:x)
+      x *= a[i];
+      #pragma omp task in_reduction (*:b[0])
+      *b *= (3 - a[i]);
+      s[0]++;
+    }
+  return x;
+}
+
+void
+baz (int i, int *a, int *c)
+{
+  #pragma omp task in_reduction (*:h) in_reduction (+:g)
+  {
+    g.s += 7 * a[i];
+    h.s *= (3 - c[i]);
+    if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9))
+      abort ();
+  }
+}
+
+int
+main ()
+{
+  int i, j = 0, a[64], b = 0, c[64], f = 0;
+  unsigned long long int d = 1, e = 1;
+  volatile int one = 1;
+  int r = 0, s = 0, t;
+  struct S m = { 0, 7 };
+  struct S n = { 1, 5 };
+  for (i = 0; i < 64; i++)
+    {
+      a[i] = 2 * i;
+      c[i] = 1 + ((i % 3) != 1);
+    }
+  #pragma omp parallel reduction (task, +:b) shared(t) reduction(+:r, s)
+  {
+    int z, q1, q2, q3;
+    #pragma omp master
+    t = omp_get_num_threads ();
+    #pragma omp for schedule(static) reduction (task, +: f) reduction (+: j)
+    for (z = 0; z < 64; z += 8)
+      {
+	f++;
+	j += foo (z, a, &b);
+	j += foo (z, a, &f);
+      }
+    if (j != 63 * 64 * 2 || f != 63 * 64 * 2 + 8)
+      abort ();
+    r++;
+    #pragma omp taskgroup task_reduction (+: s)
+    {
+      #pragma omp for schedule(static, 1) reduction(task, *: d) reduction (*: e)
+      for (z = 0; z < 64; z += 8)
+	e *= bar (z, c, &d, &s);
+    }
+    if (e != (1ULL << 43) || d != (1ULL << 21))
+      abort ();
+    #pragma omp for schedule(monotonic: dynamic, 1) reduction (task, +: g, m) \
+		    reduction (task, *: h, n) collapse(3)
+    for (q1 = 0; q1 < one; q1++)
+      for (q2 = 0; q2 < 64; q2 += 8)
+	for (q3 = 0; q3 < one; ++q3)
+	  #pragma omp taskloop in_reduction (+: g, m) in_reduction (*: h, n) \
+			       nogroup
+	  for (i = q2; i < q2 + 8; ++i)
+	    {
+	      g.s += 3 * a[i];
+	      h.s *= (3 - c[i]);
+	      m.s += 4 * a[i];
+	      n.s *= c[i];
+	      if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)
+		  || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9))
+		abort ();
+	      baz (i, a, c);
+	    }
+    if (n.s != (1ULL << 43) || n.t != 5)
+      abort ();
+    if (g.s != 63 * 64 * 10 || g.t != 7)
+      abort ();
+    if (h.s != (1ULL << 42) || h.t != 5)
+      abort ();
+    if (m.s != 63 * 64 * 4 || m.t != 7)
+      abort ();
+  }
+  if (b != 63 * 64 * 2)
+    abort ();
+  if (r != t || s != 64)
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
new file mode 100644
index 000000000000..3d71fef86708
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-9.c
@@ -0,0 +1,217 @@
+#ifdef __cplusplus
+extern "C"
+#endif
+void abort (void);
+
+int a[2];
+long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 };
+int e[3] = { 5, 0, 5 };
+int f[5] = { 6, 7, 0, 0, 9 };
+int g[4] = { 1, 0, 0, 2 };
+int h[3] = { 0, 1, 4 };
+int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+long long *s;
+long long (*t)[2];
+
+void
+foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2])
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \
+		     in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \
+		     in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \
+		     in_reduction (*: s[1:2], t[2:2][:])
+    {
+      a[0] += 7;
+      a[1] += 17;
+      b[2] *= 2;
+      b[4] *= 2;
+      c[0] += 6;
+      d[1] *= 2;
+      e[1] += 19;
+      f[2] += 21;
+      f[3] += 23;
+      g[1] += 25;
+      g[2] += 27;
+      h[0] += 29;
+      k[1][0] += 31;
+      k[2][1] += 33;
+      m[1] += 19;
+      r[2] += 21;
+      r[3] += 23;
+      o[1] += 25;
+      o[2] += 27;
+      p[0] += 29;
+      q[1][0] += 31;
+      q[2][1] += 33;
+      s[1] *= 2;
+      t[2][0] *= 2;
+      t[3][1] *= 2;
+    }
+}
+
+void
+test (int n)
+{
+  int c[2] = { 0, 0 };
+  int p[3] = { 0, 1, 4 };
+  int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } };
+  long long ss[4] = { 5, 1, 1, 6 };
+  long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } };
+  long long int d[] = { 1, 1 };
+  int m[3] = { 5, 0, 5 };
+  int r[5] = { 6, 7, 0, 0, 9 };
+  int o[4] = { 1, 0, 0, 2 };
+  s = ss;
+  t = tt;
+  #pragma omp parallel num_threads(4)
+  {
+    int i;
+    #pragma omp for reduction (task, +: a, c) reduction (task, *: b[2 * n:3 * n], d) \
+		    reduction (task, +: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \
+		    reduction (task, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \
+		    reduction (task, *: t[2:2][:], s[1:n + 1]) \
+		    schedule(nonmonotonic: runtime)
+    for (i = 0; i < 4; i++)
+      {
+	#pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \
+			 in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \
+			 in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \
+			 in_reduction (*: s[1:2], t[2:2][:])
+	{
+	  int j;
+	  a[0] += 2;
+	  a[1] += 3;
+	  b[2] *= 2;
+	  f[3] += 8;
+	  g[1] += 9;
+	  g[2] += 10;
+	  h[0] += 11;
+	  k[1][1] += 13;
+	  k[2][1] += 15;
+	  m[1] += 16;
+	  r[2] += 8;
+	  s[1] *= 2;
+	  t[2][1] *= 2;
+	  t[3][1] *= 2;
+	  for (j = 0; j < 2; j++)
+	    #pragma omp task in_reduction (+: a, c[:2]) \
+			     in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \
+			     in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \
+			     in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \
+			     in_reduction (*: s[n:2], t[2:2][:])
+	    {
+	      m[1] += 6;
+	      r[2] += 7;
+	      q[1][0] += 17;
+	      q[2][0] += 19;
+	      a[0] += 4;
+	      a[1] += 5;
+	      b[3] *= 2;
+	      b[4] *= 2;
+	      f[3] += 18;
+	      g[1] += 29;
+	      g[2] += 18;
+	      h[0] += 19;
+	      s[2] *= 2;
+	      t[2][0] *= 2;
+	      t[3][0] *= 2;
+	      foo (n, c, d, m, r, o, p, q);
+	      r[3] += 18;
+	      o[1] += 29;
+	      o[2] += 18;
+	      p[0] += 19;
+	      c[0] += 4;
+	      c[1] += 5;
+	      d[0] *= 2;
+	      e[1] += 6;
+	      f[2] += 7;
+	      k[1][0] += 17;
+	      k[2][0] += 19;
+	    }
+	  r[3] += 8;
+	  o[1] += 9;
+	  o[2] += 10;
+	  p[0] += 11;
+	  q[1][1] += 13;
+	  q[2][1] += 15;
+	  b[3] *= 2;
+	  c[0] += 4;
+	  c[1] += 9;
+	  d[0] *= 2;
+	  e[1] += 16;
+	  f[2] += 8;
+	}
+      }
+  }
+  if (a[0] != 7 * 16 + 4 * 8 + 2 * 4
+      || a[1] != 17 * 16 + 5 * 8 + 3 * 4
+      || b[0] != 9 || b[1] != 11
+      || b[2] != 1LL << (16 + 4)
+      || b[3] != 1LL << (8 + 4)
+      || b[4] != 1LL << (16 + 8)
+      || b[5] != 13 || b[6] != 15
+      || c[0] != 6 * 16 + 4 * 8 + 4 * 4
+      || c[1] != 5 * 8 + 9 * 4
+      || d[0] != 1LL << (8 + 4)
+      || d[1] != 1LL << 16
+      || e[0] != 5
+      || e[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || e[2] != 5
+      || f[0] != 6
+      || f[1] != 7
+      || f[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || f[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || f[4] != 9
+      || g[0] != 1
+      || g[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || g[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || g[3] != 2
+      || h[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || h[1] != 1 || h[2] != 4
+      || k[0][0] != 5 || k[0][1] != 6
+      || k[1][0] != 31 * 16 + 17 * 8
+      || k[1][1] != 13 * 4
+      || k[2][0] != 19 * 8
+      || k[2][1] != 33 * 16 + 15 * 4
+      || k[3][0] != 7 || k[3][1] != 8
+      || m[0] != 5
+      || m[1] != 19 * 16 + 6 * 8 + 16 * 4
+      || m[2] != 5
+      || o[0] != 1
+      || o[1] != 25 * 16 + 29 * 8 + 9 * 4
+      || o[2] != 27 * 16 + 18 * 8 + 10 * 4
+      || o[3] != 2
+      || p[0] != 29 * 16 + 19 * 8 + 11 * 4
+      || p[1] != 1 || p[2] != 4
+      || q[0][0] != 5 || q[0][1] != 6
+      || q[1][0] != 31 * 16 + 17 * 8
+      || q[1][1] != 13 * 4
+      || q[2][0] != 19 * 8
+      || q[2][1] != 33 * 16 + 15 * 4
+      || q[3][0] != 7 || q[3][1] != 8
+      || r[0] != 6
+      || r[1] != 7
+      || r[2] != 21 * 16 + 7 * 8 + 8 * 4
+      || r[3] != 23 * 16 + 18 * 8 + 8 * 4
+      || r[4] != 9
+      || ss[0] != 5
+      || ss[1] != 1LL << (16 + 4)
+      || ss[2] != 1LL << 8
+      || ss[3] != 6
+      || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12
+      || tt[2][0] != 1LL << (16 + 8)
+      || tt[2][1] != 1LL << 4
+      || tt[3][0] != 1LL << 8
+      || tt[3][1] != 1LL << (16 + 4)
+      || tt[4][0] != 13 || tt[4][1] != 14)
+    abort ();
+}
+
+int
+main ()
+{
+  test (1);
+  return 0;
+}
diff --git a/libgomp/work.c b/libgomp/work.c
index ac2f0233120f..16fc7076eddf 100644
--- a/libgomp/work.c
+++ b/libgomp/work.c
@@ -98,30 +98,35 @@ alloc_work_share (struct gomp_team *team)
    This shouldn't touch the next_alloc field.  */
 
 void
-gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
 		      unsigned nthreads)
 {
   gomp_mutex_init (&ws->lock);
   if (__builtin_expect (ordered, 0))
     {
-#define INLINE_ORDERED_TEAM_IDS_CNT \
-  ((sizeof (struct gomp_work_share) \
-    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
-   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
-
-      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
-	ws->ordered_team_ids
-	  = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+#define INLINE_ORDERED_TEAM_IDS_SIZE \
+  (sizeof (struct gomp_work_share) \
+   - offsetof (struct gomp_work_share, inline_ordered_team_ids))
+
+      if (__builtin_expect (ordered != 1, 0))
+	{
+	  ordered += nthreads * sizeof (*ws->ordered_team_ids) - 1;
+	  ordered = ordered + __alignof__ (long long) - 1;
+	  ordered &= ~(__alignof__ (long long) - 1);
+	}
+      else
+	ordered = nthreads * sizeof (*ws->ordered_team_ids);
+      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
+	ws->ordered_team_ids = gomp_malloc (ordered);
       else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
-      memset (ws->ordered_team_ids, '\0',
-	      nthreads * sizeof (*ws->ordered_team_ids));
+      memset (ws->ordered_team_ids, '\0', ordered);
       ws->ordered_num_used = 0;
       ws->ordered_owner = -1;
       ws->ordered_cur = 0;
     }
   else
-    ws->ordered_team_ids = NULL;
+    ws->ordered_team_ids = ws->inline_ordered_team_ids;
   gomp_ptrlock_init (&ws->next_ws, NULL);
   ws->threads_completed = 0;
 }
@@ -174,7 +179,7 @@ free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
    if this was the first thread to reach this point.  */
 
 bool
-gomp_work_share_start (bool ordered)
+gomp_work_share_start (size_t ordered)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_team *team = thr->ts.team;
@@ -186,7 +191,7 @@ gomp_work_share_start (bool ordered)
       ws = gomp_malloc (sizeof (*ws));
       gomp_init_work_share (ws, ordered, 1);
       thr->ts.work_share = ws;
-      return ws;
+      return true;
     }
 
   ws = thr->ts.work_share;