From 80b2531e2fafa375fc36c9e954f6660936b7527d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 27 Sep 2018 19:25:38 +0200 Subject: [PATCH] builtin-types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. * builtin-types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. * omp-builtins.def (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER, BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER, BUILT_IN_GOMP_TASK_REDUCTION_REMAP): New builtins. * omp-low.c (use_pointer_for_field): Use is_global_var instead of TREE_STATIC || DECL_EXTERNAL, and apply only if not privatized in outer contexts. (scan_sharing_clauses): Handle OMP_CLAUSE_IN_REDUCTION in task contexts. Handle OMP_CLAUSE_TASK_REDUCTION. (scan_omp_1_stmt): Call scan_sharing_clauses for taskgroups. (lower_rec_input_clauses): Handle OMP_CLAUSE_IN_REDUCTION. Set TREE_THIS_NOTRAP instead of just noop testing it. (lower_send_clauses): Handle OMP_CLAUSE_IN_REDUCTION. (omp_task_reductions_find_first, omp_task_reduction_iterate, lower_omp_task_reductions): New functions. (lower_omp_taskgroup): Handle taskgroup reductions. (create_task_copyfn): Handle OMP_CLAUSE_IN_REDUCTION and OMP_CLAUSE_REDUCTION clauses. gcc/cp/ * semantics.c (finish_omp_reduction_clause): Don't mark OMP_CLAUSE_DECL addressable if it has reference type. Do mark decl_placeholder addressable if needed. gcc/fortran/ * types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. libgomp/ * configure.ac: Check for aligned_alloc, posix_memalign, memalign and _aligned_malloc. * libgomp.h (gomp_aligned_alloc, gomp_aligned_free): New prototypes. (struct gomp_taskgroup): Add reductions field. * libgomp.map (GOMP_5.0): Export GOMP_taskgroup_reduction_register, GOMP_taskgroup_reduction_unregister and GOMP_task_reduction_remap. * task.c (GOMP_taskgroup_start): Initialize taskgroup->reductions. (GOMP_taskgroup_reduction_register, GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap): New functions. * alloc.c (gomp_aligned_alloc, gomp_aligned_free): New functions. * configure: Regenerated. * config.h.in: Regenerated. * testsuite/libgomp.c-c++-common/task-reduction-1.c: New test. * testsuite/libgomp.c-c++-common/task-reduction-2.c: New test. * testsuite/libgomp.c-c++-common/task-reduction-3.c: New test. * testsuite/libgomp.c++/task-reduction-1.C: New test. * testsuite/libgomp.c++/task-reduction-2.C: New test. * testsuite/libgomp.c++/task-reduction-3.C: New test. * testsuite/libgomp.c++/task-reduction-4.C: New test. * testsuite/libgomp.c++/task-reduction-5.C: New test. From-SVN: r264673 --- gcc/ChangeLog.gomp | 21 + gcc/builtin-types.def | 2 + gcc/cp/ChangeLog.gomp | 6 + gcc/cp/semantics.c | 7 +- gcc/fortran/ChangeLog.gomp | 4 + gcc/fortran/types.def | 2 + gcc/omp-builtins.def | 9 + gcc/omp-low.c | 936 +++++++++++++++++- libgomp/ChangeLog.gomp | 24 + libgomp/alloc.c | 48 + libgomp/config.h.in | 12 + libgomp/configure | 13 + libgomp/configure.ac | 1 + libgomp/libgomp.h | 4 + libgomp/libgomp.map | 3 + libgomp/task.c | 182 +++- .../testsuite/libgomp.c++/task-reduction-1.C | 63 ++ .../testsuite/libgomp.c++/task-reduction-2.C | 119 +++ .../testsuite/libgomp.c++/task-reduction-3.C | 126 +++ .../testsuite/libgomp.c++/task-reduction-4.C | 236 +++++ .../testsuite/libgomp.c++/task-reduction-5.C | 317 ++++++ .../libgomp.c-c++-common/task-reduction-1.c | 58 ++ .../libgomp.c-c++-common/task-reduction-2.c | 90 ++ .../libgomp.c-c++-common/task-reduction-3.c | 218 ++++ 24 files changed, 2466 insertions(+), 35 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c++/task-reduction-1.C create mode 100644 libgomp/testsuite/libgomp.c++/task-reduction-2.C create mode 100644 libgomp/testsuite/libgomp.c++/task-reduction-3.C create mode 100644 libgomp/testsuite/libgomp.c++/task-reduction-4.C create mode 100644 libgomp/testsuite/libgomp.c++/task-reduction-5.C create mode 100644 libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c diff --git a/gcc/ChangeLog.gomp b/gcc/ChangeLog.gomp index e3f5efd9d231..570057166da0 100644 --- a/gcc/ChangeLog.gomp +++ b/gcc/ChangeLog.gomp @@ -1,3 +1,24 @@ +2018-09-27 Jakub Jelinek + + * builtin-types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. + * omp-builtins.def (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER, + BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER, + BUILT_IN_GOMP_TASK_REDUCTION_REMAP): New builtins. + * omp-low.c (use_pointer_for_field): Use is_global_var instead + of TREE_STATIC || DECL_EXTERNAL, and apply only if not privatized + in outer contexts. + (scan_sharing_clauses): Handle OMP_CLAUSE_IN_REDUCTION in task + contexts. Handle OMP_CLAUSE_TASK_REDUCTION. + (scan_omp_1_stmt): Call scan_sharing_clauses for taskgroups. + (lower_rec_input_clauses): Handle OMP_CLAUSE_IN_REDUCTION. Set + TREE_THIS_NOTRAP instead of just noop testing it. + (lower_send_clauses): Handle OMP_CLAUSE_IN_REDUCTION. + (omp_task_reductions_find_first, omp_task_reduction_iterate, + lower_omp_task_reductions): New functions. + (lower_omp_taskgroup): Handle taskgroup reductions. + (create_task_copyfn): Handle OMP_CLAUSE_IN_REDUCTION and + OMP_CLAUSE_REDUCTION clauses. + 2018-08-02 Jakub Jelinek * gimplify.c (gimplify_omp_depend): Load block from elt 5 instead diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index b01095c420fe..51f08e5da660 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -619,6 +619,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_UINT64_PTR, BT_VOID, BT_UINT32, BT_UINT64, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_UINT32_PTR, BT_VOID, BT_UINT32, BT_UINT32, BT_PTR) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, + BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) diff --git a/gcc/cp/ChangeLog.gomp b/gcc/cp/ChangeLog.gomp index 308f1e23fcf7..b7db7b3690d4 100644 --- a/gcc/cp/ChangeLog.gomp +++ b/gcc/cp/ChangeLog.gomp @@ -1,3 +1,9 @@ +2018-09-27 Jakub Jelinek + + * semantics.c (finish_omp_reduction_clause): Don't mark + OMP_CLAUSE_DECL addressable if it has reference type. Do mark + decl_placeholder addressable if needed. + 2018-08-02 Jakub Jelinek * parser.c (cp_parser_omp_iterators): Build vector with 6 elts diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index f29e4683087d..2b19c775d52a 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -5685,7 +5685,8 @@ finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor) if (TREE_ADDRESSABLE (DECL_EXPR_DECL (stmts[0]))) cxx_mark_addressable (placeholder); if (TREE_ADDRESSABLE (DECL_EXPR_DECL (stmts[1])) - && !TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) + && (decl_placeholder + || !TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))) cxx_mark_addressable (decl_placeholder ? decl_placeholder : OMP_CLAUSE_DECL (c)); tree omp_out = placeholder; @@ -5711,7 +5712,9 @@ finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor) { gcc_assert (TREE_CODE (stmts[3]) == DECL_EXPR && TREE_CODE (stmts[4]) == DECL_EXPR); - if (TREE_ADDRESSABLE (DECL_EXPR_DECL (stmts[3]))) + if (TREE_ADDRESSABLE (DECL_EXPR_DECL (stmts[3])) + && (decl_placeholder + || !TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))) cxx_mark_addressable (decl_placeholder ? decl_placeholder : OMP_CLAUSE_DECL (c)); if (TREE_ADDRESSABLE (DECL_EXPR_DECL (stmts[4]))) diff --git a/gcc/fortran/ChangeLog.gomp b/gcc/fortran/ChangeLog.gomp index 6e45e8ba931a..6505312f7cd4 100644 --- a/gcc/fortran/ChangeLog.gomp +++ b/gcc/fortran/ChangeLog.gomp @@ -1,3 +1,7 @@ +2018-09-27 Jakub Jelinek + + * types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. + 2018-05-31 Jakub Jelinek * trans-openmp.c (gfc_trans_omp_atomic): Set OMP_ATOMIC_MEMORY_ORDER diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 78f3e20417cc..13641bb3c825 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -145,6 +145,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, + BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index b81ac2889e5e..8bc4c6077437 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -367,5 +367,14 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS, "GOMP_teams", BT_FN_VOID_UINT_UINT, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg", BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER, + "GOMP_taskgroup_reduction_register", + BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER, + "GOMP_taskgroup_reduction_unregister", + BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASK_REDUCTION_REMAP, + "GOMP_task_reduction_remap", + BT_FN_VOID_SIZE_SIZE_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DECLARE, "GOACC_declare", BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 505ac4884981..02f0abbe76f8 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -382,7 +382,7 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx) be passing an address in this case? Should we simply assert this to be false, or should we have a cleanup pass that removes these from the list of mappings? */ - if (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, shared_ctx))) return true; /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell @@ -1075,14 +1075,39 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) t = TREE_OPERAND (t, 0); install_var_local (t, ctx); if (is_taskreg_ctx (ctx) - && !is_global_var (maybe_lookup_decl_in_outer_ctx (t, ctx)) + && (!is_global_var (maybe_lookup_decl_in_outer_ctx (t, ctx)) + || (is_task_ctx (ctx) + && (TREE_CODE (TREE_TYPE (t)) == POINTER_TYPE + || (TREE_CODE (TREE_TYPE (t)) == REFERENCE_TYPE + && (TREE_CODE (TREE_TYPE (TREE_TYPE (t))) + == POINTER_TYPE))))) && !is_variable_sized (t)) { - by_ref = use_pointer_for_field (t, ctx); - install_var_field (t, by_ref, 3, ctx); + by_ref = use_pointer_for_field (t, NULL); + if (is_task_ctx (ctx) + && TREE_CODE (TREE_TYPE (t)) == REFERENCE_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == POINTER_TYPE) + { + install_var_field (t, false, 1, ctx); + install_var_field (t, by_ref, 2, ctx); + } + else + install_var_field (t, by_ref, 3, ctx); } break; } + if (is_task_ctx (ctx)) + { + /* Global variables don't need to be copied, + the receiver side will use them directly. */ + if (!is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))) + { + by_ref = use_pointer_for_field (decl, ctx); + install_var_field (decl, by_ref, 3, ctx); + } + install_var_local (decl, ctx); + break; + } goto do_private; case OMP_CLAUSE_LASTPRIVATE: @@ -1336,6 +1361,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_NONTEMPORAL: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: + case OMP_CLAUSE_TASK_REDUCTION: break; case OMP_CLAUSE_ALIGNED: @@ -1405,6 +1431,11 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) scan_array_reductions = true; break; + case OMP_CLAUSE_TASK_REDUCTION: + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + scan_array_reductions = true; + break; + case OMP_CLAUSE_SHARED: /* Ignore shared directives in teams construct inside of target construct. */ @@ -3141,7 +3172,6 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_TASKGROUP: case GIMPLE_OMP_ORDERED: case GIMPLE_OMP_CRITICAL: case GIMPLE_OMP_GRID_BODY: @@ -3149,6 +3179,12 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, scan_omp (gimple_omp_body_ptr (stmt), ctx); break; + case GIMPLE_OMP_TASKGROUP: + ctx = new_omp_context (stmt, ctx); + scan_sharing_clauses (gimple_omp_taskgroup_clauses (stmt), ctx); + scan_omp (gimple_omp_body_ptr (stmt), ctx); + break; + case GIMPLE_OMP_TARGET: scan_omp_target (as_a (stmt), ctx); break; @@ -3629,18 +3665,51 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (sctx.is_simt && maybe_ne (sctx.max_vf, 1U)) sctx.simt_eargs.safe_push (NULL_TREE); + unsigned task_reduction_cnt = 0; + unsigned task_reduction_cntorig = 0; + unsigned task_reduction_cnt_full = 0; + unsigned task_reduction_cntorig_full = 0; + tree tskred_atype = NULL_TREE, tskred_avar = NULL_TREE; /* Do all the fixed sized types in the first pass, and the variable sized types in the second pass. This makes sure that the scalar arguments to the variable sized types are processed before we use them in the - variable sized operations. */ - for (pass = 0; pass < 2; ++pass) + variable sized operations. For task reductions we use 4 passes, in the + first two we ignore them, in the third one gather arguments for + GOMP_task_reduction_remap call and in the last pass actually handle + the task reductions. */ + for (pass = 0; pass < (task_reduction_cnt ? 4 : 2); ++pass) { + if (pass == 2) + { + tskred_atype + = build_array_type_nelts (ptr_type_node, task_reduction_cnt + + task_reduction_cntorig); + tskred_avar = create_tmp_var_raw (tskred_atype); + gimple_add_tmp_var (tskred_avar); + TREE_ADDRESSABLE (tskred_avar) = 1; + task_reduction_cnt_full = task_reduction_cnt; + task_reduction_cntorig_full = task_reduction_cntorig; + } + else if (pass == 3) + { + x = builtin_decl_explicit (BUILT_IN_GOMP_TASK_REDUCTION_REMAP); + gimple *g + = gimple_build_call (x, 3, size_int (task_reduction_cnt), + size_int (task_reduction_cntorig), + build_fold_addr_expr (tskred_avar)); + gimple_seq_add_stmt (ilist, g); + } + task_reduction_cnt = 0; + task_reduction_cntorig = 0; for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) { enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c); tree var, new_var; bool by_ref; location_t clause_loc = OMP_CLAUSE_LOCATION (c); + bool task_reduction_p = false; + bool task_reduction_needs_orig_p = false; + tree cond = NULL_TREE; switch (c_kind) { @@ -3672,6 +3741,27 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, case OMP_CLAUSE_IN_REDUCTION: if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) reduction_omp_orig_ref = true; + if (is_task_ctx (ctx) /* || OMP_CLAUSE_REDUCTION_TASK (c) */) + { + task_reduction_p = true; + task_reduction_cnt++; + if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) + { + var = OMP_CLAUSE_DECL (c); + /* If var is a global variable that isn't privatized + in outer contexts, we don't need to look up the + original address, it is always the address of the + global variable itself. */ + if (!DECL_P (var) + || omp_is_reference (var) + || !is_global_var + (maybe_lookup_decl_in_outer_ctx (var, ctx))) + { + task_reduction_needs_orig_p = true; + task_reduction_cntorig++; + } + } + } break; case OMP_CLAUSE__LOOPTEMP_: /* Handle _looptemp_ clauses only on parallel/task. */ @@ -3694,7 +3784,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, lastprivate_firstprivate = true; break; case OMP_CLAUSE_ALIGNED: - if (pass == 0) + if (pass != 1) continue; var = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (var)) == POINTER_TYPE @@ -3735,6 +3825,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, continue; } + if (task_reduction_p != (pass >= 2)) + continue; + new_var = var = OMP_CLAUSE_DECL (c); if ((c_kind == OMP_CLAUSE_REDUCTION || c_kind == OMP_CLAUSE_IN_REDUCTION) @@ -3774,6 +3867,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1); tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0); + if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR) { tree b = TREE_OPERAND (orig_var, 1); @@ -3794,6 +3888,44 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } orig_var = TREE_OPERAND (orig_var, 0); } + if (pass == 2) + { + tree out = maybe_lookup_decl_in_outer_ctx (var, ctx); + if (is_global_var (out) + && TREE_CODE (TREE_TYPE (out)) != POINTER_TYPE + && (TREE_CODE (TREE_TYPE (out)) != REFERENCE_TYPE + || (TREE_CODE (TREE_TYPE (TREE_TYPE (out))) + != POINTER_TYPE))) + x = var; + else + { + bool by_ref = use_pointer_for_field (var, NULL); + x = build_receiver_ref (var, by_ref, ctx); + if (TREE_CODE (TREE_TYPE (var)) == REFERENCE_TYPE + && (TREE_CODE (TREE_TYPE (TREE_TYPE (var))) + == POINTER_TYPE)) + x = build_fold_addr_expr (x); + } + if (TREE_CODE (orig_var) == INDIRECT_REF) + x = build_simple_mem_ref (x); + else if (TREE_CODE (orig_var) == ADDR_EXPR) + x = build_fold_addr_expr (x); + bias = fold_convert (sizetype, bias); + x = fold_convert (ptr_type_node, x); + x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR, + TREE_TYPE (x), x, bias); + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += (task_reduction_cntorig_full + - task_reduction_cntorig); + else + cnt = task_reduction_cntorig - 1; + tree r = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + gimplify_assign (r, x, ilist); + continue; + } + if (TREE_CODE (orig_var) == INDIRECT_REF || TREE_CODE (orig_var) == ADDR_EXPR) orig_var = TREE_OPERAND (orig_var, 0); @@ -3802,7 +3934,46 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gcc_assert (TREE_CODE (type) == ARRAY_TYPE); tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); const char *name = get_name (orig_var); - if (TREE_CONSTANT (v)) + if (pass == 3) + { + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += (task_reduction_cntorig_full + - task_reduction_cntorig); + else + cnt = task_reduction_cntorig - 1; + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + tree xv = create_tmp_var (ptr_type_node); + gimple *g = gimple_build_assign (xv, x); + gimple_seq_add_stmt (ilist, g); + x = fold_convert (build_pointer_type (boolean_type_node), + xv); + if (TREE_CONSTANT (v)) + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (x), x, + TYPE_SIZE_UNIT (type)); + else + { + tree t = maybe_lookup_decl (v, ctx); + if (t) + v = t; + else + v = maybe_lookup_decl_in_outer_ctx (v, ctx); + gimplify_expr (&v, ilist, NULL, is_gimple_val, + fb_rvalue); + t = fold_build2_loc (clause_loc, PLUS_EXPR, + TREE_TYPE (v), v, + build_int_cst (TREE_TYPE (v), 1)); + t = fold_build2_loc (clause_loc, MULT_EXPR, + TREE_TYPE (v), t, + TYPE_SIZE_UNIT (TREE_TYPE (type))); + x = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (x), x, t); + } + cond = create_tmp_var (TREE_TYPE (x)); + gimplify_assign (cond, x, ilist); + x = xv; + } + else if (TREE_CONSTANT (v)) { x = create_tmp_var_raw (type, name); gimple_add_tmp_var (x); @@ -3864,7 +4035,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree new_orig_var = lookup_decl (orig_var, ctx); tree t = build_fold_indirect_ref (new_var); DECL_IGNORED_P (new_var) = 0; - TREE_THIS_NOTRAP (t); + TREE_THIS_NOTRAP (t) = 1; SET_DECL_VALUE_EXPR (new_orig_var, t); DECL_HAS_VALUE_EXPR_P (new_orig_var) = 1; } @@ -3889,6 +4060,37 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x); gimplify_assign (new_var, x, ilist); } + /* GOMP_taskgroup_reduction_register memsets the whole + array to zero. If the initializer is zero, we don't + need to initialize it again, just mark it as ever + used unconditionally, i.e. cond = true. */ + if (cond + && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE + && initializer_zerop (omp_reduction_init (c, + TREE_TYPE (type)))) + { + gimple *g = gimple_build_assign (build_simple_mem_ref (cond), + boolean_true_node); + gimple_seq_add_stmt (ilist, g); + continue; + } + tree end = create_artificial_label (UNKNOWN_LOCATION); + if (cond) + { + tree condv = create_tmp_var (boolean_type_node); + gimple *g + = gimple_build_assign (condv, build_simple_mem_ref (cond)); + gimple_seq_add_stmt (ilist, g); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, end, lab1); + gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + g = gimple_build_assign (build_simple_mem_ref (cond), + boolean_true_node); + gimple_seq_add_stmt (ilist, g); + } + tree y1 = create_tmp_var (ptype, NULL); gimplify_assign (y1, y, ilist); tree i2 = NULL_TREE, y2 = NULL_TREE; @@ -3922,9 +4124,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree i = create_tmp_var (TREE_TYPE (v), NULL); gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), ilist); tree body = create_artificial_label (UNKNOWN_LOCATION); - tree end = create_artificial_label (UNKNOWN_LOCATION); gimple_seq_add_stmt (ilist, gimple_build_label (body)); - if (y2) + if (y2 && pass != 3) { i2 = create_tmp_var (TREE_TYPE (v), NULL); gimplify_assign (i2, build_int_cst (TREE_TYPE (v), 0), dlist); @@ -3969,14 +4170,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 0; - x = lang_hooks.decls.omp_clause_dtor - (c, build_simple_mem_ref (y2)); - if (x) + if (pass != 3) { - gimple_seq tseq = NULL; - dtor = x; - gimplify_stmt (&dtor, &tseq); - gimple_seq_add_seq (dlist, tseq); + x = lang_hooks.decls.omp_clause_dtor + (c, build_simple_mem_ref (y2)); + if (x) + { + gimple_seq tseq = NULL; + dtor = x; + gimplify_stmt (&dtor, &tseq); + gimple_seq_add_seq (dlist, tseq); + } } } else @@ -4014,7 +4218,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, g = gimple_build_cond (LE_EXPR, i, v, body, end); gimple_seq_add_stmt (ilist, g); gimple_seq_add_stmt (ilist, gimple_build_label (end)); - if (y2) + if (y2 && pass != 3) { g = gimple_build_assign (y2, POINTER_PLUS_EXPR, y2, TYPE_SIZE_UNIT (TREE_TYPE (type))); @@ -4035,6 +4239,61 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } continue; } + else if (pass == 2) + { + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))) + x = var; + else + { + bool by_ref = use_pointer_for_field (var, ctx); + x = build_receiver_ref (var, by_ref, ctx); + } + if (!omp_is_reference (var)) + x = build_fold_addr_expr (x); + x = fold_convert (ptr_type_node, x); + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += task_reduction_cntorig_full - task_reduction_cntorig; + else + cnt = task_reduction_cntorig - 1; + tree r = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + gimplify_assign (r, x, ilist); + continue; + } + else if (pass == 3) + { + tree type = TREE_TYPE (new_var); + if (!omp_is_reference (var)) + type = build_pointer_type (type); + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += task_reduction_cntorig_full - task_reduction_cntorig; + else + cnt = task_reduction_cntorig - 1; + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + x = fold_convert (type, x); + tree t; + if (omp_is_reference (var)) + { + gimplify_assign (new_var, x, ilist); + t = new_var; + new_var = build_simple_mem_ref (new_var); + } + else + { + t = create_tmp_var (type); + gimplify_assign (t, x, ilist); + SET_DECL_VALUE_EXPR (new_var, build_simple_mem_ref (t)); + DECL_HAS_VALUE_EXPR_P (new_var) = 1; + } + t = fold_convert (build_pointer_type (boolean_type_node), t); + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, + TYPE_SIZE_UNIT (TREE_TYPE (type))); + cond = create_tmp_var (TREE_TYPE (t)); + gimplify_assign (cond, t, ilist); + } else if (is_variable_sized (var)) { /* For variable sized types, we need to allocate the @@ -4381,12 +4640,31 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); gimple *tseq; - x = build_outer_var_ref (var, ctx); + tree ptype = TREE_TYPE (placeholder); + if (cond) + { + x = error_mark_node; + if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c) + && !task_reduction_needs_orig_p) + x = var; + else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) + { + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (task_reduction_cnt_full + + task_reduction_cntorig - 1), + NULL_TREE, NULL_TREE); + x = fold_convert (build_pointer_type (ptype), x); + x = build_simple_mem_ref (x); + } + } + else + { + x = build_outer_var_ref (var, ctx); - if (omp_is_reference (var) - && !useless_type_conversion_p (TREE_TYPE (placeholder), - TREE_TYPE (x))) - x = build_fold_addr_expr_loc (clause_loc, x); + if (omp_is_reference (var) + && !useless_type_conversion_p (ptype, TREE_TYPE (x))) + x = build_fold_addr_expr_loc (clause_loc, x); + } SET_DECL_VALUE_EXPR (placeholder, x); DECL_HAS_VALUE_EXPR_P (placeholder) = 1; tree new_vard = new_var; @@ -4450,6 +4728,25 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, initialization now. */ else if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); + + tree lab2 = NULL_TREE; + if (cond) + { + tree condv = create_tmp_var (boolean_type_node); + gimple *g + = gimple_build_assign (condv, + build_simple_mem_ref (cond)); + gimple_seq_add_stmt (ilist, g); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + lab2 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, lab2, lab1); + gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + g = gimple_build_assign (build_simple_mem_ref (cond), + boolean_true_node); + gimple_seq_add_stmt (ilist, g); + } x = lang_hooks.decls.omp_clause_default_ctor (c, unshare_expr (new_var), build_outer_var_ref (var, ctx)); @@ -4470,6 +4767,11 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; + if (cond) + { + gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); + break; + } goto do_dtor; } else @@ -4478,6 +4780,41 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE); enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c); + if (cond) + { + gimple *g; + /* GOMP_taskgroup_reduction_register memsets the whole + array to zero. If the initializer is zero, we don't + need to initialize it again, just mark it as ever + used unconditionally, i.e. cond = true. */ + if (initializer_zerop (x)) + { + g = gimple_build_assign (build_simple_mem_ref (cond), + boolean_true_node); + gimple_seq_add_stmt (ilist, g); + break; + } + + /* Otherwise, emit + if (!cond) { cond = true; new_var = x; } */ + tree condv = create_tmp_var (boolean_type_node); + g = gimple_build_assign (condv, + build_simple_mem_ref (cond)); + gimple_seq_add_stmt (ilist, g); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + tree lab2 = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, condv, + boolean_false_node, lab2, lab1); + gimple_seq_add_stmt (ilist, g); + gimple_seq_add_stmt (ilist, gimple_build_label (lab1)); + g = gimple_build_assign (build_simple_mem_ref (cond), + boolean_true_node); + gimple_seq_add_stmt (ilist, g); + gimplify_assign (new_var, x, ilist); + gimple_seq_add_stmt (ilist, gimple_build_label (lab2)); + break; + } + /* reduction(-:var) sums up the partial results, so it acts identically to reduction(+:var). */ if (code == MINUS_EXPR) @@ -4541,6 +4878,12 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } } } + if (tskred_avar) + { + tree clobber = build_constructor (TREE_TYPE (tskred_avar), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gimple_seq_add_stmt (ilist, gimple_build_assign (tskred_avar, clobber)); + } if (known_eq (sctx.max_vf, 1U)) sctx.is_simt = false; @@ -4672,8 +5015,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { /* Don't add any barrier for #pragma omp simd or #pragma omp distribute. */ - if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR - || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR) + if (!is_task_ctx (ctx) + && (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR + || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)) gimple_seq_add_stmt (ilist, omp_build_barrier (NULL_TREE)); } @@ -5474,6 +5818,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_LASTPRIVATE: case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_IN_REDUCTION: break; case OMP_CLAUSE_SHARED: if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) @@ -5491,7 +5836,8 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, } val = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION) && TREE_CODE (val) == MEM_REF) { val = TREE_OPERAND (val, 0); @@ -5515,7 +5861,13 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, var = lookup_decl_in_outer_ctx (val, ctx_for_o); if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN - && is_global_var (var)) + && is_global_var (var) + && (val == OMP_CLAUSE_DECL (c) + || !is_task_ctx (ctx) + || (TREE_CODE (TREE_TYPE (val)) != POINTER_TYPE + && (TREE_CODE (TREE_TYPE (val)) != REFERENCE_TYPE + || (TREE_CODE (TREE_TYPE (TREE_TYPE (val))) + != POINTER_TYPE))))) continue; t = omp_member_access_dummy_var (var); @@ -5543,7 +5895,8 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, continue; } - if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION + if (((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION + && OMP_CLAUSE_CODE (c) != OMP_CLAUSE_IN_REDUCTION) || val == OMP_CLAUSE_DECL (c)) && is_variable_sized (val)) continue; @@ -5581,9 +5934,15 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, break; case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_IN_REDUCTION: do_in = true; if (val == OMP_CLAUSE_DECL (c)) - do_out = !(by_ref || omp_is_reference (val)); + { + if (is_task_ctx (ctx)) + by_ref = use_pointer_for_field (val, ctx); + else + do_out = !(by_ref || omp_is_reference (val)); + } else by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE; break; @@ -6226,6 +6585,477 @@ lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx) BLOCK_VARS (block) = ctx->block_vars; } +/* Find the first task_reduction or reduction clause or return NULL + if there are none. */ + +static inline tree +omp_task_reductions_find_first (tree clauses, enum tree_code code, + enum omp_clause_code ccode) +{ + while (1) + { + clauses = omp_find_clause (clauses, ccode); + if (clauses == NULL_TREE) + return NULL_TREE; + if (ccode != OMP_CLAUSE_REDUCTION + || code == OMP_TASKLOOP + || OMP_CLAUSE_REDUCTION_TASK (clauses)) + return clauses; + clauses = OMP_CLAUSE_CHAIN (clauses); + } +} + +/* Helper function for lower_omp_task_reductions. For a specific PASS + find out the current clause it should be processed, or return false + if all have been processed already. */ + +static inline bool +omp_task_reduction_iterate (int pass, enum tree_code code, + enum omp_clause_code ccode, tree *c, tree *decl, + tree *type, tree *next) +{ + for (; *c; *c = omp_find_clause (OMP_CLAUSE_CHAIN (*c), ccode)) + { + if (ccode == OMP_CLAUSE_REDUCTION + && code != OMP_TASKLOOP + && !OMP_CLAUSE_REDUCTION_TASK (*c)) + continue; + *decl = OMP_CLAUSE_DECL (*c); + *type = TREE_TYPE (*decl); + if (TREE_CODE (*decl) == MEM_REF) + { + if (pass != 1) + continue; + } + else + { + if (omp_is_reference (*decl)) + *type = TREE_TYPE (*type); + if (pass != (!TREE_CONSTANT (TYPE_SIZE_UNIT (*type)))) + continue; + } + *next = omp_find_clause (OMP_CLAUSE_CHAIN (*c), ccode); + return true; + } + *decl = NULL_TREE; + *type = NULL_TREE; + *next = NULL_TREE; + return false; +} + +/* Lower task_reduction and reduction clauses (the latter unless CODE is + OMP_TASKGROUP only with task modifier). Register mapping of those in + START sequence and reducing them and unregister them in the END sequence. */ + +static void +lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, + gimple_seq *start, gimple_seq *end) +{ + enum omp_clause_code ccode + = (code == OMP_TASKGROUP + ? OMP_CLAUSE_TASK_REDUCTION : OMP_CLAUSE_REDUCTION); + clauses = omp_task_reductions_find_first (clauses, code, ccode); + if (clauses == NULL_TREE) + return; + tree record_type = lang_hooks.types.make_type (RECORD_TYPE); + tree *last = &TYPE_FIELDS (record_type); + unsigned cnt = 0; + for (int pass = 0; pass < 2; pass++) + { + tree decl, type, next; + for (tree c = clauses; + omp_task_reduction_iterate (pass, code, ccode, + &c, &decl, &type, &next); c = next) + { + ++cnt; + tree new_type = type; + if (ctx->outer) + new_type = remap_type (type, &ctx->outer->cb); + tree field + = build_decl (OMP_CLAUSE_LOCATION (c), FIELD_DECL, + DECL_P (decl) ? DECL_NAME (decl) : NULL_TREE, + new_type); + if (DECL_P (decl) && type == TREE_TYPE (decl)) + { + SET_DECL_ALIGN (field, DECL_ALIGN (decl)); + DECL_USER_ALIGN (field) = DECL_USER_ALIGN (decl); + TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (decl); + } + else + SET_DECL_ALIGN (field, TYPE_ALIGN (type)); + DECL_CONTEXT (field) = record_type; + *last = field; + last = &DECL_CHAIN (field); + tree bfield + = build_decl (OMP_CLAUSE_LOCATION (c), FIELD_DECL, NULL_TREE, + boolean_type_node); + DECL_CONTEXT (bfield) = record_type; + *last = bfield; + last = &DECL_CHAIN (bfield); + } + } + *last = NULL_TREE; + layout_type (record_type); + + /* Build up an array which registers with the runtime all the reductions + and deregisters them at the end. Format documented in libgomp/task.c. */ + tree atype = build_array_type_nelts (pointer_sized_int_node, 7 + cnt * 3); + tree avar = create_tmp_var_raw (atype); + gimple_add_tmp_var (avar); + TREE_ADDRESSABLE (avar) = 1; + tree r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_zero_node, + NULL_TREE, NULL_TREE); + tree t = build_int_cst (pointer_sized_int_node, cnt); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + gimple_seq seq = NULL; + tree sz = fold_convert (pointer_sized_int_node, + TYPE_SIZE_UNIT (record_type)); + int cachesz = 64; + sz = fold_build2 (PLUS_EXPR, pointer_sized_int_node, sz, + build_int_cst (pointer_sized_int_node, cachesz - 1)); + sz = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, sz, + build_int_cst (pointer_sized_int_node, ~(cachesz - 1))); + sz = force_gimple_operand (sz, &seq, true, NULL_TREE); + gimple_seq_add_seq (start, seq); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_one_node, + NULL_TREE, NULL_TREE); + gimple_seq_add_stmt (start, gimple_build_assign (r, sz)); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (2), + NULL_TREE, NULL_TREE); + t = build_int_cst (pointer_sized_int_node, + MAX (TYPE_ALIGN_UNIT (record_type), (unsigned) cachesz)); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (3), + NULL_TREE, NULL_TREE); + t = build_int_cst (pointer_sized_int_node, -1); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (4), + NULL_TREE, NULL_TREE); + t = build_int_cst (pointer_sized_int_node, 0); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + + /* In end, build a loop that iterates from 0 to < omp_get_num_threads () + and for each task reduction checks a bool right after the private variable + within that thread's chunk; if the bool is clear, it hasn't been + initialized and thus isn't going to be reduced nor destructed, otherwise + reduce and destruct it. */ + tree idx = create_tmp_var (size_type_node); + gimple_seq_add_stmt (end, gimple_build_assign (idx, size_zero_node)); + t = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + tree num_thr = create_tmp_var (integer_type_node); + gimple *g = gimple_build_call (t, 0); + gimple_call_set_lhs (g, num_thr); + gimple_seq_add_stmt (end, g); + tree num_thr_sz = create_tmp_var (size_type_node); + g = gimple_build_assign (num_thr_sz, NOP_EXPR, num_thr); + gimple_seq_add_stmt (end, g); + t = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (2), + NULL_TREE, NULL_TREE); + tree data = create_tmp_var (pointer_sized_int_node); + gimple_seq_add_stmt (end, gimple_build_assign (data, t)); + tree lab1 = create_artificial_label (UNKNOWN_LOCATION); + tree lab2 = create_artificial_label (UNKNOWN_LOCATION); + gimple_seq_add_stmt (end, gimple_build_label (lab1)); + tree ptr; + if (TREE_CODE (TYPE_SIZE_UNIT (record_type)) == INTEGER_CST) + ptr = create_tmp_var (build_pointer_type (record_type)); + else + ptr = create_tmp_var (ptr_type_node); + gimple_seq_add_stmt (end, gimple_build_assign (ptr, NOP_EXPR, data)); + + tree field = TYPE_FIELDS (record_type); + cnt = 0; + for (int pass = 0; pass < 2; pass++) + { + tree decl, type, next; + for (tree c = clauses; + omp_task_reduction_iterate (pass, code, ccode, + &c, &decl, &type, &next); c = next) + { + tree var = decl, ref, orig_var = decl; + if (TREE_CODE (decl) == MEM_REF) + { + var = TREE_OPERAND (var, 0); + if (TREE_CODE (var) == POINTER_PLUS_EXPR) + var = TREE_OPERAND (var, 0); + tree v = var; + if (TREE_CODE (var) == ADDR_EXPR) + var = TREE_OPERAND (var, 0); + else if (TREE_CODE (var) == INDIRECT_REF) + var = TREE_OPERAND (var, 0); + orig_var = var; + if (is_variable_sized (var)) + { + gcc_assert (DECL_HAS_VALUE_EXPR_P (var)); + var = DECL_VALUE_EXPR (var); + gcc_assert (TREE_CODE (var) == INDIRECT_REF); + var = TREE_OPERAND (var, 0); + gcc_assert (DECL_P (var)); + } + t = ref = maybe_lookup_decl_in_outer_ctx (var, ctx); + if (TREE_CODE (v) == ADDR_EXPR) + t = build_fold_addr_expr (t); + else if (TREE_CODE (v) == INDIRECT_REF) + t = build_fold_indirect_ref (t); + if (TREE_CODE (TREE_OPERAND (decl, 0)) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (TREE_OPERAND (decl, 0), 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, b); + } + if (!integer_zerop (TREE_OPERAND (decl, 1))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, + fold_convert (size_type_node, + TREE_OPERAND (decl, 1))); + } + else + { + t = ref = maybe_lookup_decl_in_outer_ctx (var, ctx); + if (!omp_is_reference (decl)) + t = build_fold_addr_expr (t); + } + t = fold_convert (pointer_sized_int_node, t); + seq = NULL; + t = force_gimple_operand (t, &seq, true, NULL_TREE); + gimple_seq_add_seq (start, seq); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, + size_int (7 + cnt * 3), NULL_TREE, NULL_TREE); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + t = byte_position (field); + t = fold_convert (pointer_sized_int_node, t); + seq = NULL; + t = force_gimple_operand (t, &seq, true, NULL_TREE); + gimple_seq_add_seq (start, seq); + r = build4 (ARRAY_REF, pointer_sized_int_node, avar, + size_int (7 + cnt * 3 + 1), NULL_TREE, NULL_TREE); + gimple_seq_add_stmt (start, gimple_build_assign (r, t)); + + tree bfield = DECL_CHAIN (field); + tree cond; + if (TREE_TYPE (ptr) == ptr_type_node) + { + cond = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr, + byte_position (bfield)); + seq = NULL; + cond = force_gimple_operand (cond, &seq, true, NULL_TREE); + gimple_seq_add_seq (end, seq); + tree pbool = build_pointer_type (TREE_TYPE (bfield)); + cond = build2 (MEM_REF, TREE_TYPE (bfield), cond, + build_int_cst (pbool, 0)); + } + else + cond = build3 (COMPONENT_REF, TREE_TYPE (bfield), + build_simple_mem_ref (ptr), bfield, NULL_TREE); + tree lab3 = create_artificial_label (UNKNOWN_LOCATION); + tree lab4 = create_artificial_label (UNKNOWN_LOCATION); + tree condv = create_tmp_var (boolean_type_node); + gimple_seq_add_stmt (end, gimple_build_assign (condv, cond)); + g = gimple_build_cond (NE_EXPR, condv, boolean_false_node, + lab3, lab4); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab3)); + + tree new_var; + if (TREE_TYPE (ptr) == ptr_type_node) + { + new_var = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr, + byte_position (field)); + seq = NULL; + new_var = force_gimple_operand (new_var, &seq, true, NULL_TREE); + gimple_seq_add_seq (end, seq); + tree pbool = build_pointer_type (TREE_TYPE (field)); + new_var = build2 (MEM_REF, TREE_TYPE (field), new_var, + build_int_cst (pbool, 0)); + } + else + new_var = build3 (COMPONENT_REF, TREE_TYPE (field), + build_simple_mem_ref (ptr), field, NULL_TREE); + + enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c); + if (TREE_CODE (decl) != MEM_REF && omp_is_reference (decl)) + ref = build_simple_mem_ref (ref); + /* reduction(-:var) sums up the partial results, so it acts + identically to reduction(+:var). */ + if (rcode == MINUS_EXPR) + rcode = PLUS_EXPR; + if (TREE_CODE (decl) == MEM_REF) + { + tree d = decl; + tree type = TREE_TYPE (new_var); + tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + tree i = create_tmp_var (TREE_TYPE (v), NULL); + tree ptype = build_pointer_type (TREE_TYPE (type)); + tree bias = TREE_OPERAND (d, 1); + d = TREE_OPERAND (d, 0); + if (TREE_CODE (d) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (d, 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + if (integer_zerop (bias)) + bias = b; + else + { + bias = fold_convert (TREE_TYPE (b), bias); + bias = fold_build2 (PLUS_EXPR, TREE_TYPE (b), b, bias); + } + d = TREE_OPERAND (d, 0); + } + /* For ref build_outer_var_ref already performs this, so + only new_var needs a dereference. */ + if (TREE_CODE (d) == INDIRECT_REF) + ref = build_fold_indirect_ref (ref); + else if (TREE_CODE (d) == ADDR_EXPR) + { + if (orig_var == var) + ref = build_fold_addr_expr (ref); + } + else + gcc_assert (orig_var == var); + if (DECL_P (v)) + { + v = maybe_lookup_decl_in_outer_ctx (v, ctx); + gimplify_expr (&v, end, NULL, is_gimple_val, fb_rvalue); + } + if (!integer_zerop (bias)) + { + bias = fold_convert (sizetype, bias); + ref = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ref), + ref, bias); + } + new_var = build_fold_addr_expr (new_var); + new_var = fold_convert (ptype, new_var); + ref = fold_convert (ptype, ref); + tree m = create_tmp_var (ptype, NULL); + gimplify_assign (m, new_var, end); + new_var = m; + m = create_tmp_var (ptype, NULL); + gimplify_assign (m, ref, end); + ref = m; + gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), end); + tree body = create_artificial_label (UNKNOWN_LOCATION); + tree endl = create_artificial_label (UNKNOWN_LOCATION); + gimple_seq_add_stmt (end, gimple_build_label (body)); + tree priv = build_simple_mem_ref (new_var); + tree out = build_simple_mem_ref (ref); + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); + tree decl_placeholder + = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c); + SET_DECL_VALUE_EXPR (placeholder, out); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + SET_DECL_VALUE_EXPR (decl_placeholder, priv); + DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1; + lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx); + gimple_seq_add_seq (end, + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c)); + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; + OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; + OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL; + tree x = lang_hooks.decls.omp_clause_dtor (c, priv); + if (x) + { + gimple_seq tseq = NULL; + gimplify_stmt (&x, &tseq); + gimple_seq_add_seq (end, tseq); + } + } + else + { + tree x = build2 (rcode, TREE_TYPE (out), out, priv); + out = unshare_expr (out); + gimplify_assign (out, x, end); + } + gimple *g + = gimple_build_assign (new_var, POINTER_PLUS_EXPR, new_var, + TYPE_SIZE_UNIT (TREE_TYPE (type))); + gimple_seq_add_stmt (end, g); + g = gimple_build_assign (ref, POINTER_PLUS_EXPR, ref, + TYPE_SIZE_UNIT (TREE_TYPE (type))); + gimple_seq_add_stmt (end, g); + g = gimple_build_assign (i, PLUS_EXPR, i, + build_int_cst (TREE_TYPE (i), 1)); + gimple_seq_add_stmt (end, g); + g = gimple_build_cond (LE_EXPR, i, v, body, endl); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (endl)); + } + else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)) + { + tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); + tree oldv = NULL_TREE; + + if (omp_is_reference (decl) + && !useless_type_conversion_p (TREE_TYPE (placeholder), + TREE_TYPE (ref))) + ref = build_fold_addr_expr_loc (OMP_CLAUSE_LOCATION (c), ref); + ref = build_fold_addr_expr_loc (OMP_CLAUSE_LOCATION (c), ref); + tree refv = create_tmp_var (TREE_TYPE (ref)); + gimplify_assign (refv, ref, end); + ref = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), refv); + SET_DECL_VALUE_EXPR (placeholder, ref); + DECL_HAS_VALUE_EXPR_P (placeholder) = 1; + tree d = maybe_lookup_decl (decl, ctx); + gcc_assert (d); + if (DECL_HAS_VALUE_EXPR_P (d)) + oldv = DECL_VALUE_EXPR (d); + if (omp_is_reference (var)) + { + tree v = fold_convert (TREE_TYPE (d), + build_fold_addr_expr (new_var)); + SET_DECL_VALUE_EXPR (d, v); + } + else + SET_DECL_VALUE_EXPR (d, new_var); + DECL_HAS_VALUE_EXPR_P (d) = 1; + lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx->outer); + if (oldv) + SET_DECL_VALUE_EXPR (d, oldv); + else + { + SET_DECL_VALUE_EXPR (d, NULL_TREE); + DECL_HAS_VALUE_EXPR_P (d) = 0; + } + gimple_seq_add_seq (end, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c)); + OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; + OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; + tree x = lang_hooks.decls.omp_clause_dtor (c, new_var); + if (x) + { + gimple_seq tseq = NULL; + gimplify_stmt (&x, &tseq); + gimple_seq_add_seq (end, tseq); + } + } + else + { + tree x = build2 (rcode, TREE_TYPE (ref), ref, new_var); + ref = unshare_expr (ref); + gimplify_assign (ref, x, end); + } + gimple_seq_add_stmt (end, gimple_build_label (lab4)); + ++cnt; + field = DECL_CHAIN (bfield); + } + } + + t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER); + g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); + gimple_seq_add_stmt (start, g); + + gimple_seq_add_stmt (end, gimple_build_assign (data, PLUS_EXPR, data, sz)); + gimple_seq_add_stmt (end, gimple_build_assign (idx, PLUS_EXPR, idx, + size_one_node)); + g = gimple_build_cond (NE_EXPR, idx, num_thr_sz, lab1, lab2); + gimple_seq_add_stmt (end, g); + gimple_seq_add_stmt (end, gimple_build_label (lab2)); + t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER); + g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); + gimple_seq_add_stmt (end, g); + t = build_constructor (atype, NULL); + TREE_THIS_VOLATILE (t) = 1; + gimple_seq_add_stmt (end, gimple_build_assign (avar, t)); +} /* Expand code for an OpenMP taskgroup directive. */ @@ -6235,21 +7065,31 @@ lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple *stmt = gsi_stmt (*gsi_p); gcall *x; gbind *bind; + gimple_seq dseq = NULL; tree block = make_node (BLOCK); bind = gimple_build_bind (NULL, NULL, block); gsi_replace (gsi_p, bind, true); gimple_bind_add_stmt (bind, stmt); + push_gimplify_context (); + x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_START), 0); gimple_bind_add_stmt (bind, x); + lower_omp_task_reductions (ctx, OMP_TASKGROUP, + gimple_omp_taskgroup_clauses (stmt), + gimple_bind_body_ptr (bind), &dseq); + lower_omp (gimple_omp_body_ptr (stmt), ctx); gimple_bind_add_seq (bind, gimple_omp_body (stmt)); gimple_omp_set_body (stmt, NULL); gimple_bind_add_stmt (bind, gimple_build_omp_return (true)); + gimple_bind_add_seq (bind, dseq); + + pop_gimplify_context (bind); gimple_bind_append_vars (bind, ctx->block_vars); BLOCK_VARS (block) = ctx->block_vars; @@ -7239,6 +8079,40 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src); append_to_statement_list (t, &list); break; + case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_IN_REDUCTION: + decl = OMP_CLAUSE_DECL (c); + if (TREE_CODE (decl) == MEM_REF) + { + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == POINTER_PLUS_EXPR) + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == INDIRECT_REF + || TREE_CODE (decl) == ADDR_EXPR) + decl = TREE_OPERAND (decl, 0); + } + key = (splay_tree_key) decl; + n = splay_tree_lookup (ctx->field_map, key); + if (n == NULL) + break; + f = (tree) n->value; + if (tcctx.cb.decl_map) + f = *tcctx.cb.decl_map->get (f); + n = splay_tree_lookup (ctx->sfield_map, key); + sf = (tree) n->value; + if (tcctx.cb.decl_map) + sf = *tcctx.cb.decl_map->get (sf); + src = build_simple_mem_ref_loc (loc, sarg); + src = omp_build_component_ref (src, sf); + if (decl != OMP_CLAUSE_DECL (c) + && TREE_CODE (TREE_TYPE (decl)) == REFERENCE_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == POINTER_TYPE) + src = build_simple_mem_ref_loc (loc, src); + dst = build_simple_mem_ref_loc (loc, arg); + dst = omp_build_component_ref (dst, f); + t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src); + append_to_statement_list (t, &list); + break; case OMP_CLAUSE__LOOPTEMP_: /* Fields for first two _looptemp_ clauses are initialized by GOMP_taskloop*, the rest are handled like firstprivate. */ diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp index f8e693dfd805..bb86d44fe112 100644 --- a/libgomp/ChangeLog.gomp +++ b/libgomp/ChangeLog.gomp @@ -1,3 +1,27 @@ +2018-09-27 Jakub Jelinek + + * configure.ac: Check for aligned_alloc, posix_memalign, memalign + and _aligned_malloc. + * libgomp.h (gomp_aligned_alloc, gomp_aligned_free): New prototypes. + (struct gomp_taskgroup): Add reductions field. + * libgomp.map (GOMP_5.0): Export GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister and GOMP_task_reduction_remap. + * task.c (GOMP_taskgroup_start): Initialize taskgroup->reductions. + (GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap): New + functions. + * alloc.c (gomp_aligned_alloc, gomp_aligned_free): New functions. + * configure: Regenerated. + * config.h.in: Regenerated. + * testsuite/libgomp.c-c++-common/task-reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-2.c: New test. + * testsuite/libgomp.c-c++-common/task-reduction-3.c: New test. + * testsuite/libgomp.c++/task-reduction-1.C: New test. + * testsuite/libgomp.c++/task-reduction-2.C: New test. + * testsuite/libgomp.c++/task-reduction-3.C: New test. + * testsuite/libgomp.c++/task-reduction-4.C: New test. + * testsuite/libgomp.c++/task-reduction-5.C: New test. + 2018-08-02 Jakub Jelinek * testsuite/libgomp.c-c++-common/depend-iterator-1.c: Add tests for diff --git a/libgomp/alloc.c b/libgomp/alloc.c index 1bf404235797..90d2d9cc4ef2 100644 --- a/libgomp/alloc.c +++ b/libgomp/alloc.c @@ -57,3 +57,51 @@ gomp_realloc (void *old, size_t size) gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); return ret; } + +void * +gomp_aligned_alloc (size_t al, size_t size) +{ + void *ret; + if (al < sizeof (void *)) + al = sizeof (void *); +#ifdef HAVE_ALIGNED_ALLOC + ret = aligned_alloc (al, size); +#elif defined(HAVE__ALIGNED_MALLOC) + ret = _aligned_malloc (size, al); +#elif defined(HAVE_POSIX_MEMALIGN) + if (posix_memalign (&ret, al, size) != 0) + ret = NULL; +#elif defined(HAVE_MEMALIGN) + { + extern void *memalign (size_t, size_t); + ret = memalign (al, size); + } +#else + ret = NULL; + if ((al & (al - 1)) == 0 && size) + { + void *p = malloc (size + al); + if (p) + { + void *ap = (void *) (((uintptr_t) p + al) & -al); + ((void **) ap)[-1] = p; + ret = ap; + } +#define NEED_SPECIAL_GOMP_ALIGNED_FREE + } +#endif + if (ret == NULL) + gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); + return ret; +} + +void +gomp_aligned_free (void *ptr) +{ +#ifdef NEED_SPECIAL_GOMP_ALIGNED_FREE + if (ptr) + free (((void **) ptr)[-1]); +#else + free (ptr); +#endif +} diff --git a/libgomp/config.h.in b/libgomp/config.h.in index d98267501455..52f4ed44412a 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -1,5 +1,8 @@ /* config.h.in. Generated from configure.ac by autoheader. */ +/* Define to 1 if you have the `aligned_alloc' function. */ +#undef HAVE_ALIGNED_ALLOC + /* Define to 1 if the target assembler supports .symver directive. */ #undef HAVE_AS_SYMVER_DIRECTIVE @@ -51,9 +54,15 @@ /* Define to 1 if you have the `dl' library (-ldl). */ #undef HAVE_LIBDL +/* Define to 1 if you have the `memalign' function. */ +#undef HAVE_MEMALIGN + /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */ #undef HAVE_PTHREAD_AFFINITY_NP @@ -115,6 +124,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H +/* Define to 1 if you have the `_aligned_malloc' function. */ +#undef HAVE__ALIGNED_MALLOC + /* Define to 1 if you have the `__secure_getenv' function. */ #undef HAVE___SECURE_GETENV diff --git a/libgomp/configure b/libgomp/configure index 799dcfd36c1a..b039d486016a 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -15570,6 +15570,19 @@ _ACEOF fi done +for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +eval as_val=\$$as_ac_var + if test "x$as_val" = x""yes; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + # Check for broken semaphore implementation on darwin. # sem_init returns: sem_init error: Function not implemented. diff --git a/libgomp/configure.ac b/libgomp/configure.ac index d2a7d8dff8f2..50c0f2b0d001 100644 --- a/libgomp/configure.ac +++ b/libgomp/configure.ac @@ -219,6 +219,7 @@ m4_include([plugin/configfrag.ac]) # Check for functions needed. AC_CHECK_FUNCS(getloadavg clock_gettime strtoull) +AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc) # Check for broken semaphore implementation on darwin. # sem_init returns: sem_init error: Function not implemented. diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 7453d6292c81..c18b40795ae5 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -89,6 +89,9 @@ enum memmodel extern void *gomp_malloc (size_t) __attribute__((malloc)); extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); extern void *gomp_realloc (void *, size_t); +extern void *gomp_aligned_alloc (size_t, size_t) + __attribute__((malloc, alloc_size (2))); +extern void gomp_aligned_free (void *); /* Avoid conflicting prototypes of alloca() in system headers by using GCC's builtin alloca(). */ @@ -474,6 +477,7 @@ struct gomp_taskgroup struct gomp_taskgroup *prev; /* Queue of tasks that belong in this taskgroup. */ struct priority_queue taskgroup_queue; + uintptr_t *reductions; bool in_taskgroup_wait; bool cancelled; gomp_sem_t taskgroup_sem; diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index c8f08e7bf4bb..58a28d4a6b82 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -318,6 +318,9 @@ GOMP_5.0 { global: GOMP_taskwait_depend; GOMP_teams_reg; + GOMP_taskgroup_reduction_register; + GOMP_taskgroup_reduction_unregister; + GOMP_task_reduction_remap; } GOMP_4.5; OACC_2.0 { diff --git a/libgomp/task.c b/libgomp/task.c index 3813a382fc10..c5f77b002737 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -1768,7 +1768,7 @@ GOMP_taskgroup_start (void) struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; struct gomp_task *task = thr->task; - struct gomp_taskgroup *taskgroup; + struct gomp_taskgroup *taskgroup, *prev; /* If team is NULL, all tasks are executed as GOMP_TASK_UNDEFERRED tasks and thus all children tasks of @@ -1777,9 +1777,11 @@ GOMP_taskgroup_start (void) if (team == NULL) return; taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); - taskgroup->prev = task->taskgroup; + prev = task->taskgroup; + taskgroup->prev = prev; priority_queue_init (&taskgroup->taskgroup_queue); taskgroup->in_taskgroup_wait = false; + taskgroup->reductions = prev ? prev->reductions : NULL; taskgroup->cancelled = false; taskgroup->num_children = 0; gomp_sem_init (&taskgroup->taskgroup_sem, 0); @@ -1948,6 +1950,182 @@ GOMP_taskgroup_end (void) free (taskgroup); } +/* The format of data is: + data[0] cnt + data[1] size + data[2] alignment (on output array pointer) + data[3] allocator (-1 if malloc allocator) + data[4] next pointer + data[5] used internally (htab pointer) + data[6] used internally (end of array) + cnt times + ent[0] address + ent[1] offset + ent[2] used internally (pointer to data[0]). */ + +void +GOMP_taskgroup_reduction_register (uintptr_t *data) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task = thr->task; + unsigned nthreads = team ? team->nthreads : 1; + size_t total_cnt = 0; + uintptr_t *d = data; + uintptr_t *old = task->taskgroup->reductions; + do + { + size_t sz = d[1] * nthreads; + /* Should use omp_alloc if d[3] is not -1. */ + void *ptr = gomp_aligned_alloc (d[2], sz); + memset (ptr, '\0', sz); + d[2] = (uintptr_t) ptr; + d[5] = 0; + d[6] = d[2] + sz; + total_cnt += d[0]; + if (d[4] == 0) + { + d[4] = (uintptr_t) old; + break; + } + else + d = (uintptr_t *) d[4]; + } + while (1); + struct htab *old_htab = NULL; + if (old && old[5]) + { + old_htab = (struct htab *) old[5]; + total_cnt += htab_elements (old_htab); + } + struct htab *new_htab = htab_create (total_cnt); + if (old_htab) + { + /* Copy old hash table, like in htab_expand. */ + hash_entry_type *p, *olimit; + new_htab->n_elements = htab_elements (old_htab); + olimit = old_htab->entries + old_htab->size; + p = old_htab->entries; + do + { + hash_entry_type x = *p; + if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) + *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x; + p++; + } + while (p < olimit); + } + d = data; + do + { + size_t j; + for (j = 0; j < d[0]; ++j) + { + uintptr_t *p = d + 7 + j * 3; + p[2] = (uintptr_t) d; + /* Ugly hack, hash_entry_type is defined for the task dependencies, + which hash on the first element which is a pointer. We need + to hash also on the first sizeof (uintptr_t) bytes which contain + a pointer. Hide the cast from the compiler. */ + hash_entry_type n; + __asm ("" : "=g" (n) : "0" (p)); + *htab_find_slot (&new_htab, n, INSERT) = n; + } + if (d[4] == (uintptr_t) old) + break; + else + d = (uintptr_t *) d[4]; + } + while (1); + d[5] = (uintptr_t) new_htab; + task->taskgroup->reductions = data; +} + +void +GOMP_taskgroup_reduction_unregister (uintptr_t *data) +{ + uintptr_t *d = data; + htab_free ((struct htab *) data[5]); + do + { + gomp_aligned_free ((void *) d[2]); + d = (uintptr_t *) d[4]; + } + while (d && !d[5]); +} + +/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the + original list item or address of previously remapped original list + item to address of the private copy, store that to ptrs[i]. + For i < cntorig, additionally set ptrs[cnt+i] to the address of + the original list item. */ + +void +GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_task *task = thr->task; + unsigned id = thr->ts.team_id; + uintptr_t *data = task->taskgroup->reductions; + uintptr_t *d; + struct htab *reduction_htab = (struct htab *) data[5]; + size_t i; + for (i = 0; i < cnt; ++i) + { + hash_entry_type ent, n; + __asm ("" : "=g" (ent) : "0" (ptrs + i)); + n = htab_find (reduction_htab, ent); + if (n) + { + uintptr_t *p; + __asm ("" : "=g" (p) : "0" (n)); + /* At this point, p[0] should be equal to (uintptr_t) ptrs[i], + p[1] is the offset within the allocated chunk for each + thread, p[2] is the array registered with + GOMP_taskgroup_reduction_register, d[2] is the base of the + allocated memory and d[1] is the size of the allocated chunk + for one thread. */ + d = (uintptr_t *) p[2]; + ptrs[i] = (void *) (d[2] + id * d[1] + p[1]); + if (__builtin_expect (i < cntorig, 0)) + ptrs[cnt + i] = (void *) p[0]; + continue; + } + d = data; + while (d != NULL) + { + if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6]) + break; + d = (uintptr_t *) d[4]; + } + if (d == NULL) + gomp_fatal ("couldn't find matching task_reduction or reduction with " + "task modifier for %p", ptrs[i]); + uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1]; + ptrs[i] = (void *) (d[2] + id * d[1] + off); + if (__builtin_expect (i < cntorig, 0)) + { + size_t lo = 0, hi = d[0] - 1; + while (lo <= hi) + { + size_t m = (lo + hi) / 2; + if (d[7 + 3 * m + 1] < off) + lo = m + 1; + else if (d[7 + 3 * m + 1] == off) + { + ptrs[cnt + i] = (void *) d[7 + 3 * m]; + break; + } + else + hi = m - 1; + } + if (lo > hi) + gomp_fatal ("couldn't find matching task_reduction or reduction " + "with task modifier for %p", ptrs[i]); + } + } +} + int omp_in_final (void) { diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-1.C b/libgomp/testsuite/libgomp.c++/task-reduction-1.C new file mode 100644 index 000000000000..b3e228e5310c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-1.C @@ -0,0 +1,63 @@ +extern "C" void abort (); + +int as; +int &a = as; +long int bs = 1; +long int &b = bs; + +void +foo (int &c, long long int &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: d) in_reduction (+: c) \ + in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + } +} + +int +main () +{ + int cs = 0; + int &c = cs; + long long int ds = 1; + #pragma omp parallel + #pragma omp single + { + long long int &d = ds; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + int j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (c, d); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3LL * 3LL * 3LL * 3LL) + if (d != (THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 * THREEP4 + * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 28 * 9) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-2.C b/libgomp/testsuite/libgomp.c++/task-reduction-2.C new file mode 100644 index 000000000000..75d2ee37e4fc --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-2.C @@ -0,0 +1,119 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a = { 0, 7 }; +S b (1, 5); + +void +foo () +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +void +test () +{ + S c = { 0, 7 }; + #pragma omp parallel + #pragma omp single + { + S d (1, 5); + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-3.C b/libgomp/testsuite/libgomp.c++/task-reduction-3.C new file mode 100644 index 000000000000..a6eccf6ced6b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-3.C @@ -0,0 +1,126 @@ +extern "C" void abort (); + +struct S { S (); S (long long int, int); ~S (); static int cnt1, cnt2, cnt3; long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S as = { 0LL, 7 }; +S &a = as; +S bs (1LL, 5); +S &b = bs; + +void +foo (S &c, S &d) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: c) in_reduction (*: b, d) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } +} + +void +test () +{ + S cs = { 0LL, 7 }; + S &c = cs; + S ds (1LL, 5); + #pragma omp parallel + #pragma omp single + { + S &d = ds; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (c, d); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP7 (3LL * 3LL * 3LL * 3LL * 3LL * 3LL * 3LL) + if (d.s != (THREEP7 * THREEP7 * THREEP7 * THREEP7) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 28 * 9 || c.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-4.C b/libgomp/testsuite/libgomp.c++/task-reduction-4.C new file mode 100644 index 000000000000..409c2ca19abb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-4.C @@ -0,0 +1,236 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; + +void +foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int *&p, int (&q)[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int &n) +{ + int cs[2] = { 0, 0 }; + int (&c)[2] = cs; + int ps[3] = { 0, 1, 4 }; + int (&p)[3] = ps; + int qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + int (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + int ms[3] = { 5, 0, 5 }; + int os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + #pragma omp parallel + #pragma omp single + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + int (&m)[3] = ms; + int rs[5] = { 6, 7, 0, 0, 9 }; + int (&r)[5] = rs; + int (&o)[4] = os; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + int *cp = c; + long long int *dp = d; + int *rp = r; + int *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main () +{ + int n = 1; + test (n); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-5.C b/libgomp/testsuite/libgomp.c++/task-reduction-5.C new file mode 100644 index 000000000000..91551ec052eb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/task-reduction-5.C @@ -0,0 +1,317 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c new file mode 100644 index 000000000000..6c6191d96d59 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-1.c @@ -0,0 +1,58 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a; +long int b = 1; + +void +foo (void) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) + { + a += 7; + b *= 2; + } +} + +int +main () +{ + int c = 0; + #pragma omp parallel + #pragma omp single + { + long int d = 1; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + int j; + a += 7; + b *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b, d) + { + a += 7; + b *= 2; + c += 9; + d *= 3; + foo (); + } + c += 9; + d *= 3; + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d != (THREEP4 * THREEP4 * THREEP4)) + abort (); + } + if (a != 28 * 7 || b != (1L << 28) || c != 12 * 9) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c new file mode 100644 index 000000000000..aad725c29e34 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-2.c @@ -0,0 +1,90 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +struct S { long int s, t; }; + +void +bar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +struct S a = { 0, 7 }; +struct S b = { 1, 5 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (*: b) in_reduction (+: a) + { + a.s += 7; + b.s *= 2; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9)) + abort (); + } +} + +int +main () +{ + struct S c = { 0, 7 }; + #pragma omp parallel + #pragma omp single + { + struct S d = { 1, 5 }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b, d) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (*: b, d) in_reduction (+: a, c) + { + int j; + a.s += 7; + b.s *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a) in_reduction (*: b) \ + in_reduction (+: c) in_reduction (*: d) + { + a.s += 7; + b.s *= 2; + c.s += 9; + d.s *= 3; + foo (); + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + c.s += 9; + d.s *= 3; + if ((a.t != 7 && a.t != 3) || (b.t != 5 && b.t != 9) + || (c.t != 7 && c.t != 3) || (d.t != 5 && d.t != 9)) + abort (); + } + } +#define THREEP4 (3L * 3L * 3L * 3L) + if (d.s != (THREEP4 * THREEP4 * THREEP4) || d.t != 5) + abort (); + } + if (a.s != 28 * 7 || a.t != 7 || b.s != (1L << 28) || b.t != 5 + || c.s != 12 * 9 || c.t != 7) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c new file mode 100644 index 000000000000..8a90e86e8470 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/task-reduction-3.c @@ -0,0 +1,218 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + for (i = 0; i < 2; i++) + #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ + task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + task_reduction (*: t[2:2][:], s[1:n + 1]) + { + int i; + for (i = 0; i < 4; i++) + #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ + in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ + in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] != 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +} -- 2.47.2