From: Jakub Jelinek Date: Tue, 16 Oct 2018 16:05:23 +0000 (+0200) Subject: tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__REDUCTEMP_. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=65874a8db528871e5c0cbe2f5d98a5ab557214e4;p=thirdparty%2Fgcc.git tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__REDUCTEMP_. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__REDUCTEMP_. * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__REDUCTEMP_ instead of OMP_CLAUSE__LOOPTEMP_. * tree.c (omp_clause_num_ops, omp_clause_code_name): Add _reductemp_ clause. (walk_tree_1): Handle OMP_CLAUSE__REDUCTEMP_. * tree-nested.c (convert_nonlocal_omp_clauses, convert_local_omp_clauses): Likewise. * tree-pretty-print.c (dump_omp_clause): Likewise. * omp-low.c (struct omp_context): Add task_reductions and task_reduction_map fields. (delete_omp_context): Release task_reductions and task_reduction_map. (scan_sharing_clauses): Don't add any fields for reduction clause on taskloop. Handle OMP_CLAUSE__REDUCTEMP_. (add_taskreg_looptemp_clauses): Add OMP_CLAUSE__REDUCTEMP_ clause if needed. (finish_taskreg_scan): Move also OMP_CLAUSE__REDUCTEMP_ clause in front if present. (task_reduction_read): New function. (lower_rec_input_clauses): Handle OMP_CLAUSE_REDUCTION on taskloop construct. Pass NULL_TREE instead of build_outer_var_ref to omp_clause_default_ctor langhook for task reductions for now. Handle OMP_CLAUSE__REDUCTEMP_. (lower_send_clauses): Handle OMP_CLAUSE__REDUCTEMP_. Don't send anything for OMP_CLAUSE_REDUCTION on taskloop. (lower_omp_task_reductions): Unshare byte positions before gimplifying them. Remember mapping of clauses to indexes in the registered array and if offsets or whole sizes are constant, those constants. Don't clear OMP_CLAUSE_REDUCTION_PLACEHOLDER or OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER for OMP_CLAUSE_REDUCTION. Handle code OMP_TASKLOOP. (create_task_copyfn): Copy over OMP_CLAUSE__REDUCTEMP_ pointer. 
(lower_omp_taskreg): Handle reduction clause on taskloop construct. * omp-expand.c (expand_task_call): Add GOMP_TASK_FLAG_REDUCTION flag to flags if there are any reduction clauses. gcc/c-family/ * c-omp.c (c_omp_split_clauses) <case OMP_CLAUSE_IN_REDUCTION>: For taskloop simd, copy the clause to simd construct transformed into OMP_CLAUSE_REDUCTION clause. gcc/c/ * c-typeck.c (handle_omp_array_sections): Call save_expr on array reductions before calling build_index_type. (c_finish_omp_clauses): Diagnose nogroup clause used with reduction clause(s). gcc/cp/ * pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_IN_REDUCTION and OMP_CLAUSE_TASK_REDUCTION. * semantics.c (handle_omp_array_sections): Call save_expr on array reductions before calling build_index_type. (finish_omp_clauses): Diagnose nogroup clause used with reduction clause(s). gcc/testsuite/ * c-c++-common/gomp/clauses-1.c (r2): New variable. (bar): Put taskloop simd inside of taskgroup with task_reduction, use in_reduction clause instead of reduction. Add another taskloop simd without nogroup clause, but with reduction clause and a new in_reduction. * c-c++-common/gomp/taskloop-reduction-1.c: New test. include/ * gomp-constants.h (GOMP_TASK_FLAG_REDUCTION): Define. libgomp/ * task.c (GOMP_taskgroup_reduction_register): Add ialias. * taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_REDUCTION flag by calling GOMP_taskgroup_reduction_register. * libgomp_g.h: Include gstdint.h. (GOMP_taskgroup_reduction_register, GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap): New prototypes. * testsuite/libgomp.c-c++-common/taskloop-reduction-1.c: New test. * testsuite/libgomp.c-c++-common/taskloop-reduction-2.c: New test. * testsuite/libgomp.c++/taskloop-reduction-1.C: New test. * testsuite/libgomp.c++/taskloop-reduction-2.C: New test. * testsuite/libgomp.c++/taskloop-reduction-3.C: New test. * testsuite/libgomp.c++/taskloop-reduction-4.C: New test. 
* testsuite/libgomp.c++/task-reduction-4.C (foo): Turn into function template, replace all int occurrences with the template parameter T. (test): Likewise. From-SVN: r265202 --- diff --git a/gcc/ChangeLog.gomp b/gcc/ChangeLog.gomp index 82e5bcc8107d..9177582ce290 100644 --- a/gcc/ChangeLog.gomp +++ b/gcc/ChangeLog.gomp @@ -1,3 +1,41 @@ +2018-10-16 Jakub Jelinek + + * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__REDUCTEMP_. + * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__REDUCTEMP_ instead of + OMP_CLAUSE__LOOPTEMP_. + * tree.c (omp_clause_num_ops, omp_clause_code_name): Add _reductemp_ + clause. + (walk_tree_1): Handle OMP_CLAUSE__REDUCTEMP_. + * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Likewise. + * tree-pretty-print.c (dump_omp_clause): Likewise. + * omp-low.c (struct omp_context): Add task_reductions and + task_reduction_map fields. + (delete_omp_context): Release task_reductions and task_reduction_map. + (scan_sharing_clauses): Don't add any fields for reduction clause on + taskloop. Handle OMP_CLAUSE__REDUCTEMP_. + (add_taskreg_looptemp_clauses): Add OMP_CLAUSE__REDUCTEMP_ clause if + needed. + (finish_taskreg_scan): Move also OMP_CLAUSE__REDUCTEMP_ clause in + front if present. + (task_reduction_read): New function. + (lower_rec_input_clauses): Handle OMP_CLAUSE_REDUCTION on taskloop + construct. Pass NULL_TREE instead of build_outer_var_ref to + omp_clause_default_ctor langhook for task reductions for now. Handle + OMP_CLAUSE__REDUCTEMP_. + (lower_send_clauses): Handle OMP_CLAUSE__REDUCTEMP_. Don't send + anything for OMP_CLAUSE_REDUCTION on taskloop. + (lower_omp_task_reductions): Unshare byte positions before gimplifying + them. Remember mapping of clauses to indexes in the registered array + and if offsets or whole sizes are constant, those constants. Don't + clear OMP_CLAUSE_REDUCTION_PLACEHOLDER or + OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER for OMP_CLAUSE_REDUCTION. + Handle code OMP_TASKLOOP. 
+ (create_task_copyfn): Copy over OMP_CLAUSE__REDUCTEMP_ pointer. + (lower_omp_taskreg): Handle reduction clause on taskloop construct. + * omp-expand.c (expand_task_call): Add GOMP_TASK_FLAG_REDUCTION flag + to flags if there are any reduction clauses. + 2018-10-10 Jakub Jelinek * omp-low.c (lower_rec_input_clauses): Handle VLAs properly. diff --git a/gcc/c-family/ChangeLog.gomp b/gcc/c-family/ChangeLog.gomp index edce73eaef9e..a4b9be14c103 100644 --- a/gcc/c-family/ChangeLog.gomp +++ b/gcc/c-family/ChangeLog.gomp @@ -1,3 +1,9 @@ +2018-10-16 Jakub Jelinek + + * c-omp.c (c_omp_split_clauses) <case OMP_CLAUSE_IN_REDUCTION>: For + taskloop simd, copy the clause to simd construct transformed into + OMP_CLAUSE_REDUCTION clause. + 2018-08-01 Jakub Jelinek * c-omp.c (c_finish_omp_depobj): Test for OMP_CLAUSE_DEPEND_DEPOBJ diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c index 82ef5924ef51..630d41b73156 100644 --- a/gcc/c-family/c-omp.c +++ b/gcc/c-family/c-omp.c @@ -1363,7 +1363,6 @@ c_omp_split_clauses (location_t loc, enum tree_code code, case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_PRIORITY: - case OMP_CLAUSE_IN_REDUCTION: s = C_OMP_CLAUSE_SPLIT_TASKLOOP; break; /* Duplicate this to all of taskloop, distribute, for and simd. */ @@ -1674,6 +1673,25 @@ c_omp_split_clauses (location_t loc, enum tree_code code, else s = C_OMP_CLAUSE_SPLIT_TEAMS; break; + case OMP_CLAUSE_IN_REDUCTION: + /* in_reduction on taskloop simd becomes reduction on the simd + and keeps being in_reduction on taskloop. 
*/ + if (code == OMP_SIMD) + { + c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses), + OMP_CLAUSE_REDUCTION); + OMP_CLAUSE_DECL (c) = OMP_CLAUSE_DECL (clauses); + OMP_CLAUSE_REDUCTION_CODE (c) + = OMP_CLAUSE_REDUCTION_CODE (clauses); + OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) + = OMP_CLAUSE_REDUCTION_PLACEHOLDER (clauses); + OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) + = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (clauses); + OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_SIMD]; + cclauses[C_OMP_CLAUSE_SPLIT_SIMD] = c; + } + s = C_OMP_CLAUSE_SPLIT_TASKLOOP; + break; case OMP_CLAUSE_IF: if (OMP_CLAUSE_IF_MODIFIER (clauses) != ERROR_MARK) { diff --git a/gcc/c/ChangeLog.gomp b/gcc/c/ChangeLog.gomp index 05c90aa092a1..d759f0d8a78e 100644 --- a/gcc/c/ChangeLog.gomp +++ b/gcc/c/ChangeLog.gomp @@ -1,3 +1,10 @@ +2018-10-16 Jakub Jelinek + + * c-typeck.c (handle_omp_array_sections): Call save_expr on array + reductions before calling build_index_type. + (c_finish_omp_clauses): Diagnose nogroup clause used with reduction + clause(s). 
+ 2018-08-02 Jakub Jelinek * c-parser.c (c_parser_omp_iterators): Build vector with 6 elts diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 4746d9e8b282..9f03ff7defdf 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -12930,6 +12930,7 @@ handle_omp_array_sections (tree c, enum c_omp_region_type ort) { size = size_binop (MINUS_EXPR, size, size_one_node); size = c_fully_fold (size, false, NULL); + size = save_expr (size); tree index_type = build_index_type (size); tree eltype = TREE_TYPE (first); while (TREE_CODE (eltype) == ARRAY_TYPE) @@ -13250,6 +13251,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bool oacc_async = false; tree last_iterators = NULL_TREE; bool last_iterators_remove = false; + tree *nogroup_seen = NULL; + bool reduction_seen = false; bitmap_obstack_initialize (NULL); bitmap_initialize (&generic_head, &bitmap_default_obstack); @@ -13288,6 +13291,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) goto check_dup_generic; case OMP_CLAUSE_REDUCTION: + reduction_seen = true; + /* FALLTHRU */ case OMP_CLAUSE_IN_REDUCTION: case OMP_CLAUSE_TASK_REDUCTION: need_implicitly_determined = true; @@ -14186,7 +14191,6 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_PRIORITY: case OMP_CLAUSE_GRAINSIZE: case OMP_CLAUSE_NUM_TASKS: - case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_THREADS: case OMP_CLAUSE_SIMD: case OMP_CLAUSE_HINT: @@ -14208,6 +14212,11 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) pc = &OMP_CLAUSE_CHAIN (c); continue; + case OMP_CLAUSE_NOGROUP: + nogroup_seen = pc; + pc = &OMP_CLAUSE_CHAIN (c); + continue; + case OMP_CLAUSE_SCHEDULE: if (OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) { @@ -14372,6 +14381,14 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) pc = &OMP_CLAUSE_CHAIN (c); } + if (nogroup_seen && reduction_seen) + { + error_at (OMP_CLAUSE_LOCATION (*nogroup_seen), + "% clause must not be used together with 
" + "% clause"); + *nogroup_seen = OMP_CLAUSE_CHAIN (*nogroup_seen); + } + bitmap_obstack_release (NULL); return clauses; } diff --git a/gcc/cp/ChangeLog.gomp b/gcc/cp/ChangeLog.gomp index b7db7b3690d4..88c01445757a 100644 --- a/gcc/cp/ChangeLog.gomp +++ b/gcc/cp/ChangeLog.gomp @@ -1,3 +1,12 @@ +2018-10-16 Jakub Jelinek + + * pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_IN_REDUCTION and + OMP_CLAUSE_TASK_REDUCTION. + * semantics.c (handle_omp_array_sections): Call save_expr on array + reductions before calling build_index_type. + (finish_omp_clauses): Diagnose nogroup clause used with reduction + clause(s). + 2018-09-27 Jakub Jelinek * semantics.c (finish_omp_reduction_clause): Don't mark diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 2a74789e2666..a0e196d921fb 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -16100,6 +16100,8 @@ tsubst_omp_clauses (tree clauses, enum c_omp_region_type ort, in_decl, /*integral_constant_expression_p=*/false); break; case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_IN_REDUCTION: + case OMP_CLAUSE_TASK_REDUCTION: if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (oc)) { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (oc); @@ -16181,6 +16183,8 @@ tsubst_omp_clauses (tree clauses, enum c_omp_region_type ort, case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_LINEAR: case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_IN_REDUCTION: + case OMP_CLAUSE_TASK_REDUCTION: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: /* tsubst_expr on SCOPE_REF results in returning diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 2b19c775d52a..23d4e51ed042 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -5023,6 +5023,7 @@ handle_omp_array_sections (tree c, enum c_omp_region_type ort) || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION) { size = size_binop (MINUS_EXPR, size, size_one_node); + size = save_expr (size); tree index_type = build_index_type (size); tree eltype = TREE_TYPE (first); while (TREE_CODE (eltype) == ARRAY_TYPE) @@ -6023,6 +6024,7 @@ 
finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bool oacc_async = false; tree last_iterators = NULL_TREE; bool last_iterators_remove = false; + bool reduction_seen = false; bitmap_obstack_initialize (NULL); bitmap_initialize (&generic_head, &bitmap_default_obstack); @@ -6057,6 +6059,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) field_ok = ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP); goto check_dup_generic; case OMP_CLAUSE_REDUCTION: + reduction_seen = true; + /* FALLTHRU */ case OMP_CLAUSE_IN_REDUCTION: case OMP_CLAUSE_TASK_REDUCTION: field_ok = ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP); @@ -7597,6 +7601,17 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } pc = &OMP_CLAUSE_CHAIN (c); continue; + case OMP_CLAUSE_NOGROUP: + if (reduction_seen) + { + error_at (OMP_CLAUSE_LOCATION (c), + "% clause must not be used together with " + "% clause"); + *pc = OMP_CLAUSE_CHAIN (c); + continue; + } + pc = &OMP_CLAUSE_CHAIN (c); + continue; case OMP_CLAUSE_NOWAIT: if (copyprivate_seen) { diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index dbeb79140a0c..58e7e85fa884 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -792,6 +792,8 @@ expand_task_call (struct omp_region *region, basic_block bb, if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) iflags |= GOMP_TASK_FLAG_NOGROUP; ull = fd.iter_type == long_long_unsigned_type_node; + if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION)) + iflags |= GOMP_TASK_FLAG_REDUCTION; } else if (priority) iflags |= GOMP_TASK_FLAG_PRIORITY; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index a42bd1b11b1f..c3fd796b6d9c 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -114,6 +114,15 @@ struct omp_context otherwise. */ gimple *simt_stmt; + /* For task reductions registered in this context, a vector containing + the length of the private copies block (if constant, otherwise NULL) + and then offsets (if constant, otherwise NULL) for each entry. 
*/ + vec task_reductions; + + /* And a hash map from the reduction clauses to the registered array + elts. */ + hash_map *task_reduction_map; + /* Nesting depth of this context. Used to beautify error messages re invalid gotos. The outermost ctx is depth 1, with depth 0 being reserved for the main body of the function. */ @@ -936,6 +945,12 @@ delete_omp_context (splay_tree_value value) if (is_task_ctx (ctx)) finalize_task_copyfn (as_a (ctx->stmt)); + if (ctx->task_reduction_map) + { + ctx->task_reductions.release (); + delete ctx->task_reduction_map; + } + XDELETE (ctx); } @@ -1081,7 +1096,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) || (TREE_CODE (TREE_TYPE (t)) == REFERENCE_TYPE && (TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == POINTER_TYPE))))) - && !is_variable_sized (t)) + && !is_variable_sized (t) + && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION + || !is_task_ctx (ctx))) { by_ref = use_pointer_for_field (t, NULL); if (is_task_ctx (ctx) @@ -1103,7 +1120,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) if (!is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))) { by_ref = use_pointer_for_field (decl, ctx); - install_var_field (decl, by_ref, 3, ctx); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IN_REDUCTION) + install_var_field (decl, by_ref, 3, ctx); } install_var_local (decl, ctx); break; @@ -1180,6 +1198,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) goto do_private; case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: gcc_assert (is_taskreg_ctx (ctx)); decl = OMP_CLAUSE_DECL (c); install_var_field (decl, false, 3, ctx); @@ -1517,6 +1536,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_ALIGNED: case OMP_CLAUSE_DEPEND: case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: case OMP_CLAUSE_PRIORITY: @@ -1748,7 +1768,7 @@ omp_find_combined_for (gimple_stmt_iterator *gsi_p, return NULL; } -/* Add _LOOPTEMP_ clauses on OpenMP parallel or task. 
*/ +/* Add _LOOPTEMP_/_REDUCTEMP_ clauses on OpenMP parallel or task. */ static void add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, @@ -1795,6 +1815,18 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, gimple_omp_taskreg_set_clauses (stmt, c); } } + if (msk == GF_OMP_FOR_KIND_TASKLOOP + && omp_find_clause (gimple_omp_task_clauses (stmt), + OMP_CLAUSE_REDUCTION)) + { + tree type = build_pointer_type (pointer_sized_int_node); + tree temp = create_tmp_var (type); + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__REDUCTEMP_); + insert_decl_map (&outer_ctx->cb, temp, temp); + OMP_CLAUSE_DECL (c) = temp; + OMP_CLAUSE_CHAIN (c) = gimple_omp_task_clauses (stmt); + gimple_omp_task_set_clauses (stmt, c); + } } /* Scan an OpenMP parallel directive. */ @@ -2026,33 +2058,50 @@ finish_taskreg_scan (omp_context *ctx) /* Move fields corresponding to first and second _looptemp_ clause first. There are filled by GOMP_taskloop and thus need to be in specific positions. */ - tree c1 = gimple_omp_task_clauses (ctx->stmt); - c1 = omp_find_clause (c1, OMP_CLAUSE__LOOPTEMP_); + tree clauses = gimple_omp_task_clauses (ctx->stmt); + tree c1 = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); tree c2 = omp_find_clause (OMP_CLAUSE_CHAIN (c1), OMP_CLAUSE__LOOPTEMP_); + tree c3 = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx); tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx); + tree f3 = c3 ? 
lookup_field (OMP_CLAUSE_DECL (c3), ctx) : NULL_TREE; p = &TYPE_FIELDS (ctx->record_type); while (*p) - if (*p == f1 || *p == f2) + if (*p == f1 || *p == f2 || *p == f3) *p = DECL_CHAIN (*p); else p = &DECL_CHAIN (*p); DECL_CHAIN (f1) = f2; - DECL_CHAIN (f2) = TYPE_FIELDS (ctx->record_type); + if (c3) + { + DECL_CHAIN (f2) = f3; + DECL_CHAIN (f3) = TYPE_FIELDS (ctx->record_type); + } + else + DECL_CHAIN (f2) = TYPE_FIELDS (ctx->record_type); TYPE_FIELDS (ctx->record_type) = f1; if (ctx->srecord_type) { f1 = lookup_sfield (OMP_CLAUSE_DECL (c1), ctx); f2 = lookup_sfield (OMP_CLAUSE_DECL (c2), ctx); + if (c3) + f3 = lookup_sfield (OMP_CLAUSE_DECL (c3), ctx); p = &TYPE_FIELDS (ctx->srecord_type); while (*p) - if (*p == f1 || *p == f2) + if (*p == f1 || *p == f2 || *p == f3) *p = DECL_CHAIN (*p); else p = &DECL_CHAIN (*p); DECL_CHAIN (f1) = f2; DECL_CHAIN (f2) = TYPE_FIELDS (ctx->srecord_type); + if (c3) + { + DECL_CHAIN (f2) = f3; + DECL_CHAIN (f3) = TYPE_FIELDS (ctx->srecord_type); + } + else + DECL_CHAIN (f2) = TYPE_FIELDS (ctx->srecord_type); TYPE_FIELDS (ctx->srecord_type) = f1; } } @@ -3609,6 +3658,30 @@ handle_simd_reference (location_t loc, tree new_vard, gimple_seq *ilist) } } +/* Helper function for lower_rec_input_clauses. Emit into ilist sequence + code to emit (type) (tskred_temp[idx]). 
*/ + +static tree +task_reduction_read (gimple_seq *ilist, tree tskred_temp, tree type, + unsigned idx) +{ + unsigned HOST_WIDE_INT sz + = tree_to_uhwi (TYPE_SIZE_UNIT (pointer_sized_int_node)); + tree r = build2 (MEM_REF, pointer_sized_int_node, + tskred_temp, build_int_cst (TREE_TYPE (tskred_temp), + idx * sz)); + tree v = create_tmp_var (pointer_sized_int_node); + gimple *g = gimple_build_assign (v, r); + gimple_seq_add_stmt (ilist, g); + if (!useless_type_conversion_p (type, pointer_sized_int_node)) + { + v = create_tmp_var (type); + g = gimple_build_assign (v, NOP_EXPR, gimple_assign_lhs (g)); + gimple_seq_add_stmt (ilist, g); + } + return v; +} + /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN, from the receiver (aka child) side and initializers for REFERENCE_TYPE private variables. Initialization statements go in ILIST, while calls @@ -3669,7 +3742,9 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, unsigned task_reduction_cntorig = 0; unsigned task_reduction_cnt_full = 0; unsigned task_reduction_cntorig_full = 0; + unsigned task_reduction_other_cnt = 0; tree tskred_atype = NULL_TREE, tskred_avar = NULL_TREE; + tree tskred_base = NULL_TREE, tskred_temp = NULL_TREE; /* Do all the fixed sized types in the first pass, and the variable sized types in the second pass. This makes sure that the scalar arguments to the variable sized types are processed before we use them in the @@ -3677,9 +3752,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, first two we ignore them, in the third one gather arguments for GOMP_task_reduction_remap call and in the last pass actually handle the task reductions. */ - for (pass = 0; pass < (task_reduction_cnt ? 4 : 2); ++pass) + for (pass = 0; pass < ((task_reduction_cnt || task_reduction_other_cnt) + ? 
4 : 2); ++pass) { - if (pass == 2) + if (pass == 2 && task_reduction_cnt) { tskred_atype = build_array_type_nelts (ptr_type_node, task_reduction_cnt @@ -3690,7 +3766,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, task_reduction_cnt_full = task_reduction_cnt; task_reduction_cntorig_full = task_reduction_cntorig; } - else if (pass == 3) + else if (pass == 3 && task_reduction_cnt) { x = builtin_decl_explicit (BUILT_IN_GOMP_TASK_REDUCTION_REMAP); gimple *g @@ -3699,8 +3775,39 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, build_fold_addr_expr (tskred_avar)); gimple_seq_add_stmt (ilist, g); } + if (pass == 3 && task_reduction_other_cnt) + { + /* For reduction clauses, build + tskred_base = (void *) tskred_temp[2] + + omp_get_thread_num () * tskred_temp[1] + or if tskred_temp[1] is known to be constant, that constant + directly. This is the start of the private reduction copy block + for the current thread. */ + tree v = create_tmp_var (integer_type_node); + x = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + gimple *g = gimple_build_call (x, 0); + gimple_call_set_lhs (g, v); + gimple_seq_add_stmt (ilist, g); + c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); + tskred_temp = lookup_decl (OMP_CLAUSE_DECL (c), ctx); + tree v2 = create_tmp_var (sizetype); + g = gimple_build_assign (v2, NOP_EXPR, v); + gimple_seq_add_stmt (ilist, g); + if (ctx->task_reductions[0]) + v = fold_convert (sizetype, ctx->task_reductions[0]); + else + v = task_reduction_read (ilist, tskred_temp, sizetype, 1); + tree v3 = create_tmp_var (sizetype); + g = gimple_build_assign (v3, MULT_EXPR, v2, v); + gimple_seq_add_stmt (ilist, g); + v = task_reduction_read (ilist, tskred_temp, ptr_type_node, 2); + tskred_base = create_tmp_var (ptr_type_node); + g = gimple_build_assign (tskred_base, POINTER_PLUS_EXPR, v, v3); + gimple_seq_add_stmt (ilist, g); + } task_reduction_cnt = 0; task_reduction_cntorig = 0; + 
task_reduction_other_cnt = 0; for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) { enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c); @@ -3744,7 +3851,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, if (is_task_ctx (ctx) /* || OMP_CLAUSE_REDUCTION_TASK (c) */) { task_reduction_p = true; - task_reduction_cnt++; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + task_reduction_other_cnt++; + if (pass == 2) + continue; + } + else + task_reduction_cnt++; if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) { var = OMP_CLAUSE_DECL (c); @@ -3758,13 +3872,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, (maybe_lookup_decl_in_outer_ctx (var, ctx))) { task_reduction_needs_orig_p = true; - task_reduction_cntorig++; + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + task_reduction_cntorig++; } } } break; case OMP_CLAUSE__LOOPTEMP_: - /* Handle _looptemp_ clauses only on parallel/task. */ + case OMP_CLAUSE__REDUCTEMP_: + /* Handle _looptemp_/_reductemp_ clauses only on + parallel/task. 
*/ if (fd) continue; break; @@ -3939,17 +4056,35 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, const char *name = get_name (orig_var); if (pass == 3) { - unsigned cnt = task_reduction_cnt - 1; - if (!task_reduction_needs_orig_p) - cnt += (task_reduction_cntorig_full - - task_reduction_cntorig); - else - cnt = task_reduction_cntorig - 1; - x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, - size_int (cnt), NULL_TREE, NULL_TREE); tree xv = create_tmp_var (ptr_type_node); - gimple *g = gimple_build_assign (xv, x); - gimple_seq_add_stmt (ilist, g); + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + { + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += (task_reduction_cntorig_full + - task_reduction_cntorig); + else + cnt = task_reduction_cntorig - 1; + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + + gimple *g = gimple_build_assign (xv, x); + gimple_seq_add_stmt (ilist, g); + } + else + { + unsigned int idx = *ctx->task_reduction_map->get (c); + tree off; + if (ctx->task_reductions[1 + idx]) + off = fold_convert (sizetype, + ctx->task_reductions[1 + idx]); + else + off = task_reduction_read (ilist, tskred_temp, sizetype, + 7 + 3 * idx + 1); + gimple *g = gimple_build_assign (xv, POINTER_PLUS_EXPR, + tskred_base, off); + gimple_seq_add_stmt (ilist, g); + } x = fold_convert (build_pointer_type (boolean_type_node), xv); if (TREE_CONSTANT (v)) @@ -4101,11 +4236,19 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree y3 = NULL_TREE, y4 = NULL_TREE; if (task_reduction_needs_orig_p) { - tree ref = build4 (ARRAY_REF, ptr_type_node, tskred_avar, - size_int (task_reduction_cnt_full - + task_reduction_cntorig - 1), - NULL_TREE, NULL_TREE); y3 = create_tmp_var (ptype); + tree ref; + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + ref = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (task_reduction_cnt_full + + 
task_reduction_cntorig - 1), + NULL_TREE, NULL_TREE); + else + { + unsigned int idx = *ctx->task_reduction_map->get (c); + ref = task_reduction_read (ilist, tskred_temp, ptype, + 7 + 3 * idx); + } gimplify_assign (y3, ref, ilist); } else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd) @@ -4284,13 +4427,30 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree type = TREE_TYPE (new_var); if (!omp_is_reference (var)) type = build_pointer_type (type); - unsigned cnt = task_reduction_cnt - 1; - if (!task_reduction_needs_orig_p) - cnt += task_reduction_cntorig_full - task_reduction_cntorig; + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + { + unsigned cnt = task_reduction_cnt - 1; + if (!task_reduction_needs_orig_p) + cnt += (task_reduction_cntorig_full + - task_reduction_cntorig); + else + cnt = task_reduction_cntorig - 1; + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (cnt), NULL_TREE, NULL_TREE); + } else - cnt = task_reduction_cntorig - 1; - x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, - size_int (cnt), NULL_TREE, NULL_TREE); + { + unsigned int idx = *ctx->task_reduction_map->get (c); + tree off; + if (ctx->task_reductions[1 + idx]) + off = fold_convert (sizetype, + ctx->task_reductions[1 + idx]); + else + off = task_reduction_read (ilist, tskred_temp, sizetype, + 7 + 3 * idx + 1); + x = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, + tskred_base, off); + } x = fold_convert (type, x); tree t; if (omp_is_reference (var)) @@ -4634,6 +4794,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, goto do_dtor; case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: gcc_assert (is_taskreg_ctx (ctx)); x = build_outer_var_ref (var, ctx); x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x); @@ -4667,11 +4828,20 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, x = var; else if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) { - x = build4 (ARRAY_REF, 
ptr_type_node, tskred_avar, - size_int (task_reduction_cnt_full - + task_reduction_cntorig - 1), - NULL_TREE, NULL_TREE); - x = fold_convert (build_pointer_type (ptype), x); + tree pptype = build_pointer_type (ptype); + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION) + x = build4 (ARRAY_REF, ptr_type_node, tskred_avar, + size_int (task_reduction_cnt_full + + task_reduction_cntorig - 1), + NULL_TREE, NULL_TREE); + else + { + unsigned int idx + = *ctx->task_reduction_map->get (c); + x = task_reduction_read (ilist, tskred_temp, + pptype, 7 + 3 * idx); + } + x = fold_convert (pptype, x); x = build_simple_mem_ref (x); } } @@ -4767,7 +4937,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } x = lang_hooks.decls.omp_clause_default_ctor (c, unshare_expr (new_var), - build_outer_var_ref (var, ctx)); + cond ? NULL_TREE + : build_outer_var_ref (var, ctx)); if (x) gimplify_and_add (x, ilist); if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c)) @@ -5835,8 +6006,12 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_FIRSTPRIVATE: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_LASTPRIVATE: - case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_IN_REDUCTION: + case OMP_CLAUSE__REDUCTEMP_: + break; + case OMP_CLAUSE_REDUCTION: + if (is_task_ctx (ctx)) + continue; break; case OMP_CLAUSE_SHARED: if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) @@ -5933,6 +6108,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: do_in = true; break; @@ -6733,6 +6909,10 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, build_int_cst (pointer_sized_int_node, cachesz - 1)); sz = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, sz, build_int_cst (pointer_sized_int_node, ~(cachesz - 1))); + ctx->task_reductions.create (1 + cnt); + ctx->task_reduction_map = new hash_map; + ctx->task_reductions.quick_push 
(TREE_CODE (sz) == INTEGER_CST + ? sz : NULL_TREE); sz = force_gimple_operand (sz, &seq, true, NULL_TREE); gimple_seq_add_seq (start, seq); r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_one_node, @@ -6841,8 +7021,11 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, r = build4 (ARRAY_REF, pointer_sized_int_node, avar, size_int (7 + cnt * 3), NULL_TREE, NULL_TREE); gimple_seq_add_stmt (start, gimple_build_assign (r, t)); - t = byte_position (field); + t = unshare_expr (byte_position (field)); t = fold_convert (pointer_sized_int_node, t); + ctx->task_reduction_map->put (c, cnt); + ctx->task_reductions.quick_push (TREE_CODE (t) == INTEGER_CST + ? t : NULL_TREE); seq = NULL; t = force_gimple_operand (t, &seq, true, NULL_TREE); gimple_seq_add_seq (start, seq); @@ -6855,7 +7038,7 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, if (TREE_TYPE (ptr) == ptr_type_node) { cond = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr, - byte_position (bfield)); + unshare_expr (byte_position (bfield))); seq = NULL; cond = force_gimple_operand (cond, &seq, true, NULL_TREE); gimple_seq_add_seq (end, seq); @@ -6879,7 +7062,7 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, if (TREE_TYPE (ptr) == ptr_type_node) { new_var = build2 (POINTER_PLUS_EXPR, ptr_type_node, ptr, - byte_position (field)); + unshare_expr (byte_position (field))); seq = NULL; new_var = force_gimple_operand (new_var, &seq, true, NULL_TREE); gimple_seq_add_seq (end, seq); @@ -6941,8 +7124,11 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, gimple_seq_add_seq (end, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c)); OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; - OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; - OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION) + { + OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; + 
OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL; + } tree x = lang_hooks.decls.omp_clause_dtor (c, priv); if (x) { @@ -7009,7 +7195,8 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, } gimple_seq_add_seq (end, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c)); OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL; - OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TASK_REDUCTION) + OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL; tree x = lang_hooks.decls.omp_clause_dtor (c, new_var); if (x) { @@ -7030,9 +7217,20 @@ lower_omp_task_reductions (omp_context *ctx, enum tree_code code, tree clauses, } } - t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER); - g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); - gimple_seq_add_stmt (start, g); + if (code == OMP_TASKGROUP) + { + t = builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER); + g = gimple_build_call (t, 1, build_fold_addr_expr (avar)); + gimple_seq_add_stmt (start, g); + } + else if (code == OMP_TASKLOOP) + { + tree c = omp_find_clause (gimple_omp_task_clauses (ctx->stmt), + OMP_CLAUSE__REDUCTEMP_); + t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (c)), + build_fold_addr_expr (avar)); + gimplify_assign (OMP_CLAUSE_DECL (c), t, start); + } gimple_seq_add_stmt (end, gimple_build_assign (data, PLUS_EXPR, data, sz)); gimple_seq_add_stmt (end, gimple_build_assign (idx, PLUS_EXPR, idx, @@ -8113,6 +8311,7 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) break; } /* FALLTHRU */ + case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE_FIRSTPRIVATE: decl = OMP_CLAUSE_DECL (c); if (is_variable_sized (decl)) @@ -8138,7 +8337,7 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) src = decl; dst = build_simple_mem_ref_loc (loc, arg); dst = omp_build_component_ref (dst, f); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__LOOPTEMP_) + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_FIRSTPRIVATE) t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, 
src); else t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src); @@ -8345,7 +8544,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree child_fn, t; gimple *stmt = gsi_stmt (*gsi_p); gbind *par_bind, *bind, *dep_bind = NULL; - gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body; + gimple_seq par_body; location_t loc = gimple_location (stmt); clauses = gimple_omp_taskreg_clauses (stmt); @@ -8403,11 +8602,25 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (ctx->srecord_type) create_task_copyfn (as_a (stmt), ctx); + gimple_seq taskloop_ilist = NULL; + gimple_seq taskloop_olist = NULL; + if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt)) + { + if (dep_bind == NULL) + { + push_gimplify_context (); + dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); + } + lower_omp_task_reductions (ctx, OMP_TASKLOOP, + gimple_omp_task_clauses (ctx->stmt), + &taskloop_ilist, &taskloop_olist); + } + push_gimplify_context (); - par_olist = NULL; - par_ilist = NULL; - par_rlist = NULL; + gimple_seq par_olist = NULL; + gimple_seq par_ilist = NULL; + gimple_seq par_rlist = NULL; bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL && gimple_omp_parallel_grid_phony (as_a (stmt)); if (phony_construct && ctx->record_type) @@ -8437,8 +8650,8 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl); } - olist = NULL; - ilist = NULL; + gimple_seq olist = NULL; + gimple_seq ilist = NULL; lower_send_clauses (clauses, &ilist, &olist, ctx); lower_send_shared_vars (&ilist, &olist, ctx); @@ -8453,7 +8666,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) /* Once all the expansions are done, sequence all the different fragments inside gimple_omp_body. 
*/ - new_body = NULL; + gimple_seq new_body = NULL; if (ctx->record_type) { @@ -8481,7 +8694,10 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_omp_set_body (stmt, new_body); } - bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind)); + if (dep_bind && gimple_bind_block (par_bind) == NULL_TREE) + bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); + else + bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind)); gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true); gimple_bind_add_seq (bind, ilist); if (!phony_construct) @@ -8495,7 +8711,9 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (dep_bind) { gimple_bind_add_seq (dep_bind, dep_ilist); + gimple_bind_add_seq (dep_bind, taskloop_ilist); gimple_bind_add_stmt (dep_bind, bind); + gimple_bind_add_seq (dep_bind, taskloop_olist); gimple_bind_add_seq (dep_bind, dep_olist); pop_gimplify_context (dep_bind); } diff --git a/gcc/testsuite/ChangeLog.gomp b/gcc/testsuite/ChangeLog.gomp index 2154857fcd2f..6e143f3118c1 100644 --- a/gcc/testsuite/ChangeLog.gomp +++ b/gcc/testsuite/ChangeLog.gomp @@ -1,3 +1,12 @@ +2018-10-16 Jakub Jelinek + + * c-c++-common/gomp/clauses-1.c (r2): New variable. + (bar): Put taskloop simd inside of taskgroup with task_reduction, + use in_reduction clause instead of reduction. Add another + taskloop simd without nogroup clause, but with reduction clause and + a new in_reduction. + * c-c++-common/gomp/taskloop-reduction-1.c: New test. 
+ 2018-08-02 Jakub Jelinek * c-c++-common/gomp/depend-iterator-2.c (f1): Adjust expected diff --git a/gcc/testsuite/c-c++-common/gomp/clauses-1.c b/gcc/testsuite/c-c++-common/gomp/clauses-1.c index fe90c2428e04..1bbc1464216c 100644 --- a/gcc/testsuite/c-c++-common/gomp/clauses-1.c +++ b/gcc/testsuite/c-c++-common/gomp/clauses-1.c @@ -5,7 +5,7 @@ int t; #pragma omp threadprivate (t) #pragma omp declare target -int f, l, ll, r; +int f, l, ll, r, r2; void foo (int d, int m, int i1, int i2, int p, int *idp, int s, @@ -119,9 +119,16 @@ bar (int d, int m, int i1, int i2, int p, int *idp, int s, nowait depend(inout: dd[0]) for (int i = 0; i < 64; i++) ll++; + #pragma omp taskgroup task_reduction(+:r2) + #pragma omp taskloop simd \ + private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied if(taskloop: i1) final(fi) mergeable priority (pp) \ + safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) in_reduction(+:r2) + for (int i = 0; i < 64; i++) + ll++; + #pragma omp taskgroup task_reduction(+:r) #pragma omp taskloop simd \ private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied if(taskloop: i1) final(fi) mergeable nogroup priority (pp) \ - safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(+:r) + safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) in_reduction(+:r) for (int i = 0; i < 64; i++) ll++; #pragma omp taskwait diff --git a/gcc/testsuite/c-c++-common/gomp/taskloop-reduction-1.c b/gcc/testsuite/c-c++-common/gomp/taskloop-reduction-1.c new file mode 100644 index 000000000000..0df073fde74f --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/taskloop-reduction-1.c @@ -0,0 +1,10 @@ +int +foo (int *a) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) nogroup /* { dg-error "'nogroup' clause must not be used together with 'reduction' clause" } */ + for (int i = 0; i < 64; i++) + x += a[i]; + #pragma omp taskwait + return x; +} diff --git 
a/gcc/tree-core.h b/gcc/tree-core.h index ee68f042ef7c..0d8d55335c52 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -340,6 +340,9 @@ enum omp_clause_code { /* Internal clause: temporary for combined loops expansion. */ OMP_CLAUSE__LOOPTEMP_, + /* Internal clause: temporary for task reductions. */ + OMP_CLAUSE__REDUCTEMP_, + /* OpenACC/OpenMP clause: if (scalar-expression). */ OMP_CLAUSE_IF, diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 84874ddbba24..1fda89ee9eba 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1364,6 +1364,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) /* The following clauses are only added during OMP lowering; nested function decomposition happens before that. */ case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__GRIDDIM_: /* Anything else. */ @@ -2057,6 +2058,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) /* The following clauses are only added during OMP lowering; nested function decomposition happens before that. */ case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__GRIDDIM_: /* Anything else. 
*/ diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 1c81132a8864..7b25c20cd4a2 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -453,6 +453,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case OMP_CLAUSE__LOOPTEMP_: name = "_looptemp_"; goto print_remap; + case OMP_CLAUSE__REDUCTEMP_: + name = "_reductemp_"; + goto print_remap; case OMP_CLAUSE_TO_DECLARE: name = "to"; goto print_remap; diff --git a/gcc/tree.c b/gcc/tree.c index b6f5748e3c99..1381b4d9656e 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -305,6 +305,7 @@ unsigned const char omp_clause_num_ops[] = 0, /* OMP_CLAUSE_AUTO */ 0, /* OMP_CLAUSE_SEQ */ 1, /* OMP_CLAUSE__LOOPTEMP_ */ + 1, /* OMP_CLAUSE__REDUCTEMP_ */ 1, /* OMP_CLAUSE_IF */ 1, /* OMP_CLAUSE_NUM_THREADS */ 1, /* OMP_CLAUSE_SCHEDULE */ @@ -381,6 +382,7 @@ const char * const omp_clause_code_name[] = "auto", "seq", "_looptemp_", + "_reductemp_", "if", "num_threads", "schedule", @@ -11577,6 +11579,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE__LOOPTEMP_: + case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); /* FALLTHRU */ diff --git a/gcc/tree.h b/gcc/tree.h index 1d2c4a8e4332..eb4cc73b7e44 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1408,7 +1408,7 @@ extern tree maybe_wrap_with_location (tree, location_t); #define OMP_CLAUSE_DECL(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ OMP_CLAUSE_PRIVATE, \ - OMP_CLAUSE__LOOPTEMP_), 0) + OMP_CLAUSE__REDUCTEMP_), 0) #define OMP_CLAUSE_HAS_LOCATION(NODE) \ (LOCATION_LOCUS ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus) \ != UNKNOWN_LOCATION) diff --git a/include/ChangeLog.gomp b/include/ChangeLog.gomp index 7536538d0cee..196320a38428 100644 --- a/include/ChangeLog.gomp +++ b/include/ChangeLog.gomp @@ -1,3 +1,7 @@ +2018-10-16 Jakub Jelinek + + * gomp-constants.h 
(GOMP_TASK_FLAG_REDUCTION): Define. + 2018-06-28 Jakub Jelinek * gomp-constants.h (GOMP_DEPEND_IN, GOMP_DEPEND_OUT, diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 325a0e492b69..d3e64d4e352a 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -189,6 +189,7 @@ enum gomp_map_kind #define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) #define GOMP_TASK_FLAG_IF (1 << 10) #define GOMP_TASK_FLAG_NOGROUP (1 << 11) +#define GOMP_TASK_FLAG_REDUCTION (1 << 12) /* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ #define GOMP_TARGET_FLAG_NOWAIT (1 << 0) diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp index fbf0540d8d03..cbab22f43442 100644 --- a/libgomp/ChangeLog.gomp +++ b/libgomp/ChangeLog.gomp @@ -1,3 +1,22 @@ +2018-10-16 Jakub Jelinek + + * task.c (GOMP_taskgroup_reduction_register): Add ialias. + * taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_REDUCTION flag + by calling GOMP_taskgroup_reduction_register. + * libgomp_g.h: Include gstdint.h. + (GOMP_taskgroup_reduction_register, + GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap): New + prototypes. + * testsuite/libgomp.c-c++-common/taskloop-reduction-1.c: New test. + * testsuite/libgomp.c-c++-common/taskloop-reduction-2.c: New test. + * testsuite/libgomp.c++/taskloop-reduction-1.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-2.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-3.C: New test. + * testsuite/libgomp.c++/taskloop-reduction-4.C: New test. + * testsuite/libgomp.c++/task-reduction-4.C (foo): Turn into function + template, replace all int occurrences with the template parameter T. + (test): Likewise.
+ 2018-10-10 Jakub Jelinek * testsuite/libgomp.c-c++-common/task-reduction-5.c (size_t): New diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 37858da0d0ed..2ffa7c141ad8 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -31,6 +31,7 @@ #include #include +#include "gstdint.h" /* barrier.c */ @@ -255,6 +256,9 @@ extern void GOMP_taskwait_depend (void **); extern void GOMP_taskyield (void); extern void GOMP_taskgroup_start (void); extern void GOMP_taskgroup_end (void); +extern void GOMP_taskgroup_reduction_register (uintptr_t *); +extern void GOMP_taskgroup_reduction_unregister (uintptr_t *); +extern void GOMP_task_reduction_remap (size_t, size_t, void **); /* sections.c */ diff --git a/libgomp/task.c b/libgomp/task.c index fcb7e9e7103c..83cf8936ac49 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -523,6 +523,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), ialias (GOMP_taskgroup_start) ialias (GOMP_taskgroup_end) +ialias (GOMP_taskgroup_reduction_register) #define TYPE long #define UTYPE unsigned long diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index 5515b355f000..d20af399d385 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -153,7 +153,15 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), return; } else - ialias_call (GOMP_taskgroup_start) (); + { + ialias_call (GOMP_taskgroup_start) (); + if (flags & GOMP_TASK_FLAG_REDUCTION) + { + struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; + uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; + ialias_call (GOMP_taskgroup_reduction_register) (ptr); + } + } if (priority > gomp_max_task_priority_var) priority = gomp_max_task_priority_var; diff --git a/libgomp/testsuite/libgomp.c++/task-reduction-4.C b/libgomp/testsuite/libgomp.c++/task-reduction-4.C index 409c2ca19abb..28ea0cd9f3bb 100644 --- a/libgomp/testsuite/libgomp.c++/task-reduction-4.C +++ b/libgomp/testsuite/libgomp.c++/task-reduction-4.C @@ -19,10 
+19,11 @@ long long *&s = ss; long long (*ts)[2]; long long (*&t)[2] = ts; +template void -foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int *&p, int (&q)[4][2]) +foo (T &n, T *&c, long long int *&d, T (&m)[3], T *&r, T (&o)[4], T *&p, T (&q)[4][2]) { - int i; + T i; for (i = 0; i < 2; i++) #pragma omp task in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ @@ -57,19 +58,20 @@ foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int } } +template void -test (int &n) +test (T &n) { - int cs[2] = { 0, 0 }; - int (&c)[2] = cs; - int ps[3] = { 0, 1, 4 }; - int (&p)[3] = ps; - int qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; - int (&q)[4][2] = qs; + T cs[2] = { 0, 0 }; + T (&c)[2] = cs; + T ps[3] = { 0, 1, 4 }; + T (&p)[3] = ps; + T qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + T (&q)[4][2] = qs; long long sb[4] = { 5, 1, 1, 6 }; long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; - int ms[3] = { 5, 0, 5 }; - int os[4] = { 1, 0, 0, 2 }; + T ms[3] = { 5, 0, 5 }; + T os[4] = { 1, 0, 0, 2 }; s = sb; t = tb; #pragma omp parallel @@ -77,23 +79,23 @@ test (int &n) { long long int ds[] = { 1, 1 }; long long int (&d)[2] = ds; - int (&m)[3] = ms; - int rs[5] = { 6, 7, 0, 0, 9 }; - int (&r)[5] = rs; - int (&o)[4] = os; + T (&m)[3] = ms; + T rs[5] = { 6, 7, 0, 0, 9 }; + T (&r)[5] = rs; + T (&o)[4] = os; #pragma omp taskgroup task_reduction (+: a, c) task_reduction (*: b[2 * n:3 * n], d) \ task_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ task_reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ task_reduction (*: t[2:2][:], s[1:n + 1]) { - int i; + T i; for (i = 0; i < 4; i++) #pragma omp task in_reduction (+: a, c) in_reduction (*: b[2 * n:3 * n], d) \ in_reduction (+: o[n:n*2], q[1:2][:], p[0], m[1], r[2:2]) \ in_reduction (+: g[n:n * 2], e[1], k[1:2][:], h[0], f[2:2]) \ in_reduction 
(*: s[1:2], t[2:2][:]) { - int j; + T j; a[0] += 2; a[1] += 3; b[2] *= 2; @@ -130,10 +132,10 @@ test (int &n) s[2] *= 2; t[2][0] *= 2; t[3][0] *= 2; - int *cp = c; + T *cp = c; long long int *dp = d; - int *rp = r; - int *pp = p; + T *rp = r; + T *pp = p; foo (n, cp, dp, m, rp, o, pp, q); r[3] += 18; o[1] += 29; diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C new file mode 100644 index 000000000000..400cc8b96384 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-1.C @@ -0,0 +1,153 @@ +extern "C" void abort (); + +struct S { S (); S (unsigned long long int, int); ~S (); static int cnt1, cnt2, cnt3; unsigned long long int s; int t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (unsigned long long int x, int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +rbar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) \ + initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +S gs = { 0, 7 }; +S &g = gs; +S hs (1, 5); +S &h = hs; + +int +foo (int *a, int &b) +{ + int xs = 0; + int &x = xs; + #pragma omp taskloop reduction (+:x) in_reduction (+:b) + for (int i = 0; i < 64; i++) + { + x += a[i]; + b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int &b) +{ + unsigned long long int xs = 1; + unsigned long long int &x = xs; + #pragma omp taskloop reduction (*:x) in_reduction (*:b) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task 
in_reduction (*:b) + b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +void +test () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + S ms (0, 7); + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel + #pragma omp master + { + S ns = { 1, 5 }; + S &m = ms; + S &n = ns; + #pragma omp taskgroup task_reduction (+:b) + j = foo (a, b); + #pragma omp taskgroup task_reduction (*:d) + e = bar (c, d); + #pragma omp taskloop reduction (+: g, m) reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + } + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (ms.s != 63 * 64 * 4 || ms.t != 7) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C new file mode 100644 index 000000000000..486c3d337d79 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-2.C @@ -0,0 +1,253 @@ +extern "C" void abort (); + +int as[2]; +int (&a)[2] = as; +long long int bs[7] = { 9, 11, 1, 1, 1, 13, 15 }; +long long int (&b)[7] = bs; +int es[3] = { 5, 0, 5 }; +int (&e)[3] = es; +int fs[5] = { 6, 7, 0, 0, 9 }; +int (&f)[5] = fs; +int gs[4] = { 1, 0, 0, 2 }; +int (&g)[4] = 
gs; +int hs[3] = { 0, 1, 4 }; +int (&h)[3] = hs; +int ks[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +int (&k)[4][2] = ks; +long long *ss; +long long *&s = ss; +long long (*ts)[2]; +long long (*&t)[2] = ts; +struct U { U (); ~U () {}; U (const U &); int u[4]; }; + +U::U +() +{ + u[0] = 0; u[1] = 1; u[2] = 2; u[3] = 3; +} + +U::U +(const U &r) +{ + u[0] = r.u[0]; u[1] = r.u[1]; u[2] = r.u[2]; u[3] = r.u[3]; +} + +void +foo (int &n, int *&c, long long int *&d, int (&m)[3], int *&r, int (&o)[4], int *&p, int (&q)[4][2]) +{ + int i; + U u; + u.u[2] = 8; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) firstprivate (u) nogroup + for (i = 0; i < 2; i++) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + if (u.u[2] != 8) + abort (); + } +} + +void +test (int &n) +{ + int cs[2] = { 0, 0 }; + int (&c)[2] = cs; + int ps[3] = { 0, 1, 4 }; + int (&p)[3] = ps; + int qs[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + int (&q)[4][2] = qs; + long long sb[4] = { 5, 1, 1, 6 }; + long long tb[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + int ms[3] = { 5, 0, 5 }; + int os[4] = { 1, 0, 0, 2 }; + s = sb; + t = tb; + U u; + u.u[2] = 10; + #pragma omp parallel + #pragma omp single + { + long long int ds[] = { 1, 1 }; + long long int (&d)[2] = ds; + int (&m)[3] = ms; + int rs[5] = { 6, 7, 0, 0, 9 }; + int (&r)[5] = rs; + int (&o)[4] = os; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], d) \ + reduction (+: 
e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) firstprivate (u) + for (i = 0; i < 4; i++) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + if (u.u[2] != 10) + abort (); + for (j = 0; j < 2; j++) + #pragma omp task in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) firstprivate (u) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + t[2][0] *= 2; + t[3][0] *= 2; + int *cp = c; + long long int *dp = d; + int *rp = r; + int *pp = p; + foo (n, cp, dp, m, rp, o, pp, q); + if (u.u[2] != 10) + abort (); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] 
!= 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || sb[0] != 5 + || sb[1] != 1LL << (16 + 4) + || sb[2] != 1LL << 8 + || sb[3] != 6 + || tb[0][0] != 9 || tb[0][1] != 10 || tb[1][0] != 11 || tb[1][1] != 12 + || tb[2][0] != 1LL << (16 + 8) + || tb[2][1] != 1LL << 4 + || tb[3][0] != 1LL << 8 + || tb[3][1] != 1LL << (16 + 4) + || tb[4][0] != 13 || tb[4][1] != 14) + abort (); +} + +int +main () +{ + int n = 1; + test (n); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C new file mode 100644 index 000000000000..0588e4744f95 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-3.C @@ -0,0 +1,314 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); 
+ p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 }, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) nogroup + for (i = 0; i < 2; i++) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 
1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +void +test (int n) +{ + S c[2] = { { 0, 7 }, { 0, 7 } }; + S p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + S q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + S ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + S tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + S d[] = { { 1, 5 }, { 1, 5 } }; + S m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + S r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + S o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (*: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || 
(r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 
19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (d[0].s != 1LL << (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || 
k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + || ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C new file mode 100644 index 000000000000..9a9360bead65 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskloop-reduction-4.C @@ -0,0 +1,315 @@ +extern "C" void abort (); + +struct S { S (); S (long int, long int); ~S (); static int cnt1, cnt2, cnt3; long int s, t; }; + +int S::cnt1; +int S::cnt2; +int S::cnt3; + +S::S () +{ + #pragma omp atomic + cnt1++; +} + +S::S (long int x, long int y) : s (x), t (y) +{ + #pragma omp atomic update + ++cnt2; +} + +S::~S () +{ + #pragma omp atomic + cnt3 = cnt3 + 1; + if (t < 3 || t > 9 || (t & 1) == 0) + abort (); +} + +void +bar (S *p, S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +baz (S *o, S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: S : omp_out.s += omp_in.s) initializer (omp_priv (0, 3)) +#pragma omp declare reduction (*: S : baz (&omp_out, &omp_in)) initializer (bar (&omp_priv, &omp_orig)) + +S a[2] = { { 0, 7 }, { 0, 7 } }; +S b[7] = { { 9, 5 }, { 11, 5 }, { 1, 5 
}, { 1, 5 }, { 1, 5 }, { 13, 5 }, { 15, 5 } }; +S e[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; +S f[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; +S g[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; +S h[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; +S k[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; +S *s; +S (*t)[2]; + +template <int N> +void +foo (int n, S *c, S *d, S m[3], S *r, S o[4], S *p, S q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + reduction (default, +: o[n:n*2], m[1], p[0]) in_reduction (+: k[1:2][:], f[2:2]) \ + reduction (+: q[1:2][:], r[2:2]) in_reduction (+: g[n:n*2], e[1], h[0]) \ + in_reduction (*: s[1:2], t[2:2][:]) + for (i = 0; i < 2; i++) + { + a[0].s += 7; + a[1].s += 17; + b[2].s *= 2; + b[4].s *= 2; + c[0].s += 6; + d[1].s *= 2; + e[1].s += 19; + f[2].s += 21; + f[3].s += 23; + g[1].s += 25; + g[2].s += 27; + h[0].s += 29; + k[1][0].s += 31; + k[2][1].s += 33; + m[1].s += 19; + r[2].s += 21; + r[3].s += 23; + o[1].s += 25; + o[2].s += 27; + p[0].s += 29; + q[1][0].s += 31; + q[2][1].s += 33; + s[1].s *= 2; + t[2][0].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z 
= 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + } +} + +template <typename T> +void +test (int n) +{ + T c[2] = { { 0, 7 }, { 0, 7 } }; + T p[3] = { { 0, 7 }, { 1, 7 }, { 4, 7 } }; + T q[4][2] = { { { 5, 7 }, { 6, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 0, 7 }, { 0, 7 } }, { { 7, 7 }, { 8, 7 } } }; + T ss[4] = { { 5, 5 }, { 1, 5 }, { 1, 5 }, { 6, 5 } }; + T tt[5][2] = { { { 9, 5 }, { 10, 5 } }, { { 11, 5 }, { 12, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 1, 5 }, { 1, 5 } }, { { 13, 5 }, { 14, 5 } } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + T d[] = { { 1, 5 }, { 1, 5 } }; + T m[3] = { { 5, 7 }, { 0, 7 }, { 5, 7 } }; + T r[5] = { { 6, 7 }, { 7, 7 }, { 0, 7 }, { 0, 7 }, { 9, 7 } }; + T o[4] = { { 1, 7 }, { 0, 7 }, { 0, 7 }, { 2, 7 } }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (+: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0].s += 2; + a[1].s += 3; + b[2].s *= 2; + f[3].s += 8; + g[1].s += 9; + g[2].s += 10; + h[0].s += 11; + k[1][1].s += 13; + k[2][1].s += 15; + m[1].s += 16; + r[2].s += 8; + s[1].s *= 2; + t[2][1].s *= 2; + t[3][1].s *= 2; + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 
2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1].s += 6; + r[2].s += 7; + q[1][0].s += 17; + q[2][0].s += 19; + a[0].s += 4; + a[1].s += 5; + b[3].s *= 2; + b[4].s *= 2; + f[3].s += 18; + g[1].s += 29; + g[2].s += 18; + h[0].s += 19; + s[2].s *= 2; + t[2][0].s *= 2; + t[3][0].s *= 2; + foo<0> (n, c, d, m, r, o, p, q); + if ((e[1].t != 7 && e[1].t != 3) || (h[0].t != 7 && h[0].t != 3) + || (m[1].t != 7 && m[1].t != 3) || (p[0].t != 7 && p[0].t != 3)) + abort (); + for (int z = 0; z < 2; z++) + if ((a[z].t != 7 && a[z].t != 3) || (c[z].t != 7 && c[z].t != 3) + || (d[z].t != 5 && d[z].t != 9) || (f[z + 2].t != 7 && f[z + 2].t != 3) + || (g[z + 1].t != 7 && g[z + 1].t != 3) || (r[z + 2].t != 7 && r[z + 2].t != 3) + || (s[z + 1].t != 5 && s[z + 1].t != 9) || (o[z + 1].t != 7 && o[z + 1].t != 3) + || (k[z + 1][0].t != 7 && k[z + 1][0].t != 3) || (k[z + 1][1].t != 7 && k[z + 1][1].t != 3) + || (q[z + 1][0].t != 7 && q[z + 1][0].t != 3) || (q[z + 1][1].t != 7 && q[z + 1][1].t != 3) + || (t[z + 2][0].t != 5 && t[z + 2][0].t != 9) || (t[z + 2][1].t != 5 && t[z + 2][1].t != 9)) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5 && b[z + 2].t != 9) + abort (); + r[3].s += 18; + o[1].s += 29; + o[2].s += 18; + p[0].s += 19; + c[0].s += 4; + c[1].s += 5; + d[0].s *= 2; + e[1].s += 6; + f[2].s += 7; + k[1][0].s += 17; + k[2][0].s += 19; + } + r[3].s += 8; + o[1].s += 9; + o[2].s += 10; + p[0].s += 11; + q[1][1].s += 13; + q[2][1].s += 15; + b[3].s *= 2; + c[0].s += 4; + c[1].s += 9; + d[0].s *= 2; + e[1].s += 16; + f[2].s += 8; + } + if (d[0].s != 1LL 
<< (8 + 4) + || d[1].s != 1LL << 16 + || m[0].s != 5 + || m[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || m[2].s != 5 + || r[0].s != 6 + || r[1].s != 7 + || r[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || r[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || r[4].s != 9 + || o[0].s != 1 + || o[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || o[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || o[3].s != 2) + abort (); + if (e[1].t != 7 || h[0].t != 7 || m[1].t != 7 || p[0].t != 7) + abort (); + for (int z = 0; z < 2; z++) + if (a[z].t != 7 || c[z].t != 7 || d[z].t != 5 || f[z + 2].t != 7 + || g[z + 1].t != 7 || r[z + 2].t != 7 || s[z + 1].t != 5 || o[z + 1].t != 7 + || k[z + 1][0].t != 7 || k[z + 1][1].t != 7 || q[z + 1][0].t != 7 || q[z + 1][1].t != 7 + || t[z + 2][0].t != 5 || t[z + 2][1].t != 5) + abort (); + for (int z = 0; z < 3; z++) + if (b[z + 2].t != 5) + abort (); + } + if (a[0].s != 7 * 16 + 4 * 8 + 2 * 4 + || a[1].s != 17 * 16 + 5 * 8 + 3 * 4 + || b[0].s != 9 || b[1].s != 11 + || b[2].s != 1LL << (16 + 4) + || b[3].s != 1LL << (8 + 4) + || b[4].s != 1LL << (16 + 8) + || b[5].s != 13 || b[6].s != 15 + || c[0].s != 6 * 16 + 4 * 8 + 4 * 4 + || c[1].s != 5 * 8 + 9 * 4 + || e[0].s != 5 + || e[1].s != 19 * 16 + 6 * 8 + 16 * 4 + || e[2].s != 5 + || f[0].s != 6 + || f[1].s != 7 + || f[2].s != 21 * 16 + 7 * 8 + 8 * 4 + || f[3].s != 23 * 16 + 18 * 8 + 8 * 4 + || f[4].s != 9 + || g[0].s != 1 + || g[1].s != 25 * 16 + 29 * 8 + 9 * 4 + || g[2].s != 27 * 16 + 18 * 8 + 10 * 4 + || g[3].s != 2 + || h[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || h[1].s != 1 || h[2].s != 4 + || k[0][0].s != 5 || k[0][1].s != 6 + || k[1][0].s != 31 * 16 + 17 * 8 + || k[1][1].s != 13 * 4 + || k[2][0].s != 19 * 8 + || k[2][1].s != 33 * 16 + 15 * 4 + || k[3][0].s != 7 || k[3][1].s != 8 + || p[0].s != 29 * 16 + 19 * 8 + 11 * 4 + || p[1].s != 1 || p[2].s != 4 + || q[0][0].s != 5 || q[0][1].s != 6 + || q[1][0].s != 31 * 16 + 17 * 8 + || q[1][1].s != 13 * 4 + || q[2][0].s != 19 * 8 + || q[2][1].s != 33 * 16 + 15 * 4 + || q[3][0].s != 7 || q[3][1].s != 8 + 
|| ss[0].s != 5 + || ss[1].s != 1LL << (16 + 4) + || ss[2].s != 1LL << 8 + || ss[3].s != 6 + || tt[0][0].s != 9 || tt[0][1].s != 10 || tt[1][0].s != 11 || tt[1][1].s != 12 + || tt[2][0].s != 1LL << (16 + 8) + || tt[2][1].s != 1LL << 4 + || tt[3][0].s != 1LL << 8 + || tt[3][1].s != 1LL << (16 + 4) + || tt[4][0].s != 13 || tt[4][1].s != 14) + abort (); +} + +int +main () +{ + int c1 = S::cnt1, c2 = S::cnt2, c3 = S::cnt3; + test<S> (1); + if (S::cnt1 + S::cnt2 - c1 - c2 != S::cnt3 - c3) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c new file mode 100644 index 000000000000..907d9bb4c590 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-1.c @@ -0,0 +1,119 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +struct S { unsigned long int s, t; }; + +void +rbar (struct S *p, struct S *o) +{ + p->s = 1; + if (o->t != 5) + abort (); + p->t = 9; +} + +static inline void +rbaz (struct S *o, struct S *i) +{ + if (o->t != 5 || i->t != 9) + abort (); + o->s *= i->s; +} + +#pragma omp declare reduction (+: struct S : omp_out.s += omp_in.s) \ + initializer (omp_priv = { 0, 3 }) +#pragma omp declare reduction (*: struct S : rbaz (&omp_out, &omp_in)) \ + initializer (rbar (&omp_priv, &omp_orig)) + +struct S g = { 0, 7 }; +struct S h = { 1, 5 }; + +int +foo (int *a, int *b) +{ + int x = 0; + #pragma omp taskloop reduction (+:x) in_reduction (+:b[0]) + for (int i = 0; i < 64; i++) + { + x += a[i]; + *b += a[i] * 2; + } + return x; +} + +unsigned long long int +bar (int *a, unsigned long long int *b) +{ + unsigned long long int x = 1; + #pragma omp taskloop reduction (*:x) in_reduction (*:b[0]) + for (int i = 0; i < 64; i++) + { + #pragma omp task in_reduction (*:x) + x *= a[i]; + #pragma omp task in_reduction (*:b[0]) + *b *= (3 - a[i]); + } + return x; +} + +void +baz (int i, int *a, int *c) +{ + #pragma omp task in_reduction (*:h) 
in_reduction (+:g) + { + g.s += 7 * a[i]; + h.s *= (3 - c[i]); + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9)) + abort (); + } +} + +int +main () +{ + int i, j, a[64], b = 0, c[64]; + unsigned long long int d = 1, e; + struct S m = { 0, 7 }; + for (i = 0; i < 64; i++) + { + a[i] = 2 * i; + c[i] = 1 + ((i % 3) != 1); + } + #pragma omp parallel + #pragma omp master + { + struct S n = { 1, 5 }; + #pragma omp taskgroup task_reduction (+:b) + j = foo (a, &b); + #pragma omp taskgroup task_reduction (*:d) + e = bar (c, &d); + #pragma omp taskloop reduction (+: g, m) reduction (*: h, n) + for (i = 0; i < 64; ++i) + { + g.s += 3 * a[i]; + h.s *= (3 - c[i]); + m.s += 4 * a[i]; + n.s *= c[i]; + if ((g.t != 7 && g.t != 3) || (h.t != 5 && h.t != 9) + || (m.t != 7 && m.t != 3) || (n.t != 5 && n.t != 9)) + abort (); + baz (i, a, c); + } + if (n.s != (1ULL << 43) || n.t != 5) + abort (); + } + if (j != 63 * 64 || b != 63 * 64 * 2) + abort (); + if (e != (1ULL << 43) || d != (1ULL << 21)) + abort (); + if (g.s != 63 * 64 * 10 || g.t != 7) + abort (); + if (h.s != (1ULL << 42) || h.t != 5) + abort (); + if (m.s != 63 * 64 * 4 || m.t != 7) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c new file mode 100644 index 000000000000..8fc05dc668e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-reduction-2.c @@ -0,0 +1,212 @@ +#ifdef __cplusplus +extern "C" +#endif +void abort (void); + +int a[2]; +long long int b[7] = { 9, 11, 1, 1, 1, 13, 15 }; +int e[3] = { 5, 0, 5 }; +int f[5] = { 6, 7, 0, 0, 9 }; +int g[4] = { 1, 0, 0, 2 }; +int h[3] = { 0, 1, 4 }; +int k[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; +long long *s; +long long (*t)[2]; + +void +foo (int n, int *c, long long int *d, int m[3], int *r, int o[4], int *p, int q[4][2]) +{ + int i; + #pragma omp taskloop in_reduction (+: a, c[:2]) in_reduction (*: b[2 * n:3 * n], d[0:2]) \ + 
in_reduction (+: o[n:n*2], m[1], k[1:2][:], p[0], f[2:2]) \ + in_reduction (+: q[1:2][:], g[n:n*2], e[1], h[0], r[2:2]) \ + in_reduction (*: s[1:2], t[2:2][:]) nogroup + for (i = 0; i < 2; i++) + { + a[0] += 7; + a[1] += 17; + b[2] *= 2; + b[4] *= 2; + c[0] += 6; + d[1] *= 2; + e[1] += 19; + f[2] += 21; + f[3] += 23; + g[1] += 25; + g[2] += 27; + h[0] += 29; + k[1][0] += 31; + k[2][1] += 33; + m[1] += 19; + r[2] += 21; + r[3] += 23; + o[1] += 25; + o[2] += 27; + p[0] += 29; + q[1][0] += 31; + q[2][1] += 33; + s[1] *= 2; + t[2][0] *= 2; + t[3][1] *= 2; + } +} + +void +test (int n) +{ + int c[2] = { 0, 0 }; + int p[3] = { 0, 1, 4 }; + int q[4][2] = { { 5, 6 }, { 0, 0 }, { 0, 0 }, { 7, 8 } }; + long long ss[4] = { 5, 1, 1, 6 }; + long long tt[5][2] = { { 9, 10 }, { 11, 12 }, { 1, 1 }, { 1, 1 }, { 13, 14 } }; + s = ss; + t = tt; + #pragma omp parallel + #pragma omp single + { + long long int d[] = { 1, 1 }; + int m[3] = { 5, 0, 5 }; + int r[5] = { 6, 7, 0, 0, 9 }; + int o[4] = { 1, 0, 0, 2 }; + int i; + #pragma omp taskloop reduction (+: a, c) reduction (default, *: b[2 * n:3 * n], d) \ + reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][0:2]) \ + reduction (default, +: o[n:n*2], m[1], q[1:2][:], p[0], r[2:2]) \ + reduction (*: t[2:2][:], s[1:n + 1]) + for (i = 0; i < 4; i++) + { + int j; + a[0] += 2; + a[1] += 3; + b[2] *= 2; + f[3] += 8; + g[1] += 9; + g[2] += 10; + h[0] += 11; + k[1][1] += 13; + k[2][1] += 15; + m[1] += 16; + r[2] += 8; + s[1] *= 2; + t[2][1] *= 2; + t[3][1] *= 2; + #pragma omp taskloop in_reduction (+: a, c[:2]) \ + in_reduction (*: b[2 * n:3 * n], d[n - 1:n + 1]) \ + in_reduction (+: e[1], f[2:2], g[n:n*2], h[0], k[1:2][:2]) \ + in_reduction (+: m[1], r[2:2], o[n:n*2], p[0], q[1:2][:2]) \ + in_reduction (*: s[n:2], t[2:2][:]) nogroup + for (j = 0; j < 2; j++) + { + m[1] += 6; + r[2] += 7; + q[1][0] += 17; + q[2][0] += 19; + a[0] += 4; + a[1] += 5; + b[3] *= 2; + b[4] *= 2; + f[3] += 18; + g[1] += 29; + g[2] += 18; + h[0] += 19; + s[2] *= 2; + 
t[2][0] *= 2; + t[3][0] *= 2; + foo (n, c, d, m, r, o, p, q); + r[3] += 18; + o[1] += 29; + o[2] += 18; + p[0] += 19; + c[0] += 4; + c[1] += 5; + d[0] *= 2; + e[1] += 6; + f[2] += 7; + k[1][0] += 17; + k[2][0] += 19; + } + r[3] += 8; + o[1] += 9; + o[2] += 10; + p[0] += 11; + q[1][1] += 13; + q[2][1] += 15; + b[3] *= 2; + c[0] += 4; + c[1] += 9; + d[0] *= 2; + e[1] += 16; + f[2] += 8; + } + if (d[0] != 1LL << (8 + 4) + || d[1] != 1LL << 16 + || m[0] != 5 + || m[1] != 19 * 16 + 6 * 8 + 16 * 4 + || m[2] != 5 + || r[0] != 6 + || r[1] != 7 + || r[2] != 21 * 16 + 7 * 8 + 8 * 4 + || r[3] != 23 * 16 + 18 * 8 + 8 * 4 + || r[4] != 9 + || o[0] != 1 + || o[1] != 25 * 16 + 29 * 8 + 9 * 4 + || o[2] != 27 * 16 + 18 * 8 + 10 * 4 + || o[3] != 2) + abort (); + } + if (a[0] != 7 * 16 + 4 * 8 + 2 * 4 + || a[1] != 17 * 16 + 5 * 8 + 3 * 4 + || b[0] != 9 || b[1] != 11 + || b[2] != 1LL << (16 + 4) + || b[3] != 1LL << (8 + 4) + || b[4] != 1LL << (16 + 8) + || b[5] != 13 || b[6] != 15 + || c[0] != 6 * 16 + 4 * 8 + 4 * 4 + || c[1] != 5 * 8 + 9 * 4 + || e[0] != 5 + || e[1] != 19 * 16 + 6 * 8 + 16 * 4 + || e[2] != 5 + || f[0] != 6 + || f[1] != 7 + || f[2] != 21 * 16 + 7 * 8 + 8 * 4 + || f[3] != 23 * 16 + 18 * 8 + 8 * 4 + || f[4] != 9 + || g[0] != 1 + || g[1] != 25 * 16 + 29 * 8 + 9 * 4 + || g[2] != 27 * 16 + 18 * 8 + 10 * 4 + || g[3] != 2 + || h[0] != 29 * 16 + 19 * 8 + 11 * 4 + || h[1] != 1 || h[2] != 4 + || k[0][0] != 5 || k[0][1] != 6 + || k[1][0] != 31 * 16 + 17 * 8 + || k[1][1] != 13 * 4 + || k[2][0] != 19 * 8 + || k[2][1] != 33 * 16 + 15 * 4 + || k[3][0] != 7 || k[3][1] != 8 + || p[0] != 29 * 16 + 19 * 8 + 11 * 4 + || p[1] != 1 || p[2] != 4 + || q[0][0] != 5 || q[0][1] != 6 + || q[1][0] != 31 * 16 + 17 * 8 + || q[1][1] != 13 * 4 + || q[2][0] != 19 * 8 + || q[2][1] != 33 * 16 + 15 * 4 + || q[3][0] != 7 || q[3][1] != 8 + || ss[0] != 5 + || ss[1] != 1LL << (16 + 4) + || ss[2] != 1LL << 8 + || ss[3] != 6 + || tt[0][0] != 9 || tt[0][1] != 10 || tt[1][0] != 11 || tt[1][1] != 12 + || tt[2][0] 
!= 1LL << (16 + 8) + || tt[2][1] != 1LL << 4 + || tt[3][0] != 1LL << 8 + || tt[3][1] != 1LL << (16 + 4) + || tt[4][0] != 13 || tt[4][1] != 14) + abort (); +} + +int +main () +{ + test (1); + return 0; +}