]>
Commit | Line | Data |
---|---|---|
8d9254fc | 1 | /* Copyright (C) 2017-2020 Free Software Foundation, Inc. |
5326695a AS |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
7 | Software Foundation; either version 3, or (at your option) any later | |
8 | version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with GCC; see the file COPYING3. If not see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | /* {{{ Includes. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "target.h" | |
26 | #include "tree.h" | |
27 | #include "gimple.h" | |
28 | #include "tree-pass.h" | |
29 | #include "gimple-iterator.h" | |
30 | #include "cfghooks.h" | |
31 | #include "cfgloop.h" | |
32 | #include "tm_p.h" | |
33 | #include "stringpool.h" | |
34 | #include "fold-const.h" | |
35 | #include "varasm.h" | |
36 | #include "omp-low.h" | |
37 | #include "omp-general.h" | |
38 | #include "internal-fn.h" | |
39 | #include "tree-vrp.h" | |
40 | #include "tree-ssanames.h" | |
41 | #include "tree-ssa-operands.h" | |
42 | #include "gimplify.h" | |
43 | #include "tree-phinodes.h" | |
44 | #include "cgraph.h" | |
45 | #include "targhooks.h" | |
46 | #include "langhooks-def.h" | |
47 | ||
48 | /* }}} */ | |
49 | /* {{{ OMP GCN pass. | |
50 | ||
51 | This pass is intended to make any GCN-specific transformations to OpenMP | |
52 | target regions. | |
53 | ||
54 | At present, its only purpose is to convert some "omp" built-in functions | |
55 | to use closer-to-the-metal "gcn" built-in functions. */ | |
56 | ||
57 | unsigned int | |
58 | execute_omp_gcn (void) | |
59 | { | |
60 | tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); | |
61 | tree thr_num_id = DECL_NAME (thr_num_tree); | |
62 | tree team_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); | |
63 | tree team_num_id = DECL_NAME (team_num_tree); | |
64 | basic_block bb; | |
65 | gimple_stmt_iterator gsi; | |
66 | unsigned int todo = 0; | |
67 | ||
68 | FOR_EACH_BB_FN (bb, cfun) | |
69 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
70 | { | |
71 | gimple *call = gsi_stmt (gsi); | |
72 | tree decl; | |
73 | ||
74 | if (is_gimple_call (call) && (decl = gimple_call_fndecl (call))) | |
75 | { | |
76 | tree decl_id = DECL_NAME (decl); | |
77 | tree lhs = gimple_get_lhs (call); | |
78 | ||
79 | if (decl_id == thr_num_id) | |
80 | { | |
81 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
82 | fprintf (dump_file, | |
83 | "Replace '%s' with __builtin_gcn_dim_pos.\n", | |
84 | IDENTIFIER_POINTER (decl_id)); | |
85 | ||
86 | /* Transform this: | |
87 | lhs = __builtin_omp_get_thread_num () | |
88 | to this: | |
89 | lhs = __builtin_gcn_dim_pos (1) */ | |
90 | tree fn = targetm.builtin_decl (GCN_BUILTIN_OMP_DIM_POS, 0); | |
91 | tree fnarg = build_int_cst (unsigned_type_node, 1); | |
92 | gimple *stmt = gimple_build_call (fn, 1, fnarg); | |
93 | gimple_call_set_lhs (stmt, lhs); | |
94 | gsi_replace (&gsi, stmt, true); | |
95 | ||
96 | todo |= TODO_update_ssa; | |
97 | } | |
98 | else if (decl_id == team_num_id) | |
99 | { | |
100 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
101 | fprintf (dump_file, | |
102 | "Replace '%s' with __builtin_gcn_dim_pos.\n", | |
103 | IDENTIFIER_POINTER (decl_id)); | |
104 | ||
105 | /* Transform this: | |
106 | lhs = __builtin_omp_get_team_num () | |
107 | to this: | |
108 | lhs = __builtin_gcn_dim_pos (0) */ | |
109 | tree fn = targetm.builtin_decl (GCN_BUILTIN_OMP_DIM_POS, 0); | |
110 | tree fnarg = build_zero_cst (unsigned_type_node); | |
111 | gimple *stmt = gimple_build_call (fn, 1, fnarg); | |
112 | gimple_call_set_lhs (stmt, lhs); | |
113 | gsi_replace (&gsi, stmt, true); | |
114 | ||
115 | todo |= TODO_update_ssa; | |
116 | } | |
117 | } | |
118 | } | |
119 | ||
120 | return todo; | |
121 | } | |
122 | ||
123 | namespace | |
124 | { | |
125 | ||
126 | const pass_data pass_data_omp_gcn = { | |
127 | GIMPLE_PASS, | |
128 | "omp_gcn", /* name */ | |
129 | OPTGROUP_NONE, /* optinfo_flags */ | |
130 | TV_NONE, /* tv_id */ | |
131 | 0, /* properties_required */ | |
132 | 0, /* properties_provided */ | |
133 | 0, /* properties_destroyed */ | |
134 | 0, /* todo_flags_start */ | |
135 | TODO_df_finish, /* todo_flags_finish */ | |
136 | }; | |
137 | ||
138 | class pass_omp_gcn : public gimple_opt_pass | |
139 | { | |
140 | public: | |
141 | pass_omp_gcn (gcc::context *ctxt) | |
142 | : gimple_opt_pass (pass_data_omp_gcn, ctxt) | |
143 | { | |
144 | } | |
145 | ||
146 | /* opt_pass methods: */ | |
147 | virtual bool gate (function *) | |
148 | { | |
149 | return flag_openmp; | |
150 | } | |
151 | ||
152 | virtual unsigned int execute (function *) | |
153 | { | |
154 | return execute_omp_gcn (); | |
155 | } | |
156 | ||
157 | }; /* class pass_omp_gcn. */ | |
158 | ||
159 | } /* anon namespace. */ | |
160 | ||
161 | gimple_opt_pass * | |
162 | make_pass_omp_gcn (gcc::context *ctxt) | |
163 | { | |
164 | return new pass_omp_gcn (ctxt); | |
165 | } | |
166 | ||
167 | /* }}} */ | |
168 | /* {{{ OpenACC reductions. */ | |
169 | ||
170 | /* Global lock variable, needed for 128bit worker & gang reductions. */ | |
171 | ||
172 | static GTY(()) tree global_lock_var; | |
173 | ||
174 | /* Lazily generate the global_lock_var decl and return its address. */ | |
175 | ||
176 | static tree | |
177 | gcn_global_lock_addr () | |
178 | { | |
179 | tree v = global_lock_var; | |
180 | ||
181 | if (!v) | |
182 | { | |
183 | tree name = get_identifier ("__reduction_lock"); | |
184 | tree type = build_qualified_type (unsigned_type_node, | |
185 | TYPE_QUAL_VOLATILE); | |
186 | v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type); | |
187 | global_lock_var = v; | |
188 | DECL_ARTIFICIAL (v) = 1; | |
189 | DECL_EXTERNAL (v) = 1; | |
190 | TREE_STATIC (v) = 1; | |
191 | TREE_PUBLIC (v) = 1; | |
192 | TREE_USED (v) = 1; | |
193 | mark_addressable (v); | |
194 | mark_decl_referenced (v); | |
195 | } | |
196 | ||
197 | return build_fold_addr_expr (v); | |
198 | } | |
199 | ||
200 | /* Helper function for gcn_reduction_update. | |
201 | ||
202 | Insert code to locklessly update *PTR with *PTR OP VAR just before | |
203 | GSI. We use a lockless scheme for nearly all case, which looks | |
204 | like: | |
205 | actual = initval (OP); | |
206 | do { | |
207 | guess = actual; | |
208 | write = guess OP myval; | |
209 | actual = cmp&swap (ptr, guess, write) | |
210 | } while (actual bit-different-to guess); | |
211 | return write; | |
212 | ||
213 | This relies on a cmp&swap instruction, which is available for 32- and | |
214 | 64-bit types. Larger types must use a locking scheme. */ | |
215 | ||
216 | static tree | |
217 | gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi, | |
218 | tree ptr, tree var, tree_code op) | |
219 | { | |
220 | unsigned fn = GCN_BUILTIN_CMP_SWAP; | |
221 | tree_code code = NOP_EXPR; | |
222 | tree arg_type = unsigned_type_node; | |
223 | tree var_type = TREE_TYPE (var); | |
224 | ||
225 | if (TREE_CODE (var_type) == COMPLEX_TYPE | |
226 | || TREE_CODE (var_type) == REAL_TYPE) | |
227 | code = VIEW_CONVERT_EXPR; | |
228 | ||
229 | if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node)) | |
230 | { | |
231 | arg_type = long_long_unsigned_type_node; | |
232 | fn = GCN_BUILTIN_CMP_SWAPLL; | |
233 | } | |
234 | ||
235 | tree swap_fn = gcn_builtin_decl (fn, true); | |
236 | ||
237 | gimple_seq init_seq = NULL; | |
238 | tree init_var = make_ssa_name (arg_type); | |
239 | tree init_expr = omp_reduction_init_op (loc, op, var_type); | |
240 | init_expr = fold_build1 (code, arg_type, init_expr); | |
241 | gimplify_assign (init_var, init_expr, &init_seq); | |
242 | gimple *init_end = gimple_seq_last (init_seq); | |
243 | ||
244 | gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT); | |
245 | ||
246 | /* Split the block just after the init stmts. */ | |
247 | basic_block pre_bb = gsi_bb (*gsi); | |
248 | edge pre_edge = split_block (pre_bb, init_end); | |
249 | basic_block loop_bb = pre_edge->dest; | |
250 | pre_bb = pre_edge->src; | |
251 | /* Reset the iterator. */ | |
252 | *gsi = gsi_for_stmt (gsi_stmt (*gsi)); | |
253 | ||
254 | tree expect_var = make_ssa_name (arg_type); | |
255 | tree actual_var = make_ssa_name (arg_type); | |
256 | tree write_var = make_ssa_name (arg_type); | |
257 | ||
258 | /* Build and insert the reduction calculation. */ | |
259 | gimple_seq red_seq = NULL; | |
260 | tree write_expr = fold_build1 (code, var_type, expect_var); | |
261 | write_expr = fold_build2 (op, var_type, write_expr, var); | |
262 | write_expr = fold_build1 (code, arg_type, write_expr); | |
263 | gimplify_assign (write_var, write_expr, &red_seq); | |
264 | ||
265 | gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT); | |
266 | ||
267 | /* Build & insert the cmp&swap sequence. */ | |
268 | gimple_seq latch_seq = NULL; | |
269 | tree swap_expr = build_call_expr_loc (loc, swap_fn, 3, | |
270 | ptr, expect_var, write_var); | |
271 | gimplify_assign (actual_var, swap_expr, &latch_seq); | |
272 | ||
273 | gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var, | |
274 | NULL_TREE, NULL_TREE); | |
275 | gimple_seq_add_stmt (&latch_seq, cond); | |
276 | ||
277 | gimple *latch_end = gimple_seq_last (latch_seq); | |
278 | gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT); | |
279 | ||
280 | /* Split the block just after the latch stmts. */ | |
281 | edge post_edge = split_block (loop_bb, latch_end); | |
282 | basic_block post_bb = post_edge->dest; | |
283 | loop_bb = post_edge->src; | |
284 | *gsi = gsi_for_stmt (gsi_stmt (*gsi)); | |
285 | ||
286 | post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU; | |
287 | /* post_edge->probability = profile_probability::even (); */ | |
288 | edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE); | |
289 | /* loop_edge->probability = profile_probability::even (); */ | |
290 | set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb); | |
291 | set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb); | |
292 | ||
293 | gphi *phi = create_phi_node (expect_var, loop_bb); | |
294 | add_phi_arg (phi, init_var, pre_edge, loc); | |
295 | add_phi_arg (phi, actual_var, loop_edge, loc); | |
296 | ||
297 | loop *loop = alloc_loop (); | |
298 | loop->header = loop_bb; | |
299 | loop->latch = loop_bb; | |
300 | add_loop (loop, loop_bb->loop_father); | |
301 | ||
302 | return fold_build1 (code, var_type, write_var); | |
303 | } | |
304 | ||
305 | /* Helper function for gcn_reduction_update. | |
306 | ||
307 | Insert code to lockfully update *PTR with *PTR OP VAR just before | |
308 | GSI. This is necessary for types larger than 64 bits, where there | |
309 | is no cmp&swap instruction to implement a lockless scheme. We use | |
310 | a lock variable in global memory. | |
311 | ||
312 | while (cmp&swap (&lock_var, 0, 1)) | |
313 | continue; | |
314 | T accum = *ptr; | |
315 | accum = accum OP var; | |
316 | *ptr = accum; | |
317 | cmp&swap (&lock_var, 1, 0); | |
318 | return accum; | |
319 | ||
320 | A lock in global memory is necessary to force execution engine | |
321 | descheduling and avoid resource starvation that can occur if the | |
322 | lock is in shared memory. */ | |
323 | ||
324 | static tree | |
325 | gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, | |
326 | tree ptr, tree var, tree_code op) | |
327 | { | |
328 | tree var_type = TREE_TYPE (var); | |
329 | tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true); | |
330 | tree uns_unlocked = build_int_cst (unsigned_type_node, 0); | |
331 | tree uns_locked = build_int_cst (unsigned_type_node, 1); | |
332 | ||
333 | /* Split the block just before the gsi. Insert a gimple nop to make | |
334 | this easier. */ | |
335 | gimple *nop = gimple_build_nop (); | |
336 | gsi_insert_before (gsi, nop, GSI_SAME_STMT); | |
337 | basic_block entry_bb = gsi_bb (*gsi); | |
338 | edge entry_edge = split_block (entry_bb, nop); | |
339 | basic_block lock_bb = entry_edge->dest; | |
340 | /* Reset the iterator. */ | |
341 | *gsi = gsi_for_stmt (gsi_stmt (*gsi)); | |
342 | ||
343 | /* Build and insert the locking sequence. */ | |
344 | gimple_seq lock_seq = NULL; | |
345 | tree lock_var = make_ssa_name (unsigned_type_node); | |
346 | tree lock_expr = gcn_global_lock_addr (); | |
347 | lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr, | |
348 | uns_unlocked, uns_locked); | |
349 | gimplify_assign (lock_var, lock_expr, &lock_seq); | |
350 | gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked, | |
351 | NULL_TREE, NULL_TREE); | |
352 | gimple_seq_add_stmt (&lock_seq, cond); | |
353 | gimple *lock_end = gimple_seq_last (lock_seq); | |
354 | gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT); | |
355 | ||
356 | /* Split the block just after the lock sequence. */ | |
357 | edge locked_edge = split_block (lock_bb, lock_end); | |
358 | basic_block update_bb = locked_edge->dest; | |
359 | lock_bb = locked_edge->src; | |
360 | *gsi = gsi_for_stmt (gsi_stmt (*gsi)); | |
361 | ||
362 | /* Create the lock loop. */ | |
363 | locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU; | |
364 | locked_edge->probability = profile_probability::even (); | |
365 | edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE); | |
366 | loop_edge->probability = profile_probability::even (); | |
367 | set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb); | |
368 | set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb); | |
369 | ||
370 | /* Create the loop structure. */ | |
371 | loop *lock_loop = alloc_loop (); | |
372 | lock_loop->header = lock_bb; | |
373 | lock_loop->latch = lock_bb; | |
374 | lock_loop->nb_iterations_estimate = 1; | |
375 | lock_loop->any_estimate = true; | |
376 | add_loop (lock_loop, entry_bb->loop_father); | |
377 | ||
378 | /* Build and insert the reduction calculation. */ | |
379 | gimple_seq red_seq = NULL; | |
380 | tree acc_in = make_ssa_name (var_type); | |
381 | tree ref_in = build_simple_mem_ref (ptr); | |
382 | TREE_THIS_VOLATILE (ref_in) = 1; | |
383 | gimplify_assign (acc_in, ref_in, &red_seq); | |
384 | ||
385 | tree acc_out = make_ssa_name (var_type); | |
386 | tree update_expr = fold_build2 (op, var_type, ref_in, var); | |
387 | gimplify_assign (acc_out, update_expr, &red_seq); | |
388 | ||
389 | tree ref_out = build_simple_mem_ref (ptr); | |
390 | TREE_THIS_VOLATILE (ref_out) = 1; | |
391 | gimplify_assign (ref_out, acc_out, &red_seq); | |
392 | ||
393 | gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT); | |
394 | ||
395 | /* Build & insert the unlock sequence. */ | |
396 | gimple_seq unlock_seq = NULL; | |
397 | tree unlock_expr = gcn_global_lock_addr (); | |
398 | unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr, | |
399 | uns_locked, uns_unlocked); | |
400 | gimplify_and_add (unlock_expr, &unlock_seq); | |
401 | gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT); | |
402 | ||
403 | return acc_out; | |
404 | } | |
405 | ||
406 | /* Emit a sequence to update a reduction accumulator at *PTR with the | |
407 | value held in VAR using operator OP. Return the updated value. | |
408 | ||
409 | TODO: optimize for atomic ops and independent complex ops. */ | |
410 | ||
411 | static tree | |
412 | gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi, | |
413 | tree ptr, tree var, tree_code op) | |
414 | { | |
415 | tree type = TREE_TYPE (var); | |
416 | tree size = TYPE_SIZE (type); | |
417 | ||
418 | if (size == TYPE_SIZE (unsigned_type_node) | |
419 | || size == TYPE_SIZE (long_long_unsigned_type_node)) | |
420 | return gcn_lockless_update (loc, gsi, ptr, var, op); | |
421 | else | |
422 | return gcn_lockfull_update (loc, gsi, ptr, var, op); | |
423 | } | |
424 | ||
425 | /* Return a temporary variable decl to use for an OpenACC worker reduction. */ | |
426 | ||
427 | static tree | |
428 | gcn_goacc_get_worker_red_decl (tree type, unsigned offset) | |
429 | { | |
430 | machine_function *machfun = cfun->machine; | |
431 | tree existing_decl; | |
432 | ||
433 | if (TREE_CODE (type) == REFERENCE_TYPE) | |
434 | type = TREE_TYPE (type); | |
435 | ||
436 | tree var_type | |
437 | = build_qualified_type (type, | |
438 | (TYPE_QUALS (type) | |
439 | | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS))); | |
440 | ||
441 | if (machfun->reduc_decls | |
442 | && offset < machfun->reduc_decls->length () | |
443 | && (existing_decl = (*machfun->reduc_decls)[offset])) | |
444 | { | |
445 | gcc_assert (TREE_TYPE (existing_decl) == var_type); | |
446 | return existing_decl; | |
447 | } | |
448 | else | |
449 | { | |
450 | char name[50]; | |
451 | sprintf (name, ".oacc_reduction_%u", offset); | |
452 | tree decl = create_tmp_var_raw (var_type, name); | |
453 | ||
454 | DECL_CONTEXT (decl) = NULL_TREE; | |
455 | TREE_STATIC (decl) = 1; | |
456 | ||
457 | varpool_node::finalize_decl (decl); | |
458 | ||
459 | vec_safe_grow_cleared (machfun->reduc_decls, offset + 1); | |
460 | (*machfun->reduc_decls)[offset] = decl; | |
461 | ||
462 | return decl; | |
463 | } | |
464 | ||
465 | return NULL_TREE; | |
466 | } | |
467 | ||
468 | /* Expand IFN_GOACC_REDUCTION_SETUP. */ | |
469 | ||
470 | static void | |
471 | gcn_goacc_reduction_setup (gcall *call) | |
472 | { | |
473 | gimple_stmt_iterator gsi = gsi_for_stmt (call); | |
474 | tree lhs = gimple_call_lhs (call); | |
475 | tree var = gimple_call_arg (call, 2); | |
476 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
477 | gimple_seq seq = NULL; | |
478 | ||
479 | push_gimplify_context (true); | |
480 | ||
481 | if (level != GOMP_DIM_GANG) | |
482 | { | |
483 | /* Copy the receiver object. */ | |
484 | tree ref_to_res = gimple_call_arg (call, 1); | |
485 | ||
486 | if (!integer_zerop (ref_to_res)) | |
487 | var = build_simple_mem_ref (ref_to_res); | |
488 | } | |
489 | ||
490 | if (level == GOMP_DIM_WORKER) | |
491 | { | |
492 | tree var_type = TREE_TYPE (var); | |
493 | /* Store incoming value to worker reduction buffer. */ | |
494 | tree offset = gimple_call_arg (call, 5); | |
495 | tree decl | |
496 | = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset)); | |
497 | ||
498 | gimplify_assign (decl, var, &seq); | |
499 | } | |
500 | ||
501 | if (lhs) | |
502 | gimplify_assign (lhs, var, &seq); | |
503 | ||
504 | pop_gimplify_context (NULL); | |
505 | gsi_replace_with_seq (&gsi, seq, true); | |
506 | } | |
507 | ||
508 | /* Expand IFN_GOACC_REDUCTION_INIT. */ | |
509 | ||
510 | static void | |
511 | gcn_goacc_reduction_init (gcall *call) | |
512 | { | |
513 | gimple_stmt_iterator gsi = gsi_for_stmt (call); | |
514 | tree lhs = gimple_call_lhs (call); | |
515 | tree var = gimple_call_arg (call, 2); | |
516 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
517 | enum tree_code rcode | |
518 | = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4)); | |
519 | tree init = omp_reduction_init_op (gimple_location (call), rcode, | |
520 | TREE_TYPE (var)); | |
521 | gimple_seq seq = NULL; | |
522 | ||
523 | push_gimplify_context (true); | |
524 | ||
525 | if (level == GOMP_DIM_GANG) | |
526 | { | |
527 | /* If there's no receiver object, propagate the incoming VAR. */ | |
528 | tree ref_to_res = gimple_call_arg (call, 1); | |
529 | if (integer_zerop (ref_to_res)) | |
530 | init = var; | |
531 | } | |
532 | ||
533 | if (lhs) | |
534 | gimplify_assign (lhs, init, &seq); | |
535 | ||
536 | pop_gimplify_context (NULL); | |
537 | gsi_replace_with_seq (&gsi, seq, true); | |
538 | } | |
539 | ||
540 | /* Expand IFN_GOACC_REDUCTION_FINI. */ | |
541 | ||
542 | static void | |
543 | gcn_goacc_reduction_fini (gcall *call) | |
544 | { | |
545 | gimple_stmt_iterator gsi = gsi_for_stmt (call); | |
546 | tree lhs = gimple_call_lhs (call); | |
547 | tree ref_to_res = gimple_call_arg (call, 1); | |
548 | tree var = gimple_call_arg (call, 2); | |
549 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
550 | enum tree_code op | |
551 | = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4)); | |
552 | gimple_seq seq = NULL; | |
553 | tree r = NULL_TREE;; | |
554 | ||
555 | push_gimplify_context (true); | |
556 | ||
557 | tree accum = NULL_TREE; | |
558 | ||
559 | if (level == GOMP_DIM_WORKER) | |
560 | { | |
561 | tree var_type = TREE_TYPE (var); | |
562 | tree offset = gimple_call_arg (call, 5); | |
563 | tree decl | |
564 | = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset)); | |
565 | ||
566 | accum = build_fold_addr_expr (decl); | |
567 | } | |
568 | else if (integer_zerop (ref_to_res)) | |
569 | r = var; | |
570 | else | |
571 | accum = ref_to_res; | |
572 | ||
573 | if (accum) | |
574 | { | |
575 | /* UPDATE the accumulator. */ | |
576 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); | |
577 | seq = NULL; | |
578 | r = gcn_reduction_update (gimple_location (call), &gsi, accum, var, op); | |
579 | } | |
580 | ||
581 | if (lhs) | |
582 | gimplify_assign (lhs, r, &seq); | |
583 | pop_gimplify_context (NULL); | |
584 | ||
585 | gsi_replace_with_seq (&gsi, seq, true); | |
586 | } | |
587 | ||
588 | /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */ | |
589 | ||
590 | static void | |
591 | gcn_goacc_reduction_teardown (gcall *call) | |
592 | { | |
593 | gimple_stmt_iterator gsi = gsi_for_stmt (call); | |
594 | tree lhs = gimple_call_lhs (call); | |
595 | tree var = gimple_call_arg (call, 2); | |
596 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
597 | gimple_seq seq = NULL; | |
598 | ||
599 | push_gimplify_context (true); | |
600 | ||
601 | if (level == GOMP_DIM_WORKER) | |
602 | { | |
603 | tree var_type = TREE_TYPE (var); | |
604 | ||
605 | /* Read the worker reduction buffer. */ | |
606 | tree offset = gimple_call_arg (call, 5); | |
607 | tree decl | |
608 | = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset)); | |
609 | var = decl; | |
610 | } | |
611 | ||
612 | if (level != GOMP_DIM_GANG) | |
613 | { | |
614 | /* Write to the receiver object. */ | |
615 | tree ref_to_res = gimple_call_arg (call, 1); | |
616 | ||
617 | if (!integer_zerop (ref_to_res)) | |
618 | gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq); | |
619 | } | |
620 | ||
621 | if (lhs) | |
622 | gimplify_assign (lhs, var, &seq); | |
623 | ||
624 | pop_gimplify_context (NULL); | |
625 | ||
626 | gsi_replace_with_seq (&gsi, seq, true); | |
627 | } | |
628 | ||
629 | /* Implement TARGET_GOACC_REDUCTION. | |
630 | ||
631 | Expand calls to the GOACC REDUCTION internal function, into a sequence of | |
632 | gimple instructions. */ | |
633 | ||
634 | void | |
635 | gcn_goacc_reduction (gcall *call) | |
636 | { | |
637 | int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3)); | |
638 | ||
639 | if (level == GOMP_DIM_VECTOR) | |
640 | { | |
641 | default_goacc_reduction (call); | |
642 | return; | |
643 | } | |
644 | ||
645 | unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0)); | |
646 | ||
647 | switch (code) | |
648 | { | |
649 | case IFN_GOACC_REDUCTION_SETUP: | |
650 | gcn_goacc_reduction_setup (call); | |
651 | break; | |
652 | ||
653 | case IFN_GOACC_REDUCTION_INIT: | |
654 | gcn_goacc_reduction_init (call); | |
655 | break; | |
656 | ||
657 | case IFN_GOACC_REDUCTION_FINI: | |
658 | gcn_goacc_reduction_fini (call); | |
659 | break; | |
660 | ||
661 | case IFN_GOACC_REDUCTION_TEARDOWN: | |
662 | gcn_goacc_reduction_teardown (call); | |
663 | break; | |
664 | ||
665 | default: | |
666 | gcc_unreachable (); | |
667 | } | |
668 | } | |
669 | ||
670 | /* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD. | |
671 | ||
672 | Tweak (worker) propagation record, e.g. to put it in shared memory. */ | |
673 | ||
674 | tree | |
675 | gcn_goacc_adjust_propagation_record (tree record_type, bool sender, | |
676 | const char *name) | |
677 | { | |
678 | tree type = record_type; | |
679 | ||
680 | TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS; | |
681 | ||
682 | if (!sender) | |
683 | type = build_pointer_type (type); | |
684 | ||
685 | tree decl = create_tmp_var_raw (type, name); | |
686 | ||
687 | if (sender) | |
688 | { | |
689 | DECL_CONTEXT (decl) = NULL_TREE; | |
690 | TREE_STATIC (decl) = 1; | |
691 | } | |
692 | ||
693 | if (sender) | |
694 | varpool_node::finalize_decl (decl); | |
695 | ||
696 | return decl; | |
697 | } | |
698 | ||
699 | void | |
700 | gcn_goacc_adjust_gangprivate_decl (tree var) | |
701 | { | |
702 | tree type = TREE_TYPE (var); | |
703 | tree lds_type = build_qualified_type (type, | |
704 | TYPE_QUALS_NO_ADDR_SPACE (type) | |
705 | | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)); | |
706 | machine_function *machfun = cfun->machine; | |
707 | ||
708 | TREE_TYPE (var) = lds_type; | |
709 | TREE_STATIC (var) = 1; | |
710 | ||
711 | /* We're making VAR static. We have to mangle the name to avoid collisions | |
712 | between different local variables that share the same names. */ | |
713 | lhd_set_decl_assembler_name (var); | |
714 | ||
715 | varpool_node::finalize_decl (var); | |
716 | ||
717 | if (machfun) | |
718 | machfun->use_flat_addressing = true; | |
719 | } | |
720 | ||
721 | /* }}} */ |