1 /* Copyright (C) 2017-2022 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "tree-pass.h"
29 #include "gimple-iterator.h"
33 #include "stringpool.h"
34 #include "fold-const.h"
37 #include "omp-general.h"
38 #include "internal-fn.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-operands.h"
43 #include "tree-phinodes.h"
45 #include "targhooks.h"
46 #include "langhooks-def.h"
49 /* {{{ OpenACC reductions. */
51 /* Global lock variable, needed for 128bit worker & gang reductions.
   gcn_global_lock_addr reads this cached decl before (lazily) building it. */
53 static GTY(()) tree global_lock_var
;
55 /* Lazily generate the global_lock_var decl and return its address.
   The decl is a VAR_DECL named "__reduction_lock" whose type is derived
   from unsigned int; it is marked artificial and external.  The value
   returned is an address expression (build_fold_addr_expr) of the decl.
   NOTE(review): this extract omits several original lines (the return
   type, the test that makes the creation lazy, the second argument of
   build_qualified_type, braces) -- confirm against the full source. */
58 gcn_global_lock_addr ()
60 tree v
= global_lock_var
;
/* Identifier and (qualified) type used for the lock decl. */
64 tree name
= get_identifier ("__reduction_lock");
65 tree type
= build_qualified_type (unsigned_type_node
,
67 v
= build_decl (BUILTINS_LOCATION
, VAR_DECL
, name
, type
);
/* Compiler-generated decl, declared external.  Presumably the storage is
   supplied outside this translation unit -- TODO confirm where.  */
69 DECL_ARTIFICIAL (v
) = 1;
70 DECL_EXTERNAL (v
) = 1;
75 mark_decl_referenced (v
);
78 return build_fold_addr_expr (v
);
81 /* Helper function for gcn_reduction_update.
83 Insert code to locklessly update *PTR with *PTR OP VAR just before
84 GSI. We use a lockless scheme for nearly all cases, which looks
85 like:
86 actual = initval (OP);
87 do {
88 guess = actual;
89 write = guess OP myval;
90 actual = cmp&swap (ptr, guess, write)
91 } while (actual bit-different-to guess);
94 This relies on a cmp&swap instruction, which is available for 32- and
95 64-bit types. Larger types must use a locking scheme.
   LOC is the location attached to the generated init value, the builtin
   call and the PHI arguments.  GSI is the insertion point; it is re-seated
   on its statement after each block split.  The value returned is WRITE
   (the last candidate value) converted back to VAR's type.
   NOTE(review): this extract omits some original lines (return type,
   braces); confirm details against the full source. */
98 gcn_lockless_update (location_t loc
, gimple_stmt_iterator
*gsi
,
99 tree ptr
, tree var
, tree_code op
)
/* Defaults: the plain cmp&swap builtin operating on unsigned int, with
   values converted by NOP_EXPR.  */
101 unsigned fn
= GCN_BUILTIN_CMP_SWAP
;
102 tree_code code
= NOP_EXPR
;
103 tree arg_type
= unsigned_type_node
;
104 tree var_type
= TREE_TYPE (var
);
/* Complex and floating-point values are reinterpreted bitwise rather than
   value-converted.  */
106 if (TREE_CODE (var_type
) == COMPLEX_TYPE
107 || TREE_CODE (var_type
) == REAL_TYPE
)
108 code
= VIEW_CONVERT_EXPR
;
/* Types with the same TYPE_SIZE node as unsigned long long use the LL
   variant of the builtin.  */
110 if (TYPE_SIZE (var_type
) == TYPE_SIZE (long_long_unsigned_type_node
))
112 arg_type
= long_long_unsigned_type_node
;
113 fn
= GCN_BUILTIN_CMP_SWAPLL
;
116 tree swap_fn
= gcn_builtin_decl (fn
, true);
/* Seed value for the loop: OP's initial value for VAR_TYPE, converted to
   ARG_TYPE ("actual = initval (OP)" in the sketch above).  */
118 gimple_seq init_seq
= NULL
;
119 tree init_var
= make_ssa_name (arg_type
);
120 tree init_expr
= omp_reduction_init_op (loc
, op
, var_type
);
121 init_expr
= fold_build1 (code
, arg_type
, init_expr
);
122 gimplify_assign (init_var
, init_expr
, &init_seq
);
/* Remember the last init statement; the block is split right after it.  */
123 gimple
*init_end
= gimple_seq_last (init_seq
);
125 gsi_insert_seq_before (gsi
, init_seq
, GSI_SAME_STMT
);
127 /* Split the block just after the init stmts. */
128 basic_block pre_bb
= gsi_bb (*gsi
);
129 edge pre_edge
= split_block (pre_bb
, init_end
);
130 basic_block loop_bb
= pre_edge
->dest
;
131 pre_bb
= pre_edge
->src
;
132 /* Reset the iterator. */
133 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
/* expect_var is the PHI result ("guess"), actual_var receives the result
   of the cmp&swap call, write_var is the candidate new value.  */
135 tree expect_var
= make_ssa_name (arg_type
);
136 tree actual_var
= make_ssa_name (arg_type
);
137 tree write_var
= make_ssa_name (arg_type
);
139 /* Build and insert the reduction calculation. */
140 gimple_seq red_seq
= NULL
;
/* write = (ARG_TYPE) ((VAR_TYPE) expect OP var).  */
141 tree write_expr
= fold_build1 (code
, var_type
, expect_var
);
142 write_expr
= fold_build2 (op
, var_type
, write_expr
, var
);
143 write_expr
= fold_build1 (code
, arg_type
, write_expr
);
144 gimplify_assign (write_var
, write_expr
, &red_seq
);
146 gsi_insert_seq_before (gsi
, red_seq
, GSI_SAME_STMT
);
148 /* Build & insert the cmp&swap sequence. */
149 gimple_seq latch_seq
= NULL
;
150 tree swap_expr
= build_call_expr_loc (loc
, swap_fn
, 3,
151 ptr
, expect_var
, write_var
);
152 gimplify_assign (actual_var
, swap_expr
, &latch_seq
);
/* The condition is true when the swap result equals the expected value.  */
154 gcond
*cond
= gimple_build_cond (EQ_EXPR
, actual_var
, expect_var
,
155 NULL_TREE
, NULL_TREE
);
156 gimple_seq_add_stmt (&latch_seq
, cond
);
158 gimple
*latch_end
= gimple_seq_last (latch_seq
);
159 gsi_insert_seq_before (gsi
, latch_seq
, GSI_SAME_STMT
);
161 /* Split the block just after the latch stmts. */
162 edge post_edge
= split_block (loop_bb
, latch_end
);
163 basic_block post_bb
= post_edge
->dest
;
164 loop_bb
= post_edge
->src
;
165 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
/* Toggle TRUE/FALLTHRU on the edge made by split_block (presumably turning
   a fall-through edge into the TRUE, loop-exit edge) and add a FALSE self
   edge on loop_bb for the retry.  */
167 post_edge
->flags
^= EDGE_TRUE_VALUE
| EDGE_FALLTHRU
;
168 /* post_edge->probability = profile_probability::even (); */
169 edge loop_edge
= make_edge (loop_bb
, loop_bb
, EDGE_FALSE_VALUE
);
170 /* loop_edge->probability = profile_probability::even (); */
/* Record dominators: pre_bb for loop_bb, loop_bb for post_bb.  */
171 set_immediate_dominator (CDI_DOMINATORS
, loop_bb
, pre_bb
);
172 set_immediate_dominator (CDI_DOMINATORS
, post_bb
, loop_bb
);
/* expect = PHI <init (entry edge), actual (back edge)>.  */
174 gphi
*phi
= create_phi_node (expect_var
, loop_bb
);
175 add_phi_arg (phi
, init_var
, pre_edge
, loc
);
176 add_phi_arg (phi
, actual_var
, loop_edge
, loc
);
/* Register the single-block loop (header and latch are both loop_bb)
   under the loop that loop_bb currently belongs to.  */
178 loop
*loop
= alloc_loop ();
179 loop
->header
= loop_bb
;
180 loop
->latch
= loop_bb
;
181 add_loop (loop
, loop_bb
->loop_father
);
/* Hand back the written value in VAR's type.  */
183 return fold_build1 (code
, var_type
, write_var
);
186 /* Helper function for gcn_reduction_update.
188 Insert code to lockfully update *PTR with *PTR OP VAR just before
189 GSI. This is necessary for types larger than 64 bits, where there
190 is no cmp&swap instruction to implement a lockless scheme. We use
191 a lock variable in global memory.
193 while (cmp&swap (&lock_var, 0, 1))
194 continue;
195 T accum = *ptr;
196 accum = accum OP var;
197 *ptr = accum;
198 cmp&swap (&lock_var, 1, 0);
201 A lock in global memory is necessary to force execution engine
202 descheduling and avoid resource starvation that can occur if the
203 lock is in shared memory.
   LOC is attached to the generated builtin calls.  GSI is the insertion
   point; it is re-seated on its statement after each block split.
   NOTE(review): this extract omits some original lines (return type,
   braces, the final return statement); confirm against the full source. */
206 gcn_lockfull_update (location_t loc
, gimple_stmt_iterator
*gsi
,
207 tree ptr
, tree var
, tree_code op
)
209 tree var_type
= TREE_TYPE (var
);
210 tree swap_fn
= gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP
, true);
/* Lock word values: 0 means unlocked, 1 means locked.  */
211 tree uns_unlocked
= build_int_cst (unsigned_type_node
, 0);
212 tree uns_locked
= build_int_cst (unsigned_type_node
, 1);
/* NOTE(review): in this extract the comment that follows has lost its
   closing line, so it lexically runs on to the next comment terminator.
   The statements up to "Reset the iterator" are kept verbatim.  */
214 /* Split the block just before the gsi. Insert a gimple nop to make
216 gimple
*nop
= gimple_build_nop ();
217 gsi_insert_before (gsi
, nop
, GSI_SAME_STMT
);
218 basic_block entry_bb
= gsi_bb (*gsi
);
219 edge entry_edge
= split_block (entry_bb
, nop
);
220 basic_block lock_bb
= entry_edge
->dest
;
221 /* Reset the iterator. */
222 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
224 /* Build and insert the locking sequence. */
225 gimple_seq lock_seq
= NULL
;
226 tree lock_var
= make_ssa_name (unsigned_type_node
);
227 tree lock_expr
= gcn_global_lock_addr ();
/* lock_var = cmp&swap (&lock, unlocked, locked).  */
228 lock_expr
= build_call_expr_loc (loc
, swap_fn
, 3, lock_expr
,
229 uns_unlocked
, uns_locked
);
230 gimplify_assign (lock_var
, lock_expr
, &lock_seq
);
/* True when the call result equals "unlocked"; presumably the builtin
   returns the previous contents, so this means the lock was taken.  */
231 gcond
*cond
= gimple_build_cond (EQ_EXPR
, lock_var
, uns_unlocked
,
232 NULL_TREE
, NULL_TREE
);
233 gimple_seq_add_stmt (&lock_seq
, cond
);
234 gimple
*lock_end
= gimple_seq_last (lock_seq
);
235 gsi_insert_seq_before (gsi
, lock_seq
, GSI_SAME_STMT
);
237 /* Split the block just after the lock sequence. */
238 edge locked_edge
= split_block (lock_bb
, lock_end
);
239 basic_block update_bb
= locked_edge
->dest
;
240 lock_bb
= locked_edge
->src
;
241 *gsi
= gsi_for_stmt (gsi_stmt (*gsi
));
243 /* Create the lock loop. */
/* The edge into update_bb has TRUE/FALLTHRU toggled; a FALSE self edge on
   lock_bb retries the acquisition.  Both edges get an even probability.  */
244 locked_edge
->flags
^= EDGE_TRUE_VALUE
| EDGE_FALLTHRU
;
245 locked_edge
->probability
= profile_probability::even ();
246 edge loop_edge
= make_edge (lock_bb
, lock_bb
, EDGE_FALSE_VALUE
);
247 loop_edge
->probability
= profile_probability::even ();
248 set_immediate_dominator (CDI_DOMINATORS
, lock_bb
, entry_bb
);
249 set_immediate_dominator (CDI_DOMINATORS
, update_bb
, lock_bb
);
251 /* Create the loop structure. */
252 loop
*lock_loop
= alloc_loop ();
253 lock_loop
->header
= lock_bb
;
254 lock_loop
->latch
= lock_bb
;
/* Record an iteration estimate of 1 for the spin loop.  */
255 lock_loop
->nb_iterations_estimate
= 1;
256 lock_loop
->any_estimate
= true;
257 add_loop (lock_loop
, entry_bb
->loop_father
);
259 /* Build and insert the reduction calculation. */
260 gimple_seq red_seq
= NULL
;
/* Load *PTR through a volatile reference.  */
261 tree acc_in
= make_ssa_name (var_type
);
262 tree ref_in
= build_simple_mem_ref (ptr
);
263 TREE_THIS_VOLATILE (ref_in
) = 1;
264 gimplify_assign (acc_in
, ref_in
, &red_seq
);
/* NOTE(review): update_expr is built from ref_in, not from acc_in, so the
   acc_in loaded above is not used by any statement visible here.  Confirm
   whether re-reading the volatile reference is intended.  */
266 tree acc_out
= make_ssa_name (var_type
);
267 tree update_expr
= fold_build2 (op
, var_type
, ref_in
, var
);
268 gimplify_assign (acc_out
, update_expr
, &red_seq
);
/* Store the combined value back through a second volatile reference.  */
270 tree ref_out
= build_simple_mem_ref (ptr
);
271 TREE_THIS_VOLATILE (ref_out
) = 1;
272 gimplify_assign (ref_out
, acc_out
, &red_seq
);
274 gsi_insert_seq_before (gsi
, red_seq
, GSI_SAME_STMT
);
276 /* Build & insert the unlock sequence. */
277 gimple_seq unlock_seq
= NULL
;
278 tree unlock_expr
= gcn_global_lock_addr ();
/* cmp&swap (&lock, locked, unlocked); the call result is not assigned.  */
279 unlock_expr
= build_call_expr_loc (loc
, swap_fn
, 3, unlock_expr
,
280 uns_locked
, uns_unlocked
);
281 gimplify_and_add (unlock_expr
, &unlock_seq
);
282 gsi_insert_seq_before (gsi
, unlock_seq
, GSI_SAME_STMT
);
287 /* Emit a sequence to update a reduction accumulator at *PTR with the
288 value held in VAR using operator OP. Return the updated value.
   The code is inserted before GSI, with LOC as its location.  When the
   TYPE_SIZE of VAR's type is the same node as that of unsigned int or
   unsigned long long, gcn_lockless_update is used; every other size
   goes through gcn_lockfull_update.
290 TODO: optimize for atomic ops and independent complex ops. */
293 gcn_reduction_update (location_t loc
, gimple_stmt_iterator
*gsi
,
294 tree ptr
, tree var
, tree_code op
)
296 tree type
= TREE_TYPE (var
);
297 tree size
= TYPE_SIZE (type
);
/* The sizes are compared as tree nodes (pointer equality); this
   presumably relies on size constants being shared -- TODO confirm.  */
299 if (size
== TYPE_SIZE (unsigned_type_node
)
300 || size
== TYPE_SIZE (long_long_unsigned_type_node
))
301 return gcn_lockless_update (loc
, gsi
, ptr
, var
, op
);
303 return gcn_lockfull_update (loc
, gsi
, ptr
, var
, op
);
306 /* Return a temporary variable decl to use for an OpenACC worker reduction. */
/* What the visible code returns is a memory reference
   (build_simple_mem_ref) through a constant pointer whose value is the
   current function's reduction_base plus OFFSET.  The pointed-to type is
   TYPE (with a reference type replaced by its target type) qualified with
   the LDS address space.
   NOTE(review): this extract omits several original lines (return type,
   the var_type declaration, part of the qualifier expression, and the
   start of what looks like a bounds check against
   reduction_limit - reduction_base) -- confirm against the full source.  */
309 gcn_goacc_get_worker_red_decl (tree type
, unsigned offset
)
311 machine_function
*machfun
= cfun
->machine
;
/* Look through a reference type to the referenced type.  */
313 if (TREE_CODE (type
) == REFERENCE_TYPE
)
314 type
= TREE_TYPE (type
);
317 = build_qualified_type (type
,
319 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS
)));
322 < (machfun
->reduction_limit
- machfun
->reduction_base
));
/* Constant address of the slot: reduction_base + OFFSET.  */
323 tree ptr_type
= build_pointer_type (var_type
);
324 tree addr
= build_int_cst (ptr_type
, machfun
->reduction_base
+ offset
);
326 return build_simple_mem_ref (addr
);
329 /* Expand IFN_GOACC_REDUCTION_SETUP. */
/* Arguments read from CALL: 1 = reference to the receiver object (or
   zero), 2 = the incoming value VAR, 3 = the parallelism level, 5 = the
   worker buffer offset.  CALL is replaced by the generated sequence, which
   ends by assigning VAR to CALL's lhs.
   NOTE(review): this extract omits some original lines (return type,
   braces, the decl declaration, any guard around the final assignment);
   confirm against the full source.  */
332 gcn_goacc_reduction_setup (gcall
*call
)
334 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
335 tree lhs
= gimple_call_lhs (call
);
336 tree var
= gimple_call_arg (call
, 2);
337 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
338 gimple_seq seq
= NULL
;
340 push_gimplify_context (true);
342 if (level
!= GOMP_DIM_GANG
)
344 /* Copy the receiver object. */
345 tree ref_to_res
= gimple_call_arg (call
, 1);
/* A non-zero receiver reference replaces VAR with a load through it.  */
347 if (!integer_zerop (ref_to_res
))
348 var
= build_simple_mem_ref (ref_to_res
);
351 if (level
== GOMP_DIM_WORKER
)
353 tree var_type
= TREE_TYPE (var
);
354 /* Store incoming value to worker reduction buffer. */
355 tree offset
= gimple_call_arg (call
, 5);
357 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
359 gimplify_assign (decl
, var
, &seq
);
363 gimplify_assign (lhs
, var
, &seq
);
365 pop_gimplify_context (NULL
);
366 gsi_replace_with_seq (&gsi
, seq
, true);
369 /* Expand IFN_GOACC_REDUCTION_INIT. */
/* Arguments read from CALL: 1 = reference to the receiver object (or
   zero), 2 = VAR, 3 = the parallelism level, 4 = the reduction operator as
   a tree_code.  INIT is the operator's initial value as produced by
   omp_reduction_init_op; CALL is replaced by a sequence that assigns INIT
   to CALL's lhs.
   NOTE(review): this extract omits some original lines (return type,
   braces, the declaration receiving the tree_code, the last argument of
   omp_reduction_init_op and the body of the gang-level branch); confirm
   against the full source.  */
372 gcn_goacc_reduction_init (gcall
*call
)
374 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
375 tree lhs
= gimple_call_lhs (call
);
376 tree var
= gimple_call_arg (call
, 2);
377 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
379 = (enum tree_code
) TREE_INT_CST_LOW (gimple_call_arg (call
, 4));
380 tree init
= omp_reduction_init_op (gimple_location (call
), rcode
,
382 gimple_seq seq
= NULL
;
384 push_gimplify_context (true);
386 if (level
== GOMP_DIM_GANG
)
388 /* If there's no receiver object, propagate the incoming VAR. */
389 tree ref_to_res
= gimple_call_arg (call
, 1);
390 if (integer_zerop (ref_to_res
))
395 gimplify_assign (lhs
, init
, &seq
);
397 pop_gimplify_context (NULL
);
398 gsi_replace_with_seq (&gsi
, seq
, true);
401 /* Expand IFN_GOACC_REDUCTION_FINI. */
/* Arguments read from CALL: 1 = reference to the receiver object (or
   zero), 2 = VAR, 3 = the parallelism level, 4 = the reduction operator as
   a tree_code, 5 = the worker buffer offset.  For the worker level the
   accumulator is the address of the worker reduction slot.  The
   accumulator is updated via gcn_reduction_update and the result R is
   assigned to CALL's lhs in the sequence that replaces CALL.
   NOTE(review): this extract omits some original lines (return type,
   braces, declarations of op/decl/r, the bodies of the non-worker
   branches and any guard around the update); confirm against the full
   source.  */
404 gcn_goacc_reduction_fini (gcall
*call
)
406 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
407 tree lhs
= gimple_call_lhs (call
);
408 tree ref_to_res
= gimple_call_arg (call
, 1);
409 tree var
= gimple_call_arg (call
, 2);
410 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
412 = (enum tree_code
) TREE_INT_CST_LOW (gimple_call_arg (call
, 4));
413 gimple_seq seq
= NULL
;
416 push_gimplify_context (true);
418 tree accum
= NULL_TREE
;
420 if (level
== GOMP_DIM_WORKER
)
422 tree var_type
= TREE_TYPE (var
);
423 tree offset
= gimple_call_arg (call
, 5);
425 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
/* Accumulate into the worker reduction slot.  */
427 accum
= build_fold_addr_expr (decl
);
429 else if (integer_zerop (ref_to_res
))
436 /* UPDATE the accumulator. */
/* Whatever is in SEQ so far is emitted ahead of the update code, which
   gcn_reduction_update inserts before GSI itself.  */
437 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
439 r
= gcn_reduction_update (gimple_location (call
), &gsi
, accum
, var
, op
);
443 gimplify_assign (lhs
, r
, &seq
);
444 pop_gimplify_context (NULL
);
446 gsi_replace_with_seq (&gsi
, seq
, true);
449 /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
/* Arguments read from CALL: 1 = reference to the receiver object (or
   zero), 2 = VAR, 3 = the parallelism level, 5 = the worker buffer offset.
   For non-gang levels with a non-zero receiver reference, VAR is stored
   through that reference.  An unshared copy of VAR is assigned to CALL's
   lhs, and CALL is replaced by the generated sequence.
   NOTE(review): this extract omits some original lines (return type,
   braces, the declaration receiving the worker slot and the statement
   that reads it, any guard around the final assignment); confirm against
   the full source.  */
452 gcn_goacc_reduction_teardown (gcall
*call
)
454 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
455 tree lhs
= gimple_call_lhs (call
);
456 tree var
= gimple_call_arg (call
, 2);
457 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
458 gimple_seq seq
= NULL
;
460 push_gimplify_context (true);
462 if (level
== GOMP_DIM_WORKER
)
464 tree var_type
= TREE_TYPE (var
);
466 /* Read the worker reduction buffer. */
467 tree offset
= gimple_call_arg (call
, 5);
469 = gcn_goacc_get_worker_red_decl (var_type
, TREE_INT_CST_LOW (offset
));
473 if (level
!= GOMP_DIM_GANG
)
475 /* Write to the receiver object. */
476 tree ref_to_res
= gimple_call_arg (call
, 1);
478 if (!integer_zerop (ref_to_res
))
479 gimplify_assign (build_simple_mem_ref (ref_to_res
), var
, &seq
);
/* VAR may already appear in the store above, hence the unshared copy.  */
483 gimplify_assign (lhs
, unshare_expr (var
), &seq
);
485 pop_gimplify_context (NULL
);
487 gsi_replace_with_seq (&gsi
, seq
, true);
490 /* Implement TARGET_GOACC_REDUCTION.
492 Expand calls to the GOACC REDUCTION internal function, into a sequence of
493 gimple instructions.
   Argument 3 of CALL is the parallelism level and argument 0 selects the
   reduction phase.  Vector-level calls are handed to
   default_goacc_reduction; the visible cases dispatch SETUP, INIT, FINI
   and TEARDOWN to the matching gcn_goacc_reduction_* expander.
   NOTE(review): this extract omits some original lines (return type,
   braces, the switch header, statements following each call, any default
   case); confirm against the full source. */
496 gcn_goacc_reduction (gcall
*call
)
498 int level
= TREE_INT_CST_LOW (gimple_call_arg (call
, 3));
500 if (level
== GOMP_DIM_VECTOR
)
502 default_goacc_reduction (call
);
506 unsigned code
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
510 case IFN_GOACC_REDUCTION_SETUP
:
511 gcn_goacc_reduction_setup (call
);
514 case IFN_GOACC_REDUCTION_INIT
:
515 gcn_goacc_reduction_init (call
);
518 case IFN_GOACC_REDUCTION_FINI
:
519 gcn_goacc_reduction_fini (call
);
522 case IFN_GOACC_REDUCTION_TEARDOWN
:
523 gcn_goacc_reduction_teardown (call
);
/* Adjust the private variable VAR for parallelism level LEVEL (the
   location argument is unnamed and unused).  The visible statements re-type
   VAR to an LDS-address-space-qualified variant of its type, make it
   static, give it a mangled assembler name, finalize it with the varpool
   and set use_flat_addressing on the current function's machine data.
   NOTE(review): this extract omits some original lines (return type,
   braces, the statement controlled by the LEVEL test, lines just before
   the use_flat_addressing store, the return); presumably non-gang levels
   leave VAR untouched -- confirm against the full source.  */
532 gcn_goacc_adjust_private_decl (location_t
, tree var
, int level
)
534 if (level
!= GOMP_DIM_GANG
)
/* Same type and qualifiers as before, but in the LDS address space.  */
537 tree type
= TREE_TYPE (var
);
538 tree lds_type
= build_qualified_type (type
,
539 TYPE_QUALS_NO_ADDR_SPACE (type
)
540 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS
));
541 machine_function
*machfun
= cfun
->machine
;
543 TREE_TYPE (var
) = lds_type
;
544 TREE_STATIC (var
) = 1;
546 /* We're making VAR static. We have to mangle the name to avoid collisions
547 between different local variables that share the same names. */
548 lhd_set_decl_assembler_name (var
);
550 varpool_node::finalize_decl (var
);
553 machfun
->use_flat_addressing
= true;
558 /* Implement TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD.
560 Create OpenACC worker state propagation record in shared memory.
   RECORD_TYPE is re-qualified into the LDS address space.  Two results are
   visible: a raw temporary variable of pointer-to-LDS-record type, and a
   constant pointer of that type whose value is OFFSET.
   NOTE(review): this extract omits some original lines (return type,
   braces, the definition of name, the test on SENDER that presumably
   selects between the two results, and whatever the char_type_node test
   controls); confirm against the full source. */
563 gcn_goacc_create_worker_broadcast_record (tree record_type
, bool sender
,
565 unsigned HOST_WIDE_INT offset
)
567 tree type
= build_qualified_type (record_type
,
568 TYPE_QUALS_NO_ADDR_SPACE (record_type
)
569 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS
));
/* First result: a temporary pointer variable.  */
573 tree ptr_type
= build_pointer_type (type
);
574 return create_tmp_var_raw (ptr_type
, name
);
577 if (record_type
== char_type_node
)
/* Second result: the constant address OFFSET as a pointer.  */
580 tree ptr_type
= build_pointer_type (type
);
581 return build_int_cst (ptr_type
, offset
);