]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/gcn/gcn-tree.c
Update copyright years.
[thirdparty/gcc.git] / gcc / config / gcn / gcn-tree.c
1 /* Copyright (C) 2017-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
18
19 /* {{{ Includes. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "gimple-iterator.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "fold-const.h"
35 #include "varasm.h"
36 #include "omp-low.h"
37 #include "omp-general.h"
38 #include "internal-fn.h"
39 #include "tree-vrp.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-operands.h"
42 #include "gimplify.h"
43 #include "tree-phinodes.h"
44 #include "cgraph.h"
45 #include "targhooks.h"
46 #include "langhooks-def.h"
47
48 /* }}} */
49 /* {{{ OpenACC reductions. */
50
51 /* Global lock variable, needed for 128bit worker & gang reductions. */
52
53 static GTY(()) tree global_lock_var;
54
55 /* Lazily generate the global_lock_var decl and return its address. */
56
57 static tree
58 gcn_global_lock_addr ()
59 {
60 tree v = global_lock_var;
61
62 if (!v)
63 {
64 tree name = get_identifier ("__reduction_lock");
65 tree type = build_qualified_type (unsigned_type_node,
66 TYPE_QUAL_VOLATILE);
67 v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
68 global_lock_var = v;
69 DECL_ARTIFICIAL (v) = 1;
70 DECL_EXTERNAL (v) = 1;
71 TREE_STATIC (v) = 1;
72 TREE_PUBLIC (v) = 1;
73 TREE_USED (v) = 1;
74 mark_addressable (v);
75 mark_decl_referenced (v);
76 }
77
78 return build_fold_addr_expr (v);
79 }
80
81 /* Helper function for gcn_reduction_update.
82
83 Insert code to locklessly update *PTR with *PTR OP VAR just before
84 GSI. We use a lockless scheme for nearly all case, which looks
85 like:
86 actual = initval (OP);
87 do {
88 guess = actual;
89 write = guess OP myval;
90 actual = cmp&swap (ptr, guess, write)
91 } while (actual bit-different-to guess);
92 return write;
93
94 This relies on a cmp&swap instruction, which is available for 32- and
95 64-bit types. Larger types must use a locking scheme. */
96
97 static tree
98 gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
99 tree ptr, tree var, tree_code op)
100 {
101 unsigned fn = GCN_BUILTIN_CMP_SWAP;
102 tree_code code = NOP_EXPR;
103 tree arg_type = unsigned_type_node;
104 tree var_type = TREE_TYPE (var);
105
106 if (TREE_CODE (var_type) == COMPLEX_TYPE
107 || TREE_CODE (var_type) == REAL_TYPE)
108 code = VIEW_CONVERT_EXPR;
109
110 if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
111 {
112 arg_type = long_long_unsigned_type_node;
113 fn = GCN_BUILTIN_CMP_SWAPLL;
114 }
115
116 tree swap_fn = gcn_builtin_decl (fn, true);
117
118 gimple_seq init_seq = NULL;
119 tree init_var = make_ssa_name (arg_type);
120 tree init_expr = omp_reduction_init_op (loc, op, var_type);
121 init_expr = fold_build1 (code, arg_type, init_expr);
122 gimplify_assign (init_var, init_expr, &init_seq);
123 gimple *init_end = gimple_seq_last (init_seq);
124
125 gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
126
127 /* Split the block just after the init stmts. */
128 basic_block pre_bb = gsi_bb (*gsi);
129 edge pre_edge = split_block (pre_bb, init_end);
130 basic_block loop_bb = pre_edge->dest;
131 pre_bb = pre_edge->src;
132 /* Reset the iterator. */
133 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
134
135 tree expect_var = make_ssa_name (arg_type);
136 tree actual_var = make_ssa_name (arg_type);
137 tree write_var = make_ssa_name (arg_type);
138
139 /* Build and insert the reduction calculation. */
140 gimple_seq red_seq = NULL;
141 tree write_expr = fold_build1 (code, var_type, expect_var);
142 write_expr = fold_build2 (op, var_type, write_expr, var);
143 write_expr = fold_build1 (code, arg_type, write_expr);
144 gimplify_assign (write_var, write_expr, &red_seq);
145
146 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
147
148 /* Build & insert the cmp&swap sequence. */
149 gimple_seq latch_seq = NULL;
150 tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
151 ptr, expect_var, write_var);
152 gimplify_assign (actual_var, swap_expr, &latch_seq);
153
154 gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
155 NULL_TREE, NULL_TREE);
156 gimple_seq_add_stmt (&latch_seq, cond);
157
158 gimple *latch_end = gimple_seq_last (latch_seq);
159 gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);
160
161 /* Split the block just after the latch stmts. */
162 edge post_edge = split_block (loop_bb, latch_end);
163 basic_block post_bb = post_edge->dest;
164 loop_bb = post_edge->src;
165 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
166
167 post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
168 /* post_edge->probability = profile_probability::even (); */
169 edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
170 /* loop_edge->probability = profile_probability::even (); */
171 set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
172 set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
173
174 gphi *phi = create_phi_node (expect_var, loop_bb);
175 add_phi_arg (phi, init_var, pre_edge, loc);
176 add_phi_arg (phi, actual_var, loop_edge, loc);
177
178 loop *loop = alloc_loop ();
179 loop->header = loop_bb;
180 loop->latch = loop_bb;
181 add_loop (loop, loop_bb->loop_father);
182
183 return fold_build1 (code, var_type, write_var);
184 }
185
186 /* Helper function for gcn_reduction_update.
187
188 Insert code to lockfully update *PTR with *PTR OP VAR just before
189 GSI. This is necessary for types larger than 64 bits, where there
190 is no cmp&swap instruction to implement a lockless scheme. We use
191 a lock variable in global memory.
192
193 while (cmp&swap (&lock_var, 0, 1))
194 continue;
195 T accum = *ptr;
196 accum = accum OP var;
197 *ptr = accum;
198 cmp&swap (&lock_var, 1, 0);
199 return accum;
200
201 A lock in global memory is necessary to force execution engine
202 descheduling and avoid resource starvation that can occur if the
203 lock is in shared memory. */
204
205 static tree
206 gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
207 tree ptr, tree var, tree_code op)
208 {
209 tree var_type = TREE_TYPE (var);
210 tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true);
211 tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
212 tree uns_locked = build_int_cst (unsigned_type_node, 1);
213
214 /* Split the block just before the gsi. Insert a gimple nop to make
215 this easier. */
216 gimple *nop = gimple_build_nop ();
217 gsi_insert_before (gsi, nop, GSI_SAME_STMT);
218 basic_block entry_bb = gsi_bb (*gsi);
219 edge entry_edge = split_block (entry_bb, nop);
220 basic_block lock_bb = entry_edge->dest;
221 /* Reset the iterator. */
222 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
223
224 /* Build and insert the locking sequence. */
225 gimple_seq lock_seq = NULL;
226 tree lock_var = make_ssa_name (unsigned_type_node);
227 tree lock_expr = gcn_global_lock_addr ();
228 lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
229 uns_unlocked, uns_locked);
230 gimplify_assign (lock_var, lock_expr, &lock_seq);
231 gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
232 NULL_TREE, NULL_TREE);
233 gimple_seq_add_stmt (&lock_seq, cond);
234 gimple *lock_end = gimple_seq_last (lock_seq);
235 gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);
236
237 /* Split the block just after the lock sequence. */
238 edge locked_edge = split_block (lock_bb, lock_end);
239 basic_block update_bb = locked_edge->dest;
240 lock_bb = locked_edge->src;
241 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
242
243 /* Create the lock loop. */
244 locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
245 locked_edge->probability = profile_probability::even ();
246 edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
247 loop_edge->probability = profile_probability::even ();
248 set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
249 set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
250
251 /* Create the loop structure. */
252 loop *lock_loop = alloc_loop ();
253 lock_loop->header = lock_bb;
254 lock_loop->latch = lock_bb;
255 lock_loop->nb_iterations_estimate = 1;
256 lock_loop->any_estimate = true;
257 add_loop (lock_loop, entry_bb->loop_father);
258
259 /* Build and insert the reduction calculation. */
260 gimple_seq red_seq = NULL;
261 tree acc_in = make_ssa_name (var_type);
262 tree ref_in = build_simple_mem_ref (ptr);
263 TREE_THIS_VOLATILE (ref_in) = 1;
264 gimplify_assign (acc_in, ref_in, &red_seq);
265
266 tree acc_out = make_ssa_name (var_type);
267 tree update_expr = fold_build2 (op, var_type, ref_in, var);
268 gimplify_assign (acc_out, update_expr, &red_seq);
269
270 tree ref_out = build_simple_mem_ref (ptr);
271 TREE_THIS_VOLATILE (ref_out) = 1;
272 gimplify_assign (ref_out, acc_out, &red_seq);
273
274 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
275
276 /* Build & insert the unlock sequence. */
277 gimple_seq unlock_seq = NULL;
278 tree unlock_expr = gcn_global_lock_addr ();
279 unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
280 uns_locked, uns_unlocked);
281 gimplify_and_add (unlock_expr, &unlock_seq);
282 gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);
283
284 return acc_out;
285 }
286
287 /* Emit a sequence to update a reduction accumulator at *PTR with the
288 value held in VAR using operator OP. Return the updated value.
289
290 TODO: optimize for atomic ops and independent complex ops. */
291
292 static tree
293 gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
294 tree ptr, tree var, tree_code op)
295 {
296 tree type = TREE_TYPE (var);
297 tree size = TYPE_SIZE (type);
298
299 if (size == TYPE_SIZE (unsigned_type_node)
300 || size == TYPE_SIZE (long_long_unsigned_type_node))
301 return gcn_lockless_update (loc, gsi, ptr, var, op);
302 else
303 return gcn_lockfull_update (loc, gsi, ptr, var, op);
304 }
305
306 /* Return a temporary variable decl to use for an OpenACC worker reduction. */
307
308 static tree
309 gcn_goacc_get_worker_red_decl (tree type, unsigned offset)
310 {
311 machine_function *machfun = cfun->machine;
312 tree existing_decl;
313
314 if (TREE_CODE (type) == REFERENCE_TYPE)
315 type = TREE_TYPE (type);
316
317 tree var_type
318 = build_qualified_type (type,
319 (TYPE_QUALS (type)
320 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)));
321
322 if (machfun->reduc_decls
323 && offset < machfun->reduc_decls->length ()
324 && (existing_decl = (*machfun->reduc_decls)[offset]))
325 {
326 gcc_assert (TREE_TYPE (existing_decl) == var_type);
327 return existing_decl;
328 }
329 else
330 {
331 char name[50];
332 sprintf (name, ".oacc_reduction_%u", offset);
333 tree decl = create_tmp_var_raw (var_type, name);
334
335 DECL_CONTEXT (decl) = NULL_TREE;
336 TREE_STATIC (decl) = 1;
337
338 varpool_node::finalize_decl (decl);
339
340 vec_safe_grow_cleared (machfun->reduc_decls, offset + 1, true);
341 (*machfun->reduc_decls)[offset] = decl;
342
343 return decl;
344 }
345
346 return NULL_TREE;
347 }
348
349 /* Expand IFN_GOACC_REDUCTION_SETUP. */
350
351 static void
352 gcn_goacc_reduction_setup (gcall *call)
353 {
354 gimple_stmt_iterator gsi = gsi_for_stmt (call);
355 tree lhs = gimple_call_lhs (call);
356 tree var = gimple_call_arg (call, 2);
357 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
358 gimple_seq seq = NULL;
359
360 push_gimplify_context (true);
361
362 if (level != GOMP_DIM_GANG)
363 {
364 /* Copy the receiver object. */
365 tree ref_to_res = gimple_call_arg (call, 1);
366
367 if (!integer_zerop (ref_to_res))
368 var = build_simple_mem_ref (ref_to_res);
369 }
370
371 if (level == GOMP_DIM_WORKER)
372 {
373 tree var_type = TREE_TYPE (var);
374 /* Store incoming value to worker reduction buffer. */
375 tree offset = gimple_call_arg (call, 5);
376 tree decl
377 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
378
379 gimplify_assign (decl, var, &seq);
380 }
381
382 if (lhs)
383 gimplify_assign (lhs, var, &seq);
384
385 pop_gimplify_context (NULL);
386 gsi_replace_with_seq (&gsi, seq, true);
387 }
388
389 /* Expand IFN_GOACC_REDUCTION_INIT. */
390
391 static void
392 gcn_goacc_reduction_init (gcall *call)
393 {
394 gimple_stmt_iterator gsi = gsi_for_stmt (call);
395 tree lhs = gimple_call_lhs (call);
396 tree var = gimple_call_arg (call, 2);
397 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
398 enum tree_code rcode
399 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
400 tree init = omp_reduction_init_op (gimple_location (call), rcode,
401 TREE_TYPE (var));
402 gimple_seq seq = NULL;
403
404 push_gimplify_context (true);
405
406 if (level == GOMP_DIM_GANG)
407 {
408 /* If there's no receiver object, propagate the incoming VAR. */
409 tree ref_to_res = gimple_call_arg (call, 1);
410 if (integer_zerop (ref_to_res))
411 init = var;
412 }
413
414 if (lhs)
415 gimplify_assign (lhs, init, &seq);
416
417 pop_gimplify_context (NULL);
418 gsi_replace_with_seq (&gsi, seq, true);
419 }
420
421 /* Expand IFN_GOACC_REDUCTION_FINI. */
422
423 static void
424 gcn_goacc_reduction_fini (gcall *call)
425 {
426 gimple_stmt_iterator gsi = gsi_for_stmt (call);
427 tree lhs = gimple_call_lhs (call);
428 tree ref_to_res = gimple_call_arg (call, 1);
429 tree var = gimple_call_arg (call, 2);
430 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
431 enum tree_code op
432 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
433 gimple_seq seq = NULL;
434 tree r = NULL_TREE;;
435
436 push_gimplify_context (true);
437
438 tree accum = NULL_TREE;
439
440 if (level == GOMP_DIM_WORKER)
441 {
442 tree var_type = TREE_TYPE (var);
443 tree offset = gimple_call_arg (call, 5);
444 tree decl
445 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
446
447 accum = build_fold_addr_expr (decl);
448 }
449 else if (integer_zerop (ref_to_res))
450 r = var;
451 else
452 accum = ref_to_res;
453
454 if (accum)
455 {
456 /* UPDATE the accumulator. */
457 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
458 seq = NULL;
459 r = gcn_reduction_update (gimple_location (call), &gsi, accum, var, op);
460 }
461
462 if (lhs)
463 gimplify_assign (lhs, r, &seq);
464 pop_gimplify_context (NULL);
465
466 gsi_replace_with_seq (&gsi, seq, true);
467 }
468
469 /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
470
471 static void
472 gcn_goacc_reduction_teardown (gcall *call)
473 {
474 gimple_stmt_iterator gsi = gsi_for_stmt (call);
475 tree lhs = gimple_call_lhs (call);
476 tree var = gimple_call_arg (call, 2);
477 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
478 gimple_seq seq = NULL;
479
480 push_gimplify_context (true);
481
482 if (level == GOMP_DIM_WORKER)
483 {
484 tree var_type = TREE_TYPE (var);
485
486 /* Read the worker reduction buffer. */
487 tree offset = gimple_call_arg (call, 5);
488 tree decl
489 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
490 var = decl;
491 }
492
493 if (level != GOMP_DIM_GANG)
494 {
495 /* Write to the receiver object. */
496 tree ref_to_res = gimple_call_arg (call, 1);
497
498 if (!integer_zerop (ref_to_res))
499 gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
500 }
501
502 if (lhs)
503 gimplify_assign (lhs, var, &seq);
504
505 pop_gimplify_context (NULL);
506
507 gsi_replace_with_seq (&gsi, seq, true);
508 }
509
510 /* Implement TARGET_GOACC_REDUCTION.
511
512 Expand calls to the GOACC REDUCTION internal function, into a sequence of
513 gimple instructions. */
514
515 void
516 gcn_goacc_reduction (gcall *call)
517 {
518 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
519
520 if (level == GOMP_DIM_VECTOR)
521 {
522 default_goacc_reduction (call);
523 return;
524 }
525
526 unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
527
528 switch (code)
529 {
530 case IFN_GOACC_REDUCTION_SETUP:
531 gcn_goacc_reduction_setup (call);
532 break;
533
534 case IFN_GOACC_REDUCTION_INIT:
535 gcn_goacc_reduction_init (call);
536 break;
537
538 case IFN_GOACC_REDUCTION_FINI:
539 gcn_goacc_reduction_fini (call);
540 break;
541
542 case IFN_GOACC_REDUCTION_TEARDOWN:
543 gcn_goacc_reduction_teardown (call);
544 break;
545
546 default:
547 gcc_unreachable ();
548 }
549 }
550
551 /* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD.
552
553 Tweak (worker) propagation record, e.g. to put it in shared memory. */
554
555 tree
556 gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
557 const char *name)
558 {
559 tree type = record_type;
560
561 TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS;
562
563 if (!sender)
564 type = build_pointer_type (type);
565
566 tree decl = create_tmp_var_raw (type, name);
567
568 if (sender)
569 {
570 DECL_CONTEXT (decl) = NULL_TREE;
571 TREE_STATIC (decl) = 1;
572 }
573
574 if (sender)
575 varpool_node::finalize_decl (decl);
576
577 return decl;
578 }
579
580 void
581 gcn_goacc_adjust_gangprivate_decl (tree var)
582 {
583 tree type = TREE_TYPE (var);
584 tree lds_type = build_qualified_type (type,
585 TYPE_QUALS_NO_ADDR_SPACE (type)
586 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));
587 machine_function *machfun = cfun->machine;
588
589 TREE_TYPE (var) = lds_type;
590 TREE_STATIC (var) = 1;
591
592 /* We're making VAR static. We have to mangle the name to avoid collisions
593 between different local variables that share the same names. */
594 lhd_set_decl_assembler_name (var);
595
596 varpool_node::finalize_decl (var);
597
598 if (machfun)
599 machfun->use_flat_addressing = true;
600 }
601
602 /* }}} */