/* Bits of OpenMP and OpenACC handling that is specific to device offloading
   and a lowering pass for OpenACC device directives.

   Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "internal-fn.h"
#include "langhooks.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "common/common-target.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "intl.h"
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
#include "context.h"
#include "convert.h"
#include "opts.h"

/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent;  /* Containing loop.  */

  oacc_loop *child;  /* First inner loop.  */

  oacc_loop *sibling;  /* Next loop within same parent.  */

  location_t loc;  /* Location of the loop start.  */

  gcall *marker;  /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions.  */

  tree routine;  /* Pseudo-loop enclosing a routine.  */

  unsigned mask;  /* Partitioning mask.  */
  unsigned e_mask;  /* Partitioning of element loops (when tiling).  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
  tree chunk_size;  /* Chunk size.  */
  gcall *head_end;  /* Final marker of head sequence.  */
};

/* Holds offload tables with decls.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;

/* Return level at which oacc routine may spawn a partitioned loop, or
   -1 if it is not a routine (i.e. is an offload fn).  */

int
oacc_fn_attrib_level (tree attr)
{
  tree pos = TREE_VALUE (attr);

  if (!TREE_PURPOSE (pos))
    return -1;

  int ix = 0;
  for (ix = 0; ix != GOMP_DIM_MAX;
       ix++, pos = TREE_CHAIN (pos))
    if (!integer_zerop (TREE_PURPOSE (pos)))
      break;

  return ix;
}

/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
                                         vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      bool is_link_var
        = is_var
#ifdef ACCEL_COMPILER
          && DECL_HAS_VALUE_EXPR_P (it)
#endif
          && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* See also omp_finish_file and output_offload_tables in lto-cgraph.cc.  */
      if (!in_lto_p && !symtab_node::get (it))
        continue;

      tree size = NULL_TREE;
      if (is_var)
        size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
        addr = build_fold_addr_expr (it);
      else
        {
#ifdef ACCEL_COMPILER
          /* For "omp declare target link" vars add address of the pointer to
             the target table, instead of address of the var.  */
          tree value_expr = DECL_VALUE_EXPR (it);
          tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
          varpool_node::finalize_decl (link_ptr_decl);
          addr = build_fold_addr_expr (link_ptr_decl);
#else
          addr = build_fold_addr_expr (it);
#endif

          /* Most significant bit of the size marks "omp declare target link"
             vars in host and target tables.  */
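          /* For example, with 64-bit pointers int_size_in_bytes
             (const_ptr_type_node) * BITS_PER_UNIT - 1 is 63, so the marker
             set below is bit 63 of the recorded size.  */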
          unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
          isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
                            * BITS_PER_UNIT - 1);
          size = wide_int_to_tree (const_ptr_type_node, isize);
        }

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
        CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}

/* Return true if DECL is a function for which its references should be
   analyzed.  */

static bool
omp_declare_target_fn_p (tree decl)
{
  return (TREE_CODE (decl) == FUNCTION_DECL
          && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
          && !lookup_attribute ("omp declare target host",
                                DECL_ATTRIBUTES (decl))
          && (!flag_openacc
              || oacc_get_fn_attrib (decl) == NULL_TREE));
}

/* Return true if DECL is a variable for which its initializer references
   should be analyzed.  */

static bool
omp_declare_target_var_p (tree decl)
{
  return (VAR_P (decl)
          && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
          && !lookup_attribute ("omp declare target link",
                                DECL_ATTRIBUTES (decl)));
}

/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to.  */

static tree
omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == CALL_EXPR
      && CALL_EXPR_FN (*tp)
      && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
      && lookup_attribute ("omp declare variant base",
                           DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
                                                          0))))
    {
      tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0);
      for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))
        {
          attr = lookup_attribute ("omp declare variant base", attr);
          if (attr == NULL_TREE)
            break;
          tree purpose = TREE_PURPOSE (TREE_VALUE (attr));
          if (TREE_CODE (purpose) == FUNCTION_DECL)
            omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);
        }
    }
  else if (TREE_CODE (*tp) == FUNCTION_DECL)
    {
      tree decl = *tp;
      tree id = get_identifier ("omp declare target");
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL)
        {
          while (node->alias_target
                 && TREE_CODE (node->alias_target) == FUNCTION_DECL)
            {
              if (!omp_declare_target_fn_p (node->decl)
                  && !lookup_attribute ("omp declare target host",
                                        DECL_ATTRIBUTES (node->decl)))
                {
                  node->offloadable = 1;
                  DECL_ATTRIBUTES (node->decl)
                    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
                }
              node = symtab_node::get (node->alias_target);
            }
          symtab_node *new_node = node->ultimate_alias_target ();
          decl = new_node->decl;
          while (node != new_node)
            {
              if (!omp_declare_target_fn_p (node->decl)
                  && !lookup_attribute ("omp declare target host",
                                        DECL_ATTRIBUTES (node->decl)))
                {
                  node->offloadable = 1;
                  DECL_ATTRIBUTES (node->decl)
                    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
                }
              gcc_assert (node->alias && node->analyzed);
              node = node->get_alias_target ();
            }
          node->offloadable = 1;
          if (ENABLE_OFFLOADING)
            g->have_offload = true;
        }
      if (omp_declare_target_fn_p (decl)
          || lookup_attribute ("omp declare target host",
                               DECL_ATTRIBUTES (decl)))
        return NULL_TREE;

      if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
        ((vec<tree> *) data)->safe_push (decl);
      DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
                                          DECL_ATTRIBUTES (decl));
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (TREE_CODE (*tp) == OMP_TARGET)
    {
      tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
      if (c && OMP_CLAUSE_DEVICE_ANCESTOR (c))
        *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Similarly, but ignore references outside of OMP_TARGET regions.  */

static tree
omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == OMP_TARGET)
    {
      tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
      if (!c || !OMP_CLAUSE_DEVICE_ANCESTOR (c))
        walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
                                      omp_discover_declare_target_tgt_fn_r,
                                      data);
      *walk_subtrees = 0;
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to.  */

static tree
omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == FUNCTION_DECL)
    return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
  else if (VAR_P (*tp)
           && is_global_var (*tp)
           && !omp_declare_target_var_p (*tp))
    {
      tree id = get_identifier ("omp declare target");
      if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
        {
          error_at (DECL_SOURCE_LOCATION (*tp),
                    "%qD specified both in declare target %<link%> and "
                    "implicitly in %<to%> clauses", *tp);
          DECL_ATTRIBUTES (*tp)
            = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
        }
      if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
        ((vec<tree> *) data)->safe_push (*tp);
      DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL && !node->offloadable)
        {
          node->offloadable = 1;
          if (ENABLE_OFFLOADING)
            {
              g->have_offload = true;
              if (is_a <varpool_node *> (node))
                vec_safe_push (offload_vars, node->decl);
            }
        }
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Perform the OpenMP implicit declare target to discovery.  */
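/* For example, a function that is only called from the body of an OpenMP
   'target' region, without an explicit 'declare target' directive, is
   found by the walkers above and implicitly marked "omp declare target"
   here.  */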

void
omp_discover_implicit_declare_target (void)
{
  cgraph_node *node;
  varpool_node *vnode;
  auto_vec<tree> worklist;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_SAVED_TREE (node->decl))
      {
        struct cgraph_node *cgn;
        if (omp_declare_target_fn_p (node->decl))
          worklist.safe_push (node->decl);
        else if (DECL_STRUCT_FUNCTION (node->decl)
                 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
          worklist.safe_push (node->decl);
        for (cgn = first_nested_function (node);
             cgn; cgn = next_nested_function (cgn))
          if (omp_declare_target_fn_p (cgn->decl))
            worklist.safe_push (cgn->decl);
          else if (DECL_STRUCT_FUNCTION (cgn->decl)
                   && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
            worklist.safe_push (cgn->decl);
      }
  FOR_EACH_VARIABLE (vnode)
    if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
        && omp_declare_target_var_p (vnode->decl))
      worklist.safe_push (vnode->decl);
  while (!worklist.is_empty ())
    {
      tree decl = worklist.pop ();
      if (VAR_P (decl))
        walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
                                      omp_discover_declare_target_var_r,
                                      &worklist);
      else if (omp_declare_target_fn_p (decl))
        walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
                                      omp_discover_declare_target_tgt_fn_r,
                                      &worklist);
      else
        walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
                                      omp_discover_declare_target_fn_r,
                                      &worklist);
    }

  lang_hooks.decls.omp_finish_decl_inits ();
}


/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  */
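/* On targets with named sections this produces the two arrays
   ".offload_func_table" and ".offload_var_table", placed in the
   OFFLOAD_FUNC_TABLE_SECTION_NAME and OFFLOAD_VAR_TABLE_SECTION_NAME
   sections: each variable contributes an (address, size) pair, each function
   a single address.  Otherwise the symbols are handed to
   targetm.record_offload_symbol below.  */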
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
                                                    vec_safe_length (v_v));
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
                                                     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                    get_identifier (".offload_func_table"),
                                    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                   get_identifier (".offload_var_table"),
                                   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
         otherwise a joint table in a binary will contain padding between
         tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      for (unsigned i = 0; i < num_funcs; i++)
        {
          tree it = (*offload_funcs)[i];
          /* See also add_decls_addresses_to_decl_constructor
             and output_offload_tables in lto-cgraph.cc.  */
          if (!in_lto_p && !symtab_node::get (it))
            continue;
          targetm.record_offload_symbol (it);
        }
      for (unsigned i = 0; i < num_vars; i++)
        {
          tree it = (*offload_vars)[i];
          if (!in_lto_p && !symtab_node::get (it))
            continue;
#ifdef ACCEL_COMPILER
          if (DECL_HAS_VALUE_EXPR_P (it)
              && lookup_attribute ("omp declare target link",
                                   DECL_ATTRIBUTES (it)))
            {
              tree value_expr = DECL_VALUE_EXPR (it);
              tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
              targetm.record_offload_symbol (link_ptr_decl);
              varpool_node::finalize_decl (link_ptr_decl);
            }
          else
#endif
            targetm.record_offload_symbol (it);
        }
    }
}

/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
   axis DIM.  Return a tmp var holding the result.  */

static tree
oacc_dim_call (bool pos, int dim, gimple_seq *seq)
{
  tree arg = build_int_cst (unsigned_type_node, dim);
  tree size = create_tmp_var (integer_type_node);
  enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
  gimple *call = gimple_build_call_internal (fn, 1, arg);

  gimple_call_set_lhs (call, size);
  gimple_seq_add_stmt (seq, call);

  return size;
}

/* Find the number of threads (POS = false), or thread number (POS =
   true) for an OpenACC region partitioned as MASK.  Setup code
   required for the calculation is added to SEQ.  */
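/* For example, for MASK covering worker and vector partitioning this
   computes
     POS = false:  num_workers * vector_length
     POS = true:   worker_pos * vector_length + vector_pos
   from the IFN_GOACC_DIM_SIZE / IFN_GOACC_DIM_POS calls emitted by
   oacc_dim_call.  */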

static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
  unsigned ix;

  /* Start at gang level, and examine relevant dimension indices.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (GOMP_DIM_MASK (ix) & mask)
      {
        if (res)
          {
            /* We had an outer index, so scale that by the size of
               this dimension.  */
            tree n = oacc_dim_call (false, ix, seq);
            res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
          }
        if (pos)
          {
            /* Determine index in this dimension.  */
            tree id = oacc_dim_call (true, ix, seq);
            if (res)
              res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
            else
              res = id;
          }
      }

  if (res == NULL_TREE)
    res = integer_zero_node;

  return res;
}

/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = NULL_TREE;
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
  if (!lhs)
    {
      gsi_replace_with_seq (&gsi, seq, true);
      return;
    }

  type = TREE_TYPE (lhs);

#ifdef ACCEL_COMPILER
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
         contiguous run of iterations.  Otherwise we want each element
         to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
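  /* In the striding case, for instance, logical thread T ends up with
     OFFSET = T * step and STEP = num_threads * step (see the
     IFN_GOACC_LOOP_OFFSET and IFN_GOACC_LOOP_STEP cases below), so thread T
     executes iterations T, T + num_threads, T + 2*num_threads, ...  */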
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
        r = build_int_cst (type, 1);
      else
        {
          /* chunk_max
             = (range - dir) / (chunks * step * num_threads) + dir  */
          tree per = oacc_thread_numbers (false, mask, &seq);
          per = fold_convert (type, per);
          chunk_size = fold_convert (type, chunk_size);
          per = fold_build2 (MULT_EXPR, type, per, chunk_size);
          per = fold_build2 (MULT_EXPR, type, per, step);
          r = build2 (MINUS_EXPR, type, range, dir);
          r = build2 (PLUS_EXPR, type, r, per);
          r = build2 (TRUNC_DIV_EXPR, type, r, per);
        }
      break;

    case IFN_GOACC_LOOP_STEP:
      {
        /* If striding, step by the entire compute volume, otherwise
           step by the inner volume.  */
        unsigned volume = striding ? mask : inner_mask;

        r = oacc_thread_numbers (false, volume, &seq);
        r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
          && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
          /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
             the loop.  */
          && (flag_tree_loop_vectorize
              || !OPTION_SET_P (flag_tree_loop_vectorize)))
        {
          basic_block bb = gsi_bb (gsi);
          class loop *parent = bb->loop_father;
          class loop *body = parent->inner;

          parent->force_vectorize = true;
          parent->safelen = INT_MAX;

          /* "Chunking loops" may have inner loops.  */
          if (parent->inner)
            {
              body->force_vectorize = true;
              body->safelen = INT_MAX;
            }

          cfun->has_force_vectorize_loops = true;
        }
      if (striding)
        {
          r = oacc_thread_numbers (true, mask, &seq);
          r = fold_convert (diff_type, r);
        }
      else
        {
          tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
          tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
          tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
                                     inner_size, outer_size);

          volume = fold_convert (diff_type, volume);
          if (chunking)
            chunk_size = fold_convert (diff_type, chunk_size);
          else
            {
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

              chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
              chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
              chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
            }

          tree span = build2 (MULT_EXPR, diff_type, chunk_size,
                              fold_convert (diff_type, inner_size));
          r = oacc_thread_numbers (true, outer_mask, &seq);
          r = fold_convert (diff_type, r);
          r = build2 (MULT_EXPR, diff_type, r, span);

          tree inner = oacc_thread_numbers (true, inner_mask, &seq);
          inner = fold_convert (diff_type, inner);
          r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

          if (chunking)
            {
              tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
              tree per
                = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
              per = build2 (MULT_EXPR, diff_type, per, chunk);

              r = build2 (PLUS_EXPR, diff_type, r, per);
            }
        }
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
        r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
        r = range;
      else
        {
          tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
          tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
          tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
                                     inner_size, outer_size);

          volume = fold_convert (diff_type, volume);
          if (chunking)
            chunk_size = fold_convert (diff_type, chunk_size);
          else
            {
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

              chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
              chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
              chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
            }

          tree span = build2 (MULT_EXPR, diff_type, chunk_size,
                              fold_convert (diff_type, inner_size));

          r = fold_build2 (MULT_EXPR, diff_type, span, step);

          tree offset = gimple_call_arg (call, 6);
          r = build2 (PLUS_EXPR, diff_type, r,
                      fold_convert (diff_type, offset));
          r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
                      diff_type, r, range);
        }
      if (diff_type != type)
        r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  gcc_assert (!(e_mask
                & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
                    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
         the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
        first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
        second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      if (!first_size)
        {
          first_size = second_size;
          second_size = NULL_TREE;
        }

      if (loop_no + 1 == collapse)
        {
          span = first_size;
          if (!loop_no && second_size)
            span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
                                span, second_size);
        }
      else if (loop_no + 2 == collapse)
        span = second_size;
      else
        span = NULL_TREE;

      if (!span)
        /* There's no obvious element size for this loop.  Options
           are 1, first_size or some non-unity constant (32 is my
           favourite).  We should gather some statistics.  */
        span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Default partitioned and minimum partitioned dimensions.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];

int
oacc_get_default_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_default_dims[dim];
}

int
oacc_get_min_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_min_dims[dim];
}

/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */
855static void
856oacc_parse_default_dims (const char *dims)
857{
858 int ix;
859
860 for (ix = GOMP_DIM_MAX; ix--;)
861 {
862 oacc_default_dims[ix] = -1;
863 oacc_min_dims[ix] = 1;
864 }
865
866#ifndef ACCEL_COMPILER
867 /* Cannot be overridden on the host. */
868 dims = NULL;
869#endif
870 if (dims)
871 {
872 const char *pos = dims;
873
874 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
875 {
876 if (ix)
877 {
878 if (*pos != ':')
879 goto malformed;
880 pos++;
881 }
882
883 if (*pos != ':')
884 {
885 long val;
886 const char *eptr;
887
888 errno = 0;
889 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
890 if (errno || val <= 0 || (int) val != val)
891 goto malformed;
892 pos = eptr;
893 oacc_default_dims[ix] = (int) val;
894 }
895 }
896 if (*pos)
897 {
898 malformed:
899 error_at (UNKNOWN_LOCATION,
904f3daa 900 "%<-fopenacc-dim%> operand is malformed at %qs", pos);
629b3d75
MJ
901 }
902 }
903
904 /* Allow the backend to validate the dimensions. */
46dedae6
TV
905 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
906 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
629b3d75
MJ
907}
908
909/* Validate and update the dimensions for offloaded FN. ATTRS is the
910 raw attribute. DIMS is an array of dimensions, which is filled in.
911 LEVEL is the partitioning level of a routine, or -1 for an offload
01914336 912 region itself. USED is the mask of partitioned execution in the
629b3d75
MJ
913 function. */
914
915static void
916oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
917{
918 tree purpose[GOMP_DIM_MAX];
919 unsigned ix;
920 tree pos = TREE_VALUE (attrs);
629b3d75
MJ
921
922 /* Make sure the attribute creator attached the dimension
923 information. */
924 gcc_assert (pos);
925
926 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
927 {
928 purpose[ix] = TREE_PURPOSE (pos);
929 tree val = TREE_VALUE (pos);
930 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
931 pos = TREE_CHAIN (pos);
932 }
933
22cff118
TS
934 bool check = true;
935#ifdef ACCEL_COMPILER
936 check = false;
937#endif
938 if (check
939 && warn_openacc_parallelism
940 && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
941 {
942 static char const *const axes[] =
943 /* Must be kept in sync with GOMP_DIM enumeration. */
944 { "gang", "worker", "vector" };
945 for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
946 if (dims[ix] < 0)
947 ; /* Defaulting axis. */
948 else if ((used & GOMP_DIM_MASK (ix)) && dims[ix] == 1)
949 /* There is partitioned execution, but the user requested a
950 dimension size of 1. They're probably confused. */
951 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
952 "region contains %s partitioned code but"
953 " is not %s partitioned", axes[ix], axes[ix]);
954 else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
955 /* The dimension is explicitly partitioned to non-unity, but
956 no use is made within the region. */
957 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
958 "region is %s partitioned but"
959 " does not contain %s partitioned code",
960 axes[ix], axes[ix]);
961 }
962
46dedae6 963 bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
629b3d75
MJ
964
965 /* Default anything left to 1 or a partitioned default. */
966 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
967 if (dims[ix] < 0)
968 {
969 /* The OpenACC spec says 'If the [num_gangs] clause is not
970 specified, an implementation-defined default will be used;
971 the default may depend on the code within the construct.'
972 (2.5.6). Thus an implementation is free to choose
973 non-unity default for a parallel region that doesn't have
974 any gang-partitioned loops. However, it appears that there
975 is a sufficient body of user code that expects non-gang
976 partitioned regions to not execute in gang-redundant mode.
977 So we (a) don't warn about the non-portability and (b) pick
978 the minimum permissible dimension size when there is no
979 partitioned execution. Otherwise we pick the global
980 default for the dimension, which the user can control. The
981 same wording and logic applies to num_workers and
982 vector_length, however the worker- or vector- single
983 execution doesn't have the same impact as gang-redundant
984 execution. (If the minimum gang-level partioning is not 1,
985 the target is probably too confusing.) */
986 dims[ix] = (used & GOMP_DIM_MASK (ix)
987 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
988 changed = true;
989 }
990
991 if (changed)
992 {
993 /* Replace the attribute with new values. */
994 pos = NULL_TREE;
995 for (ix = GOMP_DIM_MAX; ix--;)
25651634
TS
996 pos = tree_cons (purpose[ix],
997 build_int_cst (integer_type_node, dims[ix]), pos);
629b3d75
MJ
998 oacc_replace_fn_attrib (fn, pos);
999 }
1000}

/* Create an empty OpenACC loop structure at LOC.  */

static oacc_loop *
new_oacc_loop_raw (oacc_loop *parent, location_t loc)
{
  oacc_loop *loop = XCNEW (oacc_loop);

  loop->parent = parent;

  if (parent)
    {
      loop->sibling = parent->child;
      parent->child = loop;
    }

  loop->loc = loc;
  return loop;
}

/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}

/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}

/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
                ^ (GOMP_DIM_MASK (level) - 1));
}

/* Finish off the current OpenACC loop ending at tail marker TAIL.
   Return the parent loop.  */

static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
  /* If the loop has been collapsed, don't partition it.  */
  if (loop->ifns.is_empty ())
    loop->mask = loop->flags = 0;
  return loop->parent;
}

/* Free all OpenACC loop structures within LOOP (inclusive).  */

static void
free_oacc_loop (oacc_loop *loop)
{
  if (loop->sibling)
    free_oacc_loop (loop->sibling);
  if (loop->child)
    free_oacc_loop (loop->child);

  loop->ifns.release ();
  free (loop);
}

/* Dump out the OpenACC loop head or tail beginning at FROM.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
                     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind) TREE_INT_CST_LOW
               (gimple_call_arg (stmt, 0)));

          if (k == kind && stmt != from)
            break;
        }
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Dump OpenACC loop LOOP, its children, and its siblings.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
           loop->flags, loop->mask,
           LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
             depth * 2, "", DECL_SOURCE_FILE (loop->routine),
             DECL_SOURCE_LINE (loop->routine),
             IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}

void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}

/* Provide diagnostics on OpenACC loop LOOP, its children, and its
   siblings.  */

static void
inform_oacc_loop (const oacc_loop *loop)
{
  const char *gang
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
  const char *worker
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
  const char *vector
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
  const char *seq = loop->mask == 0 ? " seq" : "";
  const dump_user_location_t loc
    = dump_user_location_t::from_location_t (loop->loc);
  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
                   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
                   vector, seq);

  if (loop->child)
    inform_oacc_loop (loop->child);
  if (loop->sibling)
    inform_oacc_loop (loop->sibling);
}

/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */
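/* The head/tail markers consumed here are the IFN_UNIQUE (OACC_HEAD_MARK,
   ...) and IFN_UNIQUE (OACC_TAIL_MARK, ...) calls bracketing each partitioned
   loop: as handled below, each marker in a sequence carries a count that is
   checked and counted down, and a two-argument marker ends the sequence.  */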

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
        continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
        if (tree attrs = oacc_get_fn_attrib (decl))
          {
            gcc_assert (!marker);
            new_oacc_loop_routine (loop, call, decl, attrs);
          }

      if (!gimple_call_internal_p (call))
        continue;

      switch (gimple_call_internal_fn (call))
        {
        default:
          break;

        case IFN_GOACC_LOOP:
        case IFN_GOACC_TILE:
          /* Record the abstraction function, so we can manipulate it
             later.  */
          loop->ifns.safe_push (call);
          break;

        case IFN_UNIQUE:
          enum ifn_unique_kind kind
            = (enum ifn_unique_kind) (TREE_INT_CST_LOW
                                      (gimple_call_arg (call, 0)));
          if (kind == IFN_UNIQUE_OACC_HEAD_MARK
              || kind == IFN_UNIQUE_OACC_TAIL_MARK)
            {
              if (gimple_call_num_args (call) == 2)
                {
                  gcc_assert (marker && !remaining);
                  marker = 0;
                  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
                    loop = finish_oacc_loop (loop);
                  else
                    loop->head_end = call;
                }
              else
                {
                  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

                  if (!marker)
                    {
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop = new_oacc_loop (loop, call);
                      remaining = count;
                    }
                  gcc_assert (count == remaining);
                  if (remaining)
                    {
                      remaining--;
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop->heads[marker] = call;
                      else
                        loop->tails[remaining] = call;
                    }
                  marker++;
                }
            }
        }
    }
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}

/* LOOP is the first sibling.  Reverse the order in place and return
   the new first sibling.  Recurse to child loops.  */

static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
  oacc_loop *last = NULL;
  do
    {
      if (loop->child)
        loop->child = oacc_loop_sibling_nreverse (loop->child);

      oacc_loop *next = loop->sibling;
      loop->sibling = last;
      last = loop;
      loop = next;
    }
  while (loop);

  return last;
}

/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're going to use
     in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}

/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind)
               TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

          if (k == IFN_UNIQUE_OACC_FORK
              || k == IFN_UNIQUE_OACC_JOIN
              || k == IFN_UNIQUE_OACC_PRIVATE)
            *gimple_call_arg_ptr (stmt, 2) = replacement;
          else if (k == kind && stmt != from)
            break;
        }
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
        *gimple_call_arg_ptr (stmt, 3) = replacement;
      update_stmt (stmt);

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  */

static void
oacc_loop_process (oacc_loop *loop, int fn_level)
{
  if (loop->child)
    oacc_loop_process (loop->child, fn_level);

  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
        {
          switch (gimple_call_internal_fn (call))
            {
            case IFN_GOACC_LOOP:
              {
                bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
                gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
                if (!is_e)
                  gimple_call_set_arg (call, 4, chunk_arg);
              }
              break;

            case IFN_GOACC_TILE:
              gimple_call_set_arg (call, 3, mask_arg);
              gimple_call_set_arg (call, 4, e_mask_arg);
              break;

            default:
              gcc_unreachable ();
            }
          update_stmt (call);
        }

      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
        {
          while (!(GOMP_DIM_MASK (dim) & mask))
            dim++;

          oacc_loop_xform_head_tail (loop->heads[ix], dim);
          oacc_loop_xform_head_tail (loop->tails[ix], dim);

          mask ^= GOMP_DIM_MASK (dim);
        }
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling, fn_level);


  /* OpenACC 2.6, 2.9.11. "reduction clause" places a restriction such that
     "The 'reduction' clause may not be specified on an orphaned 'loop'
     construct with the 'gang' clause, or on an orphaned 'loop' construct that
     will generate gang parallelism in a procedure that is compiled with the
     'routine gang' clause."  */
  if (fn_level == GOMP_DIM_GANG
      && (loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
      && (loop->flags & OLF_REDUCTION))
    error_at (loop->loc,
              "gang reduction on an orphan loop");
}

/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */
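/* For instance, an "acc loop vector" nested inside an "acc loop gang vector"
   is diagnosed below ("inner loop uses same OpenACC parallelism as containing
   loop") and the vector bit is dropped from the inner loop's mask.  */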

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
                   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
         loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
        = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      if ((this_mask != 0) + auto_par + seq_par > 1)
        {
          if (noisy)
            error_at (loop->loc,
                      seq_par
                      ? G_("%<seq%> overrides other OpenACC loop specifiers")
                      : G_("%<auto%> conflicts with other OpenACC loop "
                           "specifiers"));
          maybe_auto = false;
          loop->flags &= ~OLF_AUTO;
          if (seq_par)
            {
              loop->flags
                &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
              this_mask = 0;
            }
        }

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
        {
          loop->flags |= OLF_AUTO;
          mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
        }
    }

  if (this_mask & outer_mask)
    {
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
        if ((outer->mask | outer->e_mask) & this_mask)
          break;

      if (noisy)
        {
          if (outer)
            {
              error_at (loop->loc,
                        loop->routine
                        ? G_("routine call uses same OpenACC parallelism"
                             " as containing loop")
                        : G_("inner loop uses same OpenACC parallelism"
                             " as containing loop"));
              inform (outer->loc, "containing loop here");
            }
          else
            error_at (loop->loc,
                      loop->routine
                      ? G_("routine call uses OpenACC parallelism disallowed"
                           " by containing routine")
                      : G_("loop uses OpenACC parallelism disallowed"
                           " by containing routine"));

          if (loop->routine)
            inform (DECL_SOURCE_LOCATION (loop->routine),
                    "routine %qD declared here", loop->routine);
        }
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
        {
          if (noisy)
            {
              error_at (loop->loc,
                        "incorrectly nested OpenACC loop parallelism");

              const oacc_loop *outer;
              for (outer = loop->parent;
                   outer->flags && outer->flags < outermost;
                   outer = outer->parent)
                continue;
              inform (outer->loc, "containing loop here");
            }

          this_mask &= ~outermost;
        }
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
         that we put worker there.  The std doesn't contemplate
         specifying all three.  We choose to put worker and vector on
         the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
        this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
             LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
             loop->mask, loop->e_mask);

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}

/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */
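/* For example, for a two-deep nest of 'independent' 'auto' loops with no
   tiling, the logic below typically assigns gang (and, on the second pass
   over the outer loop, worker) partitioning to the outer loop and vector
   partitioning to the inner loop.  */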
1606
1607static unsigned
02889d23
CLT
1608oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1609 bool outer_assign)
629b3d75
MJ
1610{
1611 bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1612 bool noisy = true;
02889d23 1613 bool tiling = loop->flags & OLF_TILE;
629b3d75
MJ
1614
1615#ifdef ACCEL_COMPILER
1616 /* When device_type is supported, we want the device compiler to be
1617 noisy, if the loop parameters are device_type-specific. */
1618 noisy = false;
1619#endif
1620
891ba5eb 1621 if (assign && (!outer_assign || loop->inner))
629b3d75 1622 {
02889d23
CLT
1623 /* Allocate outermost and non-innermost loops at the outermost
1624 non-innermost available level. */
1625 unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1626
1627 /* Find the first outermost available partition. */
1628 while (this_mask <= outer_mask)
1629 this_mask <<= 1;
1630
1631 /* Grab two axes if tiling, and we've not assigned anything */
1632 if (tiling && !(loop->mask | loop->e_mask))
1633 this_mask |= this_mask << 1;
1634
1635 /* Prohibit the innermost partitioning at the moment. */
1636 this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
629b3d75 1637
02889d23
CLT
1638 /* Don't use any dimension explicitly claimed by an inner loop. */
1639 this_mask &= ~loop->inner;
1640
1641 if (tiling && !loop->e_mask)
1642 {
1643 /* If we got two axes, allocate the inner one to the element
1644 loop. */
1645 loop->e_mask = this_mask & (this_mask << 1);
1646 this_mask ^= loop->e_mask;
1647 }
1648
1649 loop->mask |= this_mask;
629b3d75
MJ
1650 }
1651
1652 if (loop->child)
1653 {
02889d23
CLT
1654 unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1655 loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1656 outer_assign | assign);
629b3d75
MJ
1657 }
1658
02889d23 1659 if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
629b3d75 1660 {
02889d23
CLT
1661 /* Allocate the loop at the innermost available level. Note
1662 that we do this even if we already assigned this loop the
1663 outermost available level above. That way we'll partition
1664 this along 2 axes, if they are available. */
629b3d75
MJ
1665 unsigned this_mask = 0;
1666
01914336 1667 /* Determine the outermost partitioning used within this loop. */
629b3d75
MJ
1668 this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1669 this_mask = least_bit_hwi (this_mask);
1670
1671 /* Pick the partitioning just inside that one. */
1672 this_mask >>= 1;
1673
01914336 1674 /* And avoid picking one use by an outer loop. */
629b3d75
MJ
1675 this_mask &= ~outer_mask;
1676
02889d23
CLT
1677 /* If tiling and we failed completely above, grab the next one
1678 too. Making sure it doesn't hit an outer loop. */
1679 if (tiling)
1680 {
1681 this_mask &= ~(loop->e_mask | loop->mask);
1682 unsigned tile_mask = ((this_mask >> 1)
1683 & ~(outer_mask | loop->e_mask | loop->mask));
1684
1685 if (tile_mask || loop->mask)
1686 {
1687 loop->e_mask |= this_mask;
1688 this_mask = tile_mask;
1689 }
1690 if (!loop->e_mask && noisy)
1691 warning_at (loop->loc, 0,
1692 "insufficient partitioning available"
1693 " to parallelize element loop");
1694 }
629b3d75 1695
02889d23
CLT
1696 loop->mask |= this_mask;
1697 if (!loop->mask && noisy)
1698 warning_at (loop->loc, 0,
efebb49e
DM
1699 tiling
1700 ? G_("insufficient partitioning available"
1701 " to parallelize tile loop")
1702 : G_("insufficient partitioning available"
1703 " to parallelize loop"));
629b3d75
MJ
1704 }
1705
1706 if (assign && dump_file)
02889d23 1707 fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
629b3d75 1708 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
02889d23 1709 loop->mask, loop->e_mask);
629b3d75
MJ
1710
1711 unsigned inner_mask = 0;
1712
1713 if (loop->sibling)
02889d23
CLT
1714 inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1715 outer_mask, outer_assign);
629b3d75 1716
02889d23 1717 inner_mask |= loop->inner | loop->mask | loop->e_mask;
629b3d75
MJ
1718
1719 return inner_mask;
1720}
1721
1722/* Walk the OpenACC loop heirarchy to check and assign partitioning
1723 axes. Return mask of partitioning. */
1724
1725static unsigned
1726oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1727{
1728 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1729
1730 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1731 {
1732 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
02889d23 1733 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
629b3d75
MJ
1734 }
1735 return mask_all;
1736}
1737
1738/* Default fork/join early expander. Delete the function calls if
1739 there is no RTL expander. */
1740
1741bool
1742default_goacc_fork_join (gcall *ARG_UNUSED (call),
1743 const int *ARG_UNUSED (dims), bool is_fork)
1744{
1745 if (is_fork)
1746 return targetm.have_oacc_fork ();
1747 else
1748 return targetm.have_oacc_join ();
1749}
1750
1751/* Default goacc.reduction early expander.
1752
1753 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1754 If RES_PTR is not integer-zerop:
1755 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1756 TEARDOWN - emit '*RES_PTR = VAR'
1757 If LHS is not NULL
1758 emit 'LHS = VAR' */
1759
1760void
1761default_goacc_reduction (gcall *call)
1762{
1763 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1764 gimple_stmt_iterator gsi = gsi_for_stmt (call);
1765 tree lhs = gimple_call_lhs (call);
1766 tree var = gimple_call_arg (call, 2);
1767 gimple_seq seq = NULL;
1768
1769 if (code == IFN_GOACC_REDUCTION_SETUP
1770 || code == IFN_GOACC_REDUCTION_TEARDOWN)
1771 {
1772 /* Setup and Teardown need to copy from/to the receiver object,
1773 if there is one. */
1774 tree ref_to_res = gimple_call_arg (call, 1);
1775
1776 if (!integer_zerop (ref_to_res))
1777 {
1778 tree dst = build_simple_mem_ref (ref_to_res);
1779 tree src = var;
1780
1781 if (code == IFN_GOACC_REDUCTION_SETUP)
1782 {
1783 src = dst;
1784 dst = lhs;
1785 lhs = NULL;
1786 }
1787 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1788 }
1789 }
1790
1791 /* Copy VAR to LHS, if there is an LHS. */
1792 if (lhs)
1793 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1794
1795 gsi_replace_with_seq (&gsi, seq, true);
1796}
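
/* Illustrative sketch only (hypothetical helper, not part of the compiler):
   the net effect of the default SETUP/TEARDOWN expansion above, written as
   plain C.  RES_PTR, LHS and VAR stand in for the corresponding
   IFN_GOACC_REDUCTION arguments.  */

static inline void
goacc_default_reduction_sketch (bool is_setup, int *res_ptr, int *lhs, int var)
{
  if (res_ptr)
    {
      if (is_setup)
	{
	  if (lhs)
	    *lhs = *res_ptr;	/* SETUP: LHS = *RES_PTR.  */
	  lhs = NULL;		/* Suppress the final copy below.  */
	}
      else
	*res_ptr = var;		/* TEARDOWN: *RES_PTR = VAR.  */
    }

  if (lhs)
    *lhs = var;			/* LHS = VAR.  */
}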
1797
29a2f518
JB
1798struct var_decl_rewrite_info
1799{
1800 gimple *stmt;
1801 hash_map<tree, tree> *adjusted_vars;
1802 bool avoid_pointer_conversion;
1803 bool modified;
1804};
1805
1806/* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
1807 themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
1808 the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
1809 gang-private variables in OpenACC offload regions to reside in GPU shared
1810 memory. */
1811
1812static tree
1813oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
1814{
1815 walk_stmt_info *wi = (walk_stmt_info *) data;
1816 var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
1817
1818 if (TREE_CODE (*tp) == ADDR_EXPR)
1819 {
1820 tree arg = TREE_OPERAND (*tp, 0);
1821 tree *new_arg = info->adjusted_vars->get (arg);
1822
1823 if (new_arg)
1824 {
1825 if (info->avoid_pointer_conversion)
1826 {
1827 *tp = build_fold_addr_expr (*new_arg);
1828 info->modified = true;
1829 *walk_subtrees = 0;
1830 }
1831 else
1832 {
1833 gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
1834 tree repl = build_fold_addr_expr (*new_arg);
1835 gimple *stmt1
1836 = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
1837 tree conv = convert_to_pointer (TREE_TYPE (*tp),
1838 gimple_assign_lhs (stmt1));
1839 gimple *stmt2
1840 = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
1841 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1842 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1843 *tp = gimple_assign_lhs (stmt2);
1844 info->modified = true;
1845 *walk_subtrees = 0;
1846 }
1847 }
1848 }
1849 else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
1850 {
1851 tree *base = &TREE_OPERAND (*tp, 0);
1852
1853 while (TREE_CODE (*base) == COMPONENT_REF
1854 || TREE_CODE (*base) == ARRAY_REF)
1855 base = &TREE_OPERAND (*base, 0);
1856
1857 if (TREE_CODE (*base) != VAR_DECL)
1858 return NULL;
1859
1860 tree *new_decl = info->adjusted_vars->get (*base);
1861 if (!new_decl)
1862 return NULL;
1863
1864 int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
1865 tree field = TREE_OPERAND (*tp, 1);
1866
1867 /* Adjust the type of the field. */
1868 int field_quals = TYPE_QUALS (TREE_TYPE (field));
1869 if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
1870 {
1871 tree *field_type = &TREE_TYPE (field);
1872 while (TREE_CODE (*field_type) == ARRAY_TYPE)
1873 field_type = &TREE_TYPE (*field_type);
1874 field_quals |= base_quals;
1875 *field_type = build_qualified_type (*field_type, field_quals);
1876 }
1877
1878 /* Adjust the type of the component ref itself. */
1879 tree comp_type = TREE_TYPE (*tp);
1880 int comp_quals = TYPE_QUALS (comp_type);
1881 if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
1882 {
1883 comp_quals |= base_quals;
1884 TREE_TYPE (*tp)
1885 = build_qualified_type (comp_type, comp_quals);
1886 }
1887
1888 *base = *new_decl;
1889 info->modified = true;
1890 }
1891 else if (TREE_CODE (*tp) == VAR_DECL)
1892 {
1893 tree *new_decl = info->adjusted_vars->get (*tp);
1894 if (new_decl)
1895 {
1896 *tp = *new_decl;
1897 info->modified = true;
1898 }
1899 }
1900
1901 return NULL_TREE;
1902}
1903
1904/* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
1905
1906static bool
1907is_sync_builtin_call (gcall *call)
1908{
1909 tree callee = gimple_call_fndecl (call);
1910
1911 if (callee != NULL_TREE
1912 && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
1913 switch (DECL_FUNCTION_CODE (callee))
1914 {
1915#undef DEF_SYNC_BUILTIN
1916#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
1917#include "sync-builtins.def"
1918#undef DEF_SYNC_BUILTIN
1919 return true;
1920
1921 default:
1922 ;
1923 }
1924
1925 return false;
1926}
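
/* Illustrative sketch only (hypothetical DEMO_* names, not sync-builtins.def):
   the X-macro pattern used above.  Re-defining the per-entry macro as a case
   label and expanding the list turns every entry into a 'case' of the
   switch.  */

#define DEMO_SYNC_FNS(X) X (DEMO_FETCH_ADD) X (DEMO_COMPARE_SWAP)

enum demo_fn_code { DEMO_FETCH_ADD, DEMO_COMPARE_SWAP, DEMO_PLAIN_CALL };

static inline bool
demo_is_sync_fn (enum demo_fn_code code)
{
  switch (code)
    {
#define X(ENUM) case ENUM:
      DEMO_SYNC_FNS (X)
#undef X
      return true;

    default:
      return false;
    }
}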
1927
629b3d75
MJ
1928/* Main entry point for oacc transformations which run on the device
1929 compiler after LTO, so we know what the target device is at this
1930 point (including the host fallback). */
1931
1932static unsigned int
0829ab79 1933execute_oacc_loop_designation ()
629b3d75
MJ
1934{
1935 tree attrs = oacc_get_fn_attrib (current_function_decl);
1936
1937 if (!attrs)
1938 /* Not an offloaded function. */
1939 return 0;
1940
1941 /* Parse the default dim argument exactly once. */
1942 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1943 {
1944 oacc_parse_default_dims (flag_openacc_dims);
1945 flag_openacc_dims = (char *)&flag_openacc_dims;
1946 }
1947
703e4f86
TS
1948 bool is_oacc_parallel
1949 = (lookup_attribute ("oacc parallel",
1950 DECL_ATTRIBUTES (current_function_decl)) != NULL);
b0f271ce
TS
1951 bool is_oacc_kernels
1952 = (lookup_attribute ("oacc kernels",
1953 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1954 bool is_oacc_serial
1955 = (lookup_attribute ("oacc serial",
1956 DECL_ATTRIBUTES (current_function_decl)) != NULL);
e898ce79
GB
1957 bool is_oacc_parallel_kernels_parallelized
1958 = (lookup_attribute ("oacc parallel_kernels_parallelized",
1959 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1960 bool is_oacc_parallel_kernels_gang_single
1961 = (lookup_attribute ("oacc parallel_kernels_gang_single",
1962 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1963 int fn_level = oacc_fn_attrib_level (attrs);
1964 bool is_oacc_routine = (fn_level >= 0);
1965 gcc_checking_assert (is_oacc_parallel
1966 + is_oacc_kernels
1967 + is_oacc_serial
e898ce79
GB
1968 + is_oacc_parallel_kernels_parallelized
1969 + is_oacc_parallel_kernels_gang_single
703e4f86
TS
1970 + is_oacc_routine
1971 == 1);
1972
b0f271ce
TS
1973 bool is_oacc_kernels_parallelized
1974 = (lookup_attribute ("oacc kernels parallelized",
1975 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1976 if (is_oacc_kernels_parallelized)
1977 gcc_checking_assert (is_oacc_kernels);
1978
1979 if (dump_file)
1980 {
1981 if (is_oacc_parallel)
1982 fprintf (dump_file, "Function is OpenACC parallel offload\n");
1983 else if (is_oacc_kernels)
1984 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1985 (is_oacc_kernels_parallelized
1986 ? "parallelized" : "unparallelized"));
1987 else if (is_oacc_serial)
1988 fprintf (dump_file, "Function is OpenACC serial offload\n");
e898ce79
GB
1989 else if (is_oacc_parallel_kernels_parallelized)
1990 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1991 "parallel_kernels_parallelized");
1992 else if (is_oacc_parallel_kernels_gang_single)
1993 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1994 "parallel_kernels_gang_single");
703e4f86
TS
1995 else if (is_oacc_routine)
1996 fprintf (dump_file, "Function is OpenACC routine level %d\n",
1997 fn_level);
1998 else
1999 gcc_unreachable ();
2000 }
b0f271ce 2001
0829ab79
TS
2002   /* This doesn't belong in 'pass_oacc_loop_designation' conceptually, but
2003 it's a convenient place, so... */
a61f6afb
TS
2004 if (is_oacc_routine)
2005 {
2006 tree attr = lookup_attribute ("omp declare target",
2007 DECL_ATTRIBUTES (current_function_decl));
2008 gcc_checking_assert (attr);
2009 tree clauses = TREE_VALUE (attr);
2010 gcc_checking_assert (clauses);
2011
2012 /* Should this OpenACC routine be discarded? */
2013 bool discard = false;
2014
2015 tree clause_nohost = omp_find_clause (clauses, OMP_CLAUSE_NOHOST);
2016 if (dump_file)
2017 fprintf (dump_file,
2018 "OpenACC routine '%s' %s '%s' clause.\n",
2019 lang_hooks.decl_printable_name (current_function_decl, 2),
2020 clause_nohost ? "has" : "doesn't have",
2021 omp_clause_code_name[OMP_CLAUSE_NOHOST]);
2022 /* Host compiler, 'nohost' clause? */
2023#ifndef ACCEL_COMPILER
2024 if (clause_nohost)
2025 discard = true;
2026#endif
2027
2028 if (dump_file)
2029 fprintf (dump_file,
2030 "OpenACC routine '%s' %sdiscarded.\n",
2031 lang_hooks.decl_printable_name (current_function_decl, 2),
2032 discard ? "" : "not ");
2033 if (discard)
2034 {
2035 TREE_ASM_WRITTEN (current_function_decl) = 1;
2036 return TODO_discard_function;
2037 }
2038 }
2039
fd71a9a2
TS
2040 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
2041 kernels, so remove the parallelism dimensions function attributes
2042 potentially set earlier on. */
2043 if (is_oacc_kernels && !is_oacc_kernels_parallelized)
2044 {
2045 oacc_set_fn_attrib (current_function_decl, NULL, NULL);
2046 attrs = oacc_get_fn_attrib (current_function_decl);
2047 }
2048
629b3d75
MJ
2049 /* Discover, partition and process the loops. */
2050 oacc_loop *loops = oacc_loop_discovery ();
629b3d75 2051
703e4f86
TS
2052 unsigned outer_mask = 0;
2053 if (is_oacc_routine)
2054 outer_mask = GOMP_DIM_MASK (fn_level) - 1;
629b3d75 2055 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
b0f271ce
TS
2056 /* OpenACC kernels constructs are special: they currently don't use the
2057 generic oacc_loop infrastructure and attribute/dimension processing. */
2058 if (is_oacc_kernels && is_oacc_kernels_parallelized)
2059 {
2060 /* Parallelized OpenACC kernels constructs use gang parallelism. See
e53b6e56 2061 also tree-parloops.cc:create_parallel_loop. */
b0f271ce
TS
2062 used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2063 }
629b3d75 2064
b0f271ce 2065 int dims[GOMP_DIM_MAX];
629b3d75
MJ
2066 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
2067
2068 if (dump_file)
2069 {
2070 const char *comma = "Compute dimensions [";
2071 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
2072 fprintf (dump_file, "%s%d", comma, dims[ix]);
2073 fprintf (dump_file, "]\n");
2074 }
2075
e898ce79
GB
2076 /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
2077 a single gang only. */
2078 if (is_oacc_parallel_kernels_gang_single)
2079 gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
2080
77d24d43 2081 oacc_loop_process (loops, fn_level);
629b3d75
MJ
2082 if (dump_file)
2083 {
2084 fprintf (dump_file, "OpenACC loops\n");
2085 dump_oacc_loop (dump_file, loops, 0);
2086 fprintf (dump_file, "\n");
2087 }
5d390fd3
TS
2088 if (dump_enabled_p ())
2089 {
2090 oacc_loop *l = loops;
2091 /* OpenACC kernels constructs are special: they currently don't use the
2092 generic oacc_loop infrastructure. */
2093 if (is_oacc_kernels)
2094 {
2095 /* Create a fake oacc_loop for diagnostic purposes. */
2096 l = new_oacc_loop_raw (NULL,
2097 DECL_SOURCE_LOCATION (current_function_decl));
2098 l->mask = used_mask;
2099 }
2100 else
2101 {
2102 	  /* Skip the outermost, dummy OpenACC loop.  */
2103 l = l->child;
2104 }
2105 if (l)
2106 inform_oacc_loop (l);
2107 if (is_oacc_kernels)
2108 free_oacc_loop (l);
2109 }
629b3d75 2110
0829ab79
TS
2111 free_oacc_loop (loops);
2112
2113 return 0;
2114}
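
/* Illustrative sketch only (hypothetical helper, not a GCC API): the
   parse-exactly-once idiom used for flag_openacc_dims in
   execute_oacc_loop_designation above.  After the first call the option
   string pointer is redirected at its own storage, which can never equal a
   genuine user-supplied string, so later calls skip the parse.  */

static inline void
demo_parse_option_once (const char **optp, void (*parse) (const char *))
{
  if ((const void *) *optp != (const void *) optp)
    {
      parse (*optp);			/* First (and only) parse.  */
      *optp = (const char *) optp;	/* Install the sentinel.  */
    }
}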
2115
2116static unsigned int
2117execute_oacc_device_lower ()
2118{
2119 tree attrs = oacc_get_fn_attrib (current_function_decl);
2120
2121 if (!attrs)
2122 /* Not an offloaded function. */
2123 return 0;
2124
2125 int dims[GOMP_DIM_MAX];
2126 for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
2127 dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
629b3d75 2128
29a2f518
JB
2129 hash_map<tree, tree> adjusted_vars;
2130
629b3d75
MJ
2131 /* Now lower internal loop functions to target-specific code
2132 sequences. */
2133 basic_block bb;
2134 FOR_ALL_BB_FN (bb, cfun)
2135 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2136 {
2137 gimple *stmt = gsi_stmt (gsi);
2138 if (!is_gimple_call (stmt))
2139 {
2140 gsi_next (&gsi);
2141 continue;
2142 }
2143
2144 gcall *call = as_a <gcall *> (stmt);
2145 if (!gimple_call_internal_p (call))
2146 {
2147 gsi_next (&gsi);
2148 continue;
2149 }
2150
2151 /* Rewind to allow rescan. */
2152 gsi_prev (&gsi);
2153 bool rescan = false, remove = false;
2154 enum internal_fn ifn_code = gimple_call_internal_fn (call);
2155
2156 switch (ifn_code)
2157 {
2158 default: break;
2159
02889d23
CLT
2160 case IFN_GOACC_TILE:
2161 oacc_xform_tile (call);
2162 rescan = true;
2163 break;
2164
629b3d75
MJ
2165 case IFN_GOACC_LOOP:
2166 oacc_xform_loop (call);
2167 rescan = true;
2168 break;
2169
2170 case IFN_GOACC_REDUCTION:
2171 /* Mark the function for SSA renaming. */
2172 mark_virtual_operands_for_renaming (cfun);
2173
2174 /* If the level is -1, this ended up being an unused
2175 axis. Handle as a default. */
2176 if (integer_minus_onep (gimple_call_arg (call, 3)))
2177 default_goacc_reduction (call);
2178 else
2179 targetm.goacc.reduction (call);
2180 rescan = true;
2181 break;
2182
2183 case IFN_UNIQUE:
2184 {
2185 enum ifn_unique_kind kind
2186 = ((enum ifn_unique_kind)
2187 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
2188
2189 switch (kind)
2190 {
2191 default:
02889d23 2192 break;
629b3d75
MJ
2193
2194 case IFN_UNIQUE_OACC_FORK:
2195 case IFN_UNIQUE_OACC_JOIN:
2196 if (integer_minus_onep (gimple_call_arg (call, 2)))
2197 remove = true;
2198 else if (!targetm.goacc.fork_join
2199 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
2200 remove = true;
2201 break;
2202
2203 case IFN_UNIQUE_OACC_HEAD_MARK:
2204 case IFN_UNIQUE_OACC_TAIL_MARK:
2205 remove = true;
2206 break;
29a2f518
JB
2207
2208 case IFN_UNIQUE_OACC_PRIVATE:
2209 {
11b8286a
TS
2210 dump_flags_t l_dump_flags
2211 = get_openacc_privatization_dump_flags ();
2212
2213 location_t loc = gimple_location (stmt);
2214 if (LOCATION_LOCUS (loc) == UNKNOWN_LOCATION)
2215 loc = DECL_SOURCE_LOCATION (current_function_decl);
2216 const dump_user_location_t d_u_loc
2217 = dump_user_location_t::from_location_t (loc);
2218
29a2f518
JB
2219 HOST_WIDE_INT level
2220 = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
ff451ea7
TS
2221 gcc_checking_assert (level == -1
2222 || (level >= 0
2223 && level < GOMP_DIM_MAX));
29a2f518
JB
2224 for (unsigned i = 3;
2225 i < gimple_call_num_args (call);
2226 i++)
2227 {
11b8286a
TS
2228 static char const *const axes[] =
2229 /* Must be kept in sync with GOMP_DIM enumeration. */
2230 { "gang", "worker", "vector" };
2231
29a2f518
JB
2232 tree arg = gimple_call_arg (call, i);
2233 gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
2234 tree decl = TREE_OPERAND (arg, 0);
11b8286a
TS
2235 if (dump_enabled_p ())
2236/* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2237#if __GNUC__ >= 10
2238# pragma GCC diagnostic push
2239# pragma GCC diagnostic ignored "-Wformat"
2240#endif
2241 dump_printf_loc (l_dump_flags, d_u_loc,
2242 "variable %<%T%> ought to be"
2243 " adjusted for OpenACC"
2244 " privatization level: %qs\n",
2245 decl,
2246 (level == -1
2247 ? "UNKNOWN" : axes[level]));
2248#if __GNUC__ >= 10
2249# pragma GCC diagnostic pop
2250#endif
2251 bool adjusted;
2252 if (level == -1)
2253 adjusted = false;
2254 else if (!targetm.goacc.adjust_private_decl)
2255 adjusted = false;
2256 else if (level == GOMP_DIM_VECTOR)
29a2f518 2257 {
11b8286a
TS
2258 /* That's the default behavior. */
2259 adjusted = true;
29a2f518 2260 }
11b8286a 2261 else
29a2f518
JB
2262 {
2263 tree oldtype = TREE_TYPE (decl);
2264 tree newdecl
11b8286a
TS
2265 = targetm.goacc.adjust_private_decl (loc, decl,
2266 level);
2267 adjusted = (TREE_TYPE (newdecl) != oldtype
2268 || newdecl != decl);
2269 if (adjusted)
29a2f518
JB
2270 adjusted_vars.put (decl, newdecl);
2271 }
11b8286a
TS
2272 if (adjusted
2273 && dump_enabled_p ())
2274/* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2275#if __GNUC__ >= 10
2276# pragma GCC diagnostic push
2277# pragma GCC diagnostic ignored "-Wformat"
2278#endif
2279 dump_printf_loc (l_dump_flags, d_u_loc,
2280 "variable %<%T%> adjusted for"
2281 " OpenACC privatization level:"
2282 " %qs\n",
2283 decl, axes[level]);
2284#if __GNUC__ >= 10
2285# pragma GCC diagnostic pop
2286#endif
29a2f518
JB
2287 }
2288 remove = true;
2289 }
2290 break;
629b3d75
MJ
2291 }
2292 break;
2293 }
2294 }
2295
2296 if (gsi_end_p (gsi))
2297 /* We rewound past the beginning of the BB. */
2298 gsi = gsi_start_bb (bb);
2299 else
2300 /* Undo the rewind. */
2301 gsi_next (&gsi);
2302
2303 if (remove)
2304 {
2305 if (gimple_vdef (call))
2306 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
2307 if (gimple_call_lhs (call))
2308 {
2309 /* Propagate the data dependency var. */
2310 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
2311 gimple_call_arg (call, 1));
2312 gsi_replace (&gsi, ass, false);
2313 }
2314 else
2315 gsi_remove (&gsi, true);
2316 }
2317 else if (!rescan)
2318 /* If not rescanning, advance over the call. */
2319 gsi_next (&gsi);
2320 }
2321
21803fca
TS
2322 /* Regarding the OpenACC privatization level, we're currently only looking at
2323      making the gang-private level work.  For that, we have the following
2324 configurations:
2325
2326 - GCN offloading: 'targetm.goacc.adjust_private_decl' does the work (in
2327 particular, change 'TREE_TYPE', etc.) and there is no
2328 'targetm.goacc.expand_var_decl'.
2329
2330 - nvptx offloading: 'targetm.goacc.adjust_private_decl' only sets a
2331 marker and then 'targetm.goacc.expand_var_decl' does the work.
2332
2333 Eventually (in particular, for worker-private level?), both
2334 'targetm.goacc.adjust_private_decl' and 'targetm.goacc.expand_var_decl'
2335 may need to do things, but that's currently not meant to be addressed, and
2336 thus not fully worked out and implemented, and thus untested. Hence,
2337 'assert' what currently is implemented/tested, only. */
2338
2339 if (targetm.goacc.expand_var_decl)
2340 gcc_assert (adjusted_vars.is_empty ());
2341
29a2f518
JB
2342 /* Make adjustments to gang-private local variables if required by the
2343 target, e.g. forcing them into a particular address space. Afterwards,
2344 ADDR_EXPR nodes which have adjusted variables as their argument need to
2345 be modified in one of two ways:
2346
2347 1. They can be recreated, making a pointer to the variable in the new
2348 address space, or
2349
2350 2. The address of the variable in the new address space can be taken,
2351 converted to the default (original) address space, and the result of
2352     that conversion substituted in place of the original ADDR_EXPR node.
2353
2354 Which of these is done depends on the gimple statement being processed.
2355 At present atomic operations and inline asms use (1), and everything else
2356 uses (2). At least on AMD GCN, there are atomic operations that work
2357 directly in the LDS address space.
2358
2359 COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
2360 the new decl, adjusting types of appropriate tree nodes as necessary. */
2361
ad4612cb
TS
2362 if (targetm.goacc.adjust_private_decl
2363 && !adjusted_vars.is_empty ())
29a2f518
JB
2364 {
2365 FOR_ALL_BB_FN (bb, cfun)
2366 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2367 !gsi_end_p (gsi);
2368 gsi_next (&gsi))
2369 {
2370 gimple *stmt = gsi_stmt (gsi);
2371 walk_stmt_info wi;
2372 var_decl_rewrite_info info;
2373
2374 info.avoid_pointer_conversion
2375 = (is_gimple_call (stmt)
2376 && is_sync_builtin_call (as_a <gcall *> (stmt)))
2377 || gimple_code (stmt) == GIMPLE_ASM;
2378 info.stmt = stmt;
2379 info.modified = false;
2380 info.adjusted_vars = &adjusted_vars;
2381
2382 memset (&wi, 0, sizeof (wi));
2383 wi.info = &info;
2384
2385 walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
2386
2387 if (info.modified)
2388 update_stmt (stmt);
2389 }
2390 }
2391
629b3d75
MJ
2392 return 0;
2393}
2394
2395/* Default launch dimension validator. Force everything to 1. A
2396 backend that wants to provide larger dimensions must override this
2397 hook. */
2398
2399bool
2400default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
46dedae6
TV
2401 int ARG_UNUSED (fn_level),
2402 unsigned ARG_UNUSED (used))
629b3d75
MJ
2403{
2404 bool changed = false;
2405
2406 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
2407 {
2408 if (dims[ix] != 1)
2409 {
2410 dims[ix] = 1;
2411 changed = true;
2412 }
2413 }
2414
2415 return changed;
2416}
2417
01914336 2418/* Default dimension bound is unknown on accelerator and 1 on host. */
629b3d75
MJ
2419
2420int
2421default_goacc_dim_limit (int ARG_UNUSED (axis))
2422{
2423#ifdef ACCEL_COMPILER
2424 return 0;
2425#else
2426 return 1;
2427#endif
2428}
2429
2430namespace {
2431
0829ab79
TS
2432const pass_data pass_data_oacc_loop_designation =
2433{
2434 GIMPLE_PASS, /* type */
2435 "oaccloops", /* name */
2436 OPTGROUP_OMP, /* optinfo_flags */
2437 TV_NONE, /* tv_id */
2438 PROP_cfg, /* properties_required */
2439 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2440 0, /* properties_destroyed */
2441 0, /* todo_flags_start */
2442 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
2443};
2444
2445class pass_oacc_loop_designation : public gimple_opt_pass
2446{
2447public:
2448 pass_oacc_loop_designation (gcc::context *ctxt)
2449 : gimple_opt_pass (pass_data_oacc_loop_designation, ctxt)
2450 {}
2451
2452 /* opt_pass methods: */
725793af 2453 bool gate (function *) final override { return flag_openacc; };
0829ab79 2454
725793af 2455 unsigned int execute (function *) final override
0829ab79
TS
2456 {
2457 return execute_oacc_loop_designation ();
2458 }
2459
2460}; // class pass_oacc_loop_designation
2461
629b3d75
MJ
2462const pass_data pass_data_oacc_device_lower =
2463{
2464 GIMPLE_PASS, /* type */
2465 "oaccdevlow", /* name */
fd2b8c8b 2466 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2467 TV_NONE, /* tv_id */
2468 PROP_cfg, /* properties_required */
2469 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2470 0, /* properties_destroyed */
2471 0, /* todo_flags_start */
2472 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
2473};
2474
2475class pass_oacc_device_lower : public gimple_opt_pass
2476{
2477public:
2478 pass_oacc_device_lower (gcc::context *ctxt)
2479 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
2480 {}
2481
2482 /* opt_pass methods: */
725793af 2483 bool gate (function *) final override { return flag_openacc; };
629b3d75 2484
725793af 2485 unsigned int execute (function *) final override
629b3d75
MJ
2486 {
2487 return execute_oacc_device_lower ();
2488 }
2489
2490}; // class pass_oacc_device_lower
2491
2492} // anon namespace
2493
0829ab79
TS
2494gimple_opt_pass *
2495make_pass_oacc_loop_designation (gcc::context *ctxt)
2496{
2497 return new pass_oacc_loop_designation (ctxt);
2498}
2499
629b3d75
MJ
2500gimple_opt_pass *
2501make_pass_oacc_device_lower (gcc::context *ctxt)
2502{
2503 return new pass_oacc_device_lower (ctxt);
2504}
2505
0c6b03b5
AM
2506\f
2507/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
2508 GOMP_SIMT_ENTER call identifying the privatized variables, which are
2509 turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
2510    Set *REGIMPLIFY to true, unless no privatized variables were seen.  */
2511
2512static void
2513ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
2514{
2515 gimple *alloc_stmt = gsi_stmt (*gsi);
2516 tree simtrec = gimple_call_lhs (alloc_stmt);
2517 tree simduid = gimple_call_arg (alloc_stmt, 0);
2518 gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
2519 gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
2520 tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
2521 TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
2522 TREE_ADDRESSABLE (rectype) = 1;
2523 TREE_TYPE (simtrec) = build_pointer_type (rectype);
2524 for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
2525 {
2526 tree *argp = gimple_call_arg_ptr (enter_stmt, i);
2527 if (*argp == null_pointer_node)
2528 continue;
2529 gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
2530 && VAR_P (TREE_OPERAND (*argp, 0)));
2531 tree var = TREE_OPERAND (*argp, 0);
2532
2533 tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
2534 DECL_NAME (var), TREE_TYPE (var));
2535 SET_DECL_ALIGN (field, DECL_ALIGN (var));
2536 DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
2537 TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
2538
2539 insert_field_into_struct (rectype, field);
2540
2541 tree t = build_simple_mem_ref (simtrec);
2542 t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
2543 TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
2544 SET_DECL_VALUE_EXPR (var, t);
2545 DECL_HAS_VALUE_EXPR_P (var) = 1;
2546 *regimplify = true;
2547 }
2548 layout_type (rectype);
2549 tree size = TYPE_SIZE_UNIT (rectype);
2550 tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
2551
2552 alloc_stmt
2553 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
2554 gimple_call_set_lhs (alloc_stmt, simtrec);
2555 gsi_replace (gsi, alloc_stmt, false);
2556 gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
2557 enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
2558 gsi_replace (&enter_gsi, enter_stmt, false);
2559
2560 use_operand_p use;
2561 gimple *exit_stmt;
2562 if (single_imm_use (simtrec, &use, &exit_stmt))
2563 {
2564 gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
2565 gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
25b45c7c 2566 tree clobber = build_clobber (rectype);
0c6b03b5
AM
2567 exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
2568 gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
2569 }
2570 else
2571 gcc_checking_assert (has_zero_uses (simtrec));
2572}
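
/* Illustrative only (hypothetical field set): after the rewrite above, the
   SIMT-privatized variables of a region, say an 'int i' and a 'double d',
   end up as fields of one record along these lines.  The record's size and
   alignment become the GOMP_SIMT_ENTER_ALLOC arguments, and each original
   variable's DECL_VALUE_EXPR is redirected to the matching SIMTREC->field
   reference.  */

struct demo_simt_privatized_frame
{
  int i;	/* Privatized 'i'.  */
  double d;	/* Privatized 'd'.  */
};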
2573
2574/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2575
2576static tree
2577find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2578{
2579 tree t = *tp;
2580
2581 if (VAR_P (t)
2582 && DECL_HAS_VALUE_EXPR_P (t)
2583 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2584 {
2585 *walk_subtrees = 0;
2586 return t;
2587 }
2588 return NULL_TREE;
2589}
2590
629b3d75
MJ
2591/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2592 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2593 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
2594 internal functions on non-SIMT targets, and likewise some SIMD internal
2595 functions on SIMT targets. */
2596
2597static unsigned int
2598execute_omp_device_lower ()
2599{
2600 int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
0c6b03b5 2601 bool regimplify = false;
629b3d75
MJ
2602 basic_block bb;
2603 gimple_stmt_iterator gsi;
7a50e708
JJ
2604 bool calls_declare_variant_alt
2605 = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
629b3d75
MJ
2606 FOR_EACH_BB_FN (bb, cfun)
2607 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2608 {
2609 gimple *stmt = gsi_stmt (gsi);
7a50e708 2610 if (!is_gimple_call (stmt))
629b3d75 2611 continue;
7a50e708
JJ
2612 if (!gimple_call_internal_p (stmt))
2613 {
2614 if (calls_declare_variant_alt)
2615 if (tree fndecl = gimple_call_fndecl (stmt))
2616 {
2617 tree new_fndecl = omp_resolve_declare_variant (fndecl);
2618 if (new_fndecl != fndecl)
2619 {
2620 gimple_call_set_fndecl (stmt, new_fndecl);
2621 update_stmt (stmt);
2622 }
2623 }
2624 continue;
2625 }
629b3d75
MJ
2626 tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
2627 tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
2628 switch (gimple_call_internal_fn (stmt))
2629 {
d6621a2f
TB
2630 case IFN_GOMP_TARGET_REV:
2631 {
2632#ifndef ACCEL_COMPILER
2633 gimple_stmt_iterator gsi2 = gsi;
2634 gsi_next (&gsi2);
2635 gcc_assert (!gsi_end_p (gsi2));
2636 gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2),
2637 BUILT_IN_GOMP_TARGET));
2638 tree old_decl
2639 = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2), 1), 0);
2640 tree new_decl = gimple_call_arg (gsi_stmt (gsi), 0);
2641 gimple_call_set_arg (gsi_stmt (gsi2), 1, new_decl);
2642 update_stmt (gsi_stmt (gsi2));
2643 new_decl = TREE_OPERAND (new_decl, 0);
2644 unsigned i;
2645 unsigned num_funcs = vec_safe_length (offload_funcs);
2646 for (i = 0; i < num_funcs; i++)
2647 {
2648 if ((*offload_funcs)[i] == old_decl)
2649 {
2650 (*offload_funcs)[i] = new_decl;
2651 break;
2652 }
2653 else if ((*offload_funcs)[i] == new_decl)
2654 break; /* This can happen due to inlining. */
2655 }
2656 gcc_assert (i < num_funcs);
2657#else
2658 tree old_decl = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi), 0),
2659 0);
2660#endif
2661 /* FIXME: Find a way to actually prevent outputting the empty-body
2662 old_decl as debug symbol + function in the assembly file. */
2663 cgraph_node *node = cgraph_node::get (old_decl);
2664 node->address_taken = false;
2665 node->need_lto_streaming = false;
2666 node->offloadable = false;
2667
2668 unlink_stmt_vdef (stmt);
2669 }
2670 break;
629b3d75
MJ
2671 case IFN_GOMP_USE_SIMT:
2672 rhs = vf == 1 ? integer_zero_node : integer_one_node;
2673 break;
0c6b03b5
AM
2674 case IFN_GOMP_SIMT_ENTER:
2675 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2676 goto simtreg_enter_exit;
2677 case IFN_GOMP_SIMT_ENTER_ALLOC:
2678 if (vf != 1)
2679 ompdevlow_adjust_simt_enter (&gsi, &regimplify);
2680 rhs = vf == 1 ? null_pointer_node : NULL_TREE;
2681 goto simtreg_enter_exit;
2682 case IFN_GOMP_SIMT_EXIT:
2683 simtreg_enter_exit:
2684 if (vf != 1)
2685 continue;
2686 unlink_stmt_vdef (stmt);
2687 break;
629b3d75
MJ
2688 case IFN_GOMP_SIMT_LANE:
2689 case IFN_GOMP_SIMT_LAST_LANE:
2690 rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
2691 break;
2692 case IFN_GOMP_SIMT_VF:
2693 rhs = build_int_cst (type, vf);
2694 break;
2695 case IFN_GOMP_SIMT_ORDERED_PRED:
2696 rhs = vf == 1 ? integer_zero_node : NULL_TREE;
2697 if (rhs || !lhs)
2698 unlink_stmt_vdef (stmt);
2699 break;
2700 case IFN_GOMP_SIMT_VOTE_ANY:
2701 case IFN_GOMP_SIMT_XCHG_BFLY:
2702 case IFN_GOMP_SIMT_XCHG_IDX:
2703 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2704 break;
2705 case IFN_GOMP_SIMD_LANE:
2706 case IFN_GOMP_SIMD_LAST_LANE:
2707 rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
2708 break;
2709 case IFN_GOMP_SIMD_VF:
2710 rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
2711 break;
2712 default:
2713 continue;
2714 }
2715 if (lhs && !rhs)
2716 continue;
2717 stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
2718 gsi_replace (&gsi, stmt, false);
2719 }
0c6b03b5
AM
2720 if (regimplify)
2721 FOR_EACH_BB_REVERSE_FN (bb, cfun)
2722 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2723 if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
2724 {
2725 if (gimple_clobber_p (gsi_stmt (gsi)))
2726 gsi_remove (&gsi, true);
2727 else
2728 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2729 }
629b3d75
MJ
2730 if (vf != 1)
2731 cfun->has_force_vectorize_loops = false;
2732 return 0;
2733}
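
/* Illustrative sketch only (hypothetical demo_* helpers): the vf == 1 folding
   performed above.  With a single SIMT lane, the cross-lane primitives
   degenerate to constants or the identity.  */

static inline int demo_simt_vf (void)	     { return 1; }  /* GOMP_SIMT_VF */
static inline int demo_simt_lane (void)	     { return 0; }  /* GOMP_SIMT_LANE */
static inline int demo_simt_vote_any (int x) { return x; }  /* GOMP_SIMT_VOTE_ANY */
static inline int demo_simt_xchg_bfly (int x, int idx)	    /* GOMP_SIMT_XCHG_BFLY */
{ (void) idx; return x; }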
2734
2735namespace {
2736
2737const pass_data pass_data_omp_device_lower =
2738{
2739 GIMPLE_PASS, /* type */
2740 "ompdevlow", /* name */
fd2b8c8b 2741 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2742 TV_NONE, /* tv_id */
2743 PROP_cfg, /* properties_required */
2744 PROP_gimple_lomp_dev, /* properties_provided */
2745 0, /* properties_destroyed */
2746 0, /* todo_flags_start */
2747 TODO_update_ssa, /* todo_flags_finish */
2748};
2749
2750class pass_omp_device_lower : public gimple_opt_pass
2751{
2752public:
2753 pass_omp_device_lower (gcc::context *ctxt)
2754 : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
2755 {}
2756
2757 /* opt_pass methods: */
725793af 2758 bool gate (function *fun) final override
629b3d75 2759 {
7a50e708
JJ
2760 return (!(fun->curr_properties & PROP_gimple_lomp_dev)
2761 || (flag_openmp
2762 && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
629b3d75 2763 }
725793af 2764 unsigned int execute (function *) final override
629b3d75
MJ
2765 {
2766 return execute_omp_device_lower ();
2767 }
2768
2769}; // class pass_omp_device_lower
2770
2771} // anon namespace
2772
2773gimple_opt_pass *
2774make_pass_omp_device_lower (gcc::context *ctxt)
2775{
2776 return new pass_omp_device_lower (ctxt);
2777}
2778
2779/* "omp declare target link" handling pass. */
2780
2781namespace {
2782
2783const pass_data pass_data_omp_target_link =
2784{
2785 GIMPLE_PASS, /* type */
2786 "omptargetlink", /* name */
fd2b8c8b 2787 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2788 TV_NONE, /* tv_id */
2789 PROP_ssa, /* properties_required */
2790 0, /* properties_provided */
2791 0, /* properties_destroyed */
2792 0, /* todo_flags_start */
2793 TODO_update_ssa, /* todo_flags_finish */
2794};
2795
2796class pass_omp_target_link : public gimple_opt_pass
2797{
2798public:
2799 pass_omp_target_link (gcc::context *ctxt)
2800 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
2801 {}
2802
2803 /* opt_pass methods: */
725793af 2804 bool gate (function *fun) final override
629b3d75
MJ
2805 {
2806#ifdef ACCEL_COMPILER
46dbeb40 2807 return offloading_function_p (fun->decl);
629b3d75
MJ
2808#else
2809 (void) fun;
2810 return false;
2811#endif
2812 }
2813
725793af 2814 unsigned execute (function *) final override;
629b3d75
MJ
2815};
2816
2817/* Callback for walk_gimple_stmt used to scan for link var operands. */
2818
2819static tree
2820find_link_var_op (tree *tp, int *walk_subtrees, void *)
2821{
2822 tree t = *tp;
2823
56f71478
JJ
2824 if (VAR_P (t)
2825 && DECL_HAS_VALUE_EXPR_P (t)
2826 && is_global_var (t)
629b3d75
MJ
2827 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2828 {
2829 *walk_subtrees = 0;
2830 return t;
2831 }
2832
2833 return NULL_TREE;
2834}
2835
2836unsigned
2837pass_omp_target_link::execute (function *fun)
2838{
2839 basic_block bb;
2840 FOR_EACH_BB_FN (bb, fun)
2841 {
2842 gimple_stmt_iterator gsi;
2843 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
95d67762
JJ
2844 {
2845 if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
2846 {
d6621a2f
TB
2847 tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
2848 tree fn = gimple_call_arg (gsi_stmt (gsi), 1);
2849 if (POINTER_TYPE_P (TREE_TYPE (fn)))
2850 fn = TREE_OPERAND (fn, 0);
2851 if (TREE_CODE (dev) == INTEGER_CST
2852 && wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK
2853 && lookup_attribute ("omp target device_ancestor_nohost",
2854 DECL_ATTRIBUTES (fn)) != NULL_TREE)
2855 continue; /* ancestor:1 */
95d67762
JJ
2856 /* Nullify the second argument of __builtin_GOMP_target_ext. */
2857 gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
2858 update_stmt (gsi_stmt (gsi));
2859 }
2860 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2861 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2862 }
629b3d75
MJ
2863 }
2864
2865 return 0;
2866}
2867
2868} // anon namespace
2869
2870gimple_opt_pass *
2871make_pass_omp_target_link (gcc::context *ctxt)
2872{
2873 return new pass_omp_target_link (ctxt);
2874}