]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/omp-offload.c
[OpenACC privatization] Don't let unhandled 'IFN_UNIQUE_OACC_PRIVATE' linger [PR90115]
[thirdparty/gcc.git] / gcc / omp-offload.c
CommitLineData
629b3d75
MJ
1/* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
3
99dee823 4 Copyright (C) 2005-2021 Free Software Foundation, Inc.
629b3d75
MJ
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "tree.h"
28#include "gimple.h"
29#include "tree-pass.h"
30#include "ssa.h"
31#include "cgraph.h"
32#include "pretty-print.h"
33#include "diagnostic-core.h"
34#include "fold-const.h"
35#include "internal-fn.h"
0c6b03b5 36#include "langhooks.h"
629b3d75
MJ
37#include "gimplify.h"
38#include "gimple-iterator.h"
39#include "gimplify-me.h"
40#include "gimple-walk.h"
41#include "tree-cfg.h"
42#include "tree-into-ssa.h"
0c6b03b5
AM
43#include "tree-nested.h"
44#include "stor-layout.h"
629b3d75
MJ
45#include "common/common-target.h"
46#include "omp-general.h"
47#include "omp-offload.h"
48#include "lto-section-names.h"
49#include "gomp-constants.h"
50#include "gimple-pretty-print.h"
324ff1a0 51#include "intl.h"
314e6352
ML
52#include "stringpool.h"
53#include "attribs.h"
f64b12bd 54#include "cfgloop.h"
dc703151 55#include "context.h"
29a2f518 56#include "convert.h"
629b3d75
MJ
57
/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  Loops form a tree via PARENT /
   CHILD / SIBLING links; partitioning decisions are recorded in the
   MASK / E_MASK / INNER / FLAGS fields.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions.  */

  tree routine;  /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};
86
/* Holds offload tables with decls.  OFFLOAD_FUNCS collects outlined
   offload function decls, OFFLOAD_VARS the "omp declare target" variable
   decls; both are consumed by omp_finish_file to emit the offload
   tables.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;
89
90/* Return level at which oacc routine may spawn a partitioned loop, or
91 -1 if it is not a routine (i.e. is an offload fn). */
92
4c187162 93int
629b3d75
MJ
94oacc_fn_attrib_level (tree attr)
95{
96 tree pos = TREE_VALUE (attr);
97
98 if (!TREE_PURPOSE (pos))
99 return -1;
100
101 int ix = 0;
102 for (ix = 0; ix != GOMP_DIM_MAX;
103 ix++, pos = TREE_CHAIN (pos))
104 if (!integer_zerop (TREE_PURPOSE (pos)))
105 break;
106
107 return ix;
108}
109
/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  For each
   function one element (the address) is appended; for each variable two
   elements (address, size) are appended.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      /* On the accelerator side a "declare target link" var is only
	 treated as such once it has a value expr (the link pointer).  */
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* See also omp_finish_file and output_offload_tables in lto-cgraph.c.  */
      if (!in_lto_p && !symtab_node::get (it))
	continue;

      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
166
dc703151
JJ
167/* Return true if DECL is a function for which its references should be
168 analyzed. */
169
170static bool
171omp_declare_target_fn_p (tree decl)
172{
173 return (TREE_CODE (decl) == FUNCTION_DECL
174 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
175 && !lookup_attribute ("omp declare target host",
176 DECL_ATTRIBUTES (decl))
177 && (!flag_openacc
178 || oacc_get_fn_attrib (decl) == NULL_TREE));
179}
180
181/* Return true if DECL Is a variable for which its initializer references
182 should be analyzed. */
183
184static bool
185omp_declare_target_var_p (tree decl)
186{
187 return (VAR_P (decl)
188 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
189 && !lookup_attribute ("omp declare target link",
190 DECL_ATTRIBUTES (decl)));
191}
192
/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to.  DATA is the worklist vector (vec<tree> *) of decls
   still to be scanned.  */

static tree
omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == CALL_EXPR
      && CALL_EXPR_FN (*tp)
      && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
      && lookup_attribute ("omp declare variant base",
			   DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
							  0))))
    {
      /* Calls to a "declare variant" base: also mark each variant
	 function recursively.  */
      tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0);
      for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))
	{
	  attr = lookup_attribute ("omp declare variant base", attr);
	  if (attr == NULL_TREE)
	    break;
	  tree purpose = TREE_PURPOSE (TREE_VALUE (attr));
	  if (TREE_CODE (purpose) == FUNCTION_DECL)
	    omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);
	}
    }
  else if (TREE_CODE (*tp) == FUNCTION_DECL)
    {
      tree decl = *tp;
      tree id = get_identifier ("omp declare target");
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL)
	{
	  /* First follow explicit alias targets, marking each alias on
	     the way unless it is already declare target (or host).  */
	  while (node->alias_target
		 && TREE_CODE (node->alias_target) == FUNCTION_DECL)
	    {
	      if (!omp_declare_target_fn_p (node->decl)
		  && !lookup_attribute ("omp declare target host",
					DECL_ATTRIBUTES (node->decl)))
		{
		  node->offloadable = 1;
		  DECL_ATTRIBUTES (node->decl)
		    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
		}
	      node = symtab_node::get (node->alias_target);
	    }
	  /* Then walk the remaining alias chain down to the ultimate
	     target, marking every intermediate node likewise.  */
	  symtab_node *new_node = node->ultimate_alias_target ();
	  decl = new_node->decl;
	  while (node != new_node)
	    {
	      if (!omp_declare_target_fn_p (node->decl)
		  && !lookup_attribute ("omp declare target host",
					DECL_ATTRIBUTES (node->decl)))
		{
		  node->offloadable = 1;
		  DECL_ATTRIBUTES (node->decl)
		    = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
		}
	      gcc_assert (node->alias && node->analyzed);
	      node = node->get_alias_target ();
	    }
	  node->offloadable = 1;
	  if (ENABLE_OFFLOADING)
	    g->have_offload = true;
	}
      /* Nothing more to do if the (ultimate) decl is already declare
	 target (or explicitly host).  */
      if (omp_declare_target_fn_p (decl)
	  || lookup_attribute ("omp declare target host",
			       DECL_ATTRIBUTES (decl)))
	return NULL_TREE;

      /* Queue decls with a body for scanning of their own references.  */
      if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
	((vec<tree> *) data)->safe_push (decl);
      DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
					  DECL_ATTRIBUTES (decl));
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  /* else if (TREE_CODE (*tp) == OMP_TARGET)
       {
	 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
	   if (OMP_DEVICE_ANCESTOR (dev))
	     *walk_subtrees = 0;
       } */
  return NULL_TREE;
}
278
49ddde69
JJ
279/* Similarly, but ignore references outside of OMP_TARGET regions. */
280
281static tree
282omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
283{
284 if (TREE_CODE (*tp) == OMP_TARGET)
285 {
286 /* And not OMP_DEVICE_ANCESTOR. */
287 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
288 omp_discover_declare_target_tgt_fn_r,
289 data);
290 *walk_subtrees = 0;
291 }
292 else if (TYPE_P (*tp))
293 *walk_subtrees = 0;
294 return NULL_TREE;
295}
296
dc703151
JJ
/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to, and global VAR_DECLs likewise.  DATA is the worklist
   vector (vec<tree> *).  */

static tree
omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == FUNCTION_DECL)
    /* Function references are handled by the function walker.  */
    return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
  else if (VAR_P (*tp)
	   && is_global_var (*tp)
	   && !omp_declare_target_var_p (*tp))
    {
      tree id = get_identifier ("omp declare target");
      /* A var may not be both "link" and implicitly "to"; diagnose and
	 drop the "link" attribute.  */
      if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
	{
	  error_at (DECL_SOURCE_LOCATION (*tp),
		    "%qD specified both in declare target %<link%> and "
		    "implicitly in %<to%> clauses", *tp);
	  DECL_ATTRIBUTES (*tp)
	    = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
	}
      /* Queue initialized statics so their initializers get scanned too.  */
      if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
	((vec<tree> *) data)->safe_push (*tp);
      DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL && !node->offloadable)
	{
	  node->offloadable = 1;
	  if (ENABLE_OFFLOADING)
	    {
	      g->have_offload = true;
	      if (is_a <varpool_node *> (node))
		vec_safe_push (offload_vars, node->decl);
	    }
	}
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}
338
/* Perform the OpenMP implicit declare target to discovery.  Seeds a
   worklist with explicitly marked functions/variables (and functions
   containing target regions), then transitively marks everything they
   reference.  */

void
omp_discover_implicit_declare_target (void)
{
  cgraph_node *node;
  varpool_node *vnode;
  auto_vec<tree> worklist;

  /* Seed with defined functions: declare-target ones, those containing
     an omp target region, and their nested functions likewise.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_SAVED_TREE (node->decl))
      {
	struct cgraph_node *cgn;
	if (omp_declare_target_fn_p (node->decl))
	  worklist.safe_push (node->decl);
	else if (DECL_STRUCT_FUNCTION (node->decl)
		 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
	  worklist.safe_push (node->decl);
	for (cgn = first_nested_function (node);
	     cgn; cgn = next_nested_function (cgn))
	  if (omp_declare_target_fn_p (cgn->decl))
	    worklist.safe_push (cgn->decl);
	  else if (DECL_STRUCT_FUNCTION (cgn->decl)
		   && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
	    worklist.safe_push (cgn->decl);
      }
  /* Seed with initialized declare-target variables.  */
  FOR_EACH_VARIABLE (vnode)
    if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
	&& omp_declare_target_var_p (vnode->decl))
      worklist.safe_push (vnode->decl);
  /* Drain the worklist; the walkers push newly discovered decls.  */
  while (!worklist.is_empty ())
    {
      tree decl = worklist.pop ();
      if (VAR_P (decl))
	walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
				      omp_discover_declare_target_var_r,
				      &worklist);
      else if (omp_declare_target_fn_p (decl))
	walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
				      omp_discover_declare_target_tgt_fn_r,
				      &worklist);
      else
	/* Not itself declare target: only references inside its target
	   regions count.  */
	walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
				      omp_discover_declare_target_fn_r,
				      &worklist);
    }

  lang_hooks.decls.omp_finish_decl_inits ();
}
388
389
629b3d75
MJ
/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  On targets without
   named sections the decls are instead recorded via
   targetm.record_offload_symbol.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      /* Two entries (address, size) per variable.  */
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    vec_safe_length (v_v));
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  /* See also add_decls_addresses_to_decl_constructor
	     and output_offload_tables in lto-cgraph.c.  */
	  if (!in_lto_p && !symtab_node::get (it))
	    continue;
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  if (!in_lto_p && !symtab_node::get (it))
	    continue;
#ifdef ACCEL_COMPILER
	  /* For "declare target link" vars record the link pointer, not
	     the var itself.  */
	  if (DECL_HAS_VALUE_EXPR_P (it)
	      && lookup_attribute ("omp declare target link",
				   DECL_ATTRIBUTES (it)))
	    {
	      tree value_expr = DECL_VALUE_EXPR (it);
	      tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	      targetm.record_offload_symbol (link_ptr_decl);
	      varpool_node::finalize_decl (link_ptr_decl);
	    }
	  else
#endif
	    targetm.record_offload_symbol (it);
	}
    }
}
474
02889d23
CLT
475/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
476 axis DIM. Return a tmp var holding the result. */
477
478static tree
479oacc_dim_call (bool pos, int dim, gimple_seq *seq)
480{
481 tree arg = build_int_cst (unsigned_type_node, dim);
482 tree size = create_tmp_var (integer_type_node);
483 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
484 gimple *call = gimple_build_call_internal (fn, 1, arg);
485
486 gimple_call_set_lhs (call, size);
487 gimple_seq_add_stmt (seq, call);
488
489 return size;
490}
491
629b3d75
MJ
492/* Find the number of threads (POS = false), or thread number (POS =
493 true) for an OpenACC region partitioned as MASK. Setup code
494 required for the calculation is added to SEQ. */
495
496static tree
497oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
498{
499 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
500 unsigned ix;
501
502 /* Start at gang level, and examine relevant dimension indices. */
503 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
504 if (GOMP_DIM_MASK (ix) & mask)
505 {
629b3d75
MJ
506 if (res)
507 {
508 /* We had an outer index, so scale that by the size of
509 this dimension. */
02889d23 510 tree n = oacc_dim_call (false, ix, seq);
629b3d75
MJ
511 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
512 }
513 if (pos)
514 {
515 /* Determine index in this dimension. */
02889d23 516 tree id = oacc_dim_call (true, ix, seq);
629b3d75
MJ
517 if (res)
518 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
519 else
520 res = id;
521 }
522 }
523
524 if (res == NULL_TREE)
525 res = integer_zero_node;
526
527 return res;
528}
529
/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.

   CALL's arguments are: kind, dir, range, step, chunk_size, mask
   (and, for CHUNKS/BOUND/OFFSET kinds, a 7th argument).  The call is
   replaced in place by the expansion.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = NULL_TREE;
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
  if (!lhs)
    {
      gsi_replace_with_seq (&gsi, seq, true);
      return;
    }

  type = TREE_TYPE (lhs);

#ifdef ACCEL_COMPILER
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	     = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  class loop *parent = bb->loop_father;
	  class loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* chunk_size = (range + per - dir) / per, i.e. one chunk
		 spanning the whole iteration space.  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  /* The bound is offset + span, clamped to range (MIN for an
	     upward loop, MAX for a downward one).  */
	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
747
02889d23
CLT
/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  /* Element loops may only be worker and/or vector partitioned.  */
  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).  We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
826
629b3d75
MJ
/* Default partitioned and minimum partitioned dimensions.  Filled in by
   oacc_parse_default_dims and read by oacc_validate_dims.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];
831
b75e9c83
TV
/* Return the default partitioning size for axis DIM.  */

int
oacc_get_default_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_default_dims[dim];
}
838
6e373d13
TV
/* Return the minimum permissible partitioning size for axis DIM.  */

int
oacc_get_min_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_min_dims[dim];
}
845
629b3d75
MJ
/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  /* Reset: default is "unspecified", minimum is 1 for every axis.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      /* Subsequent dimensions are ':'-separated.  */
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  /* An empty slot (immediately followed by ':') leaves the
	     dimension at its default.  */
	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "%<-fopenacc-dim%> operand is malformed at %qs", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
}
906
907/* Validate and update the dimensions for offloaded FN. ATTRS is the
908 raw attribute. DIMS is an array of dimensions, which is filled in.
909 LEVEL is the partitioning level of a routine, or -1 for an offload
01914336 910 region itself. USED is the mask of partitioned execution in the
629b3d75
MJ
911 function. */
912
913static void
914oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
915{
916 tree purpose[GOMP_DIM_MAX];
917 unsigned ix;
918 tree pos = TREE_VALUE (attrs);
629b3d75
MJ
919
920 /* Make sure the attribute creator attached the dimension
921 information. */
922 gcc_assert (pos);
923
924 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
925 {
926 purpose[ix] = TREE_PURPOSE (pos);
927 tree val = TREE_VALUE (pos);
928 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
929 pos = TREE_CHAIN (pos);
930 }
931
22cff118
TS
932 bool check = true;
933#ifdef ACCEL_COMPILER
934 check = false;
935#endif
936 if (check
937 && warn_openacc_parallelism
938 && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
939 {
940 static char const *const axes[] =
941 /* Must be kept in sync with GOMP_DIM enumeration. */
942 { "gang", "worker", "vector" };
943 for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
944 if (dims[ix] < 0)
945 ; /* Defaulting axis. */
946 else if ((used & GOMP_DIM_MASK (ix)) && dims[ix] == 1)
947 /* There is partitioned execution, but the user requested a
948 dimension size of 1. They're probably confused. */
949 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
950 "region contains %s partitioned code but"
951 " is not %s partitioned", axes[ix], axes[ix]);
952 else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
953 /* The dimension is explicitly partitioned to non-unity, but
954 no use is made within the region. */
955 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
956 "region is %s partitioned but"
957 " does not contain %s partitioned code",
958 axes[ix], axes[ix]);
959 }
960
46dedae6 961 bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
629b3d75
MJ
962
963 /* Default anything left to 1 or a partitioned default. */
964 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
965 if (dims[ix] < 0)
966 {
967 /* The OpenACC spec says 'If the [num_gangs] clause is not
968 specified, an implementation-defined default will be used;
969 the default may depend on the code within the construct.'
970 (2.5.6). Thus an implementation is free to choose
971 non-unity default for a parallel region that doesn't have
972 any gang-partitioned loops. However, it appears that there
973 is a sufficient body of user code that expects non-gang
974 partitioned regions to not execute in gang-redundant mode.
975 So we (a) don't warn about the non-portability and (b) pick
976 the minimum permissible dimension size when there is no
977 partitioned execution. Otherwise we pick the global
978 default for the dimension, which the user can control. The
979 same wording and logic applies to num_workers and
980 vector_length, however the worker- or vector- single
981 execution doesn't have the same impact as gang-redundant
 982 execution. (If the minimum gang-level partitioning is not 1,
983 the target is probably too confusing.) */
984 dims[ix] = (used & GOMP_DIM_MASK (ix)
985 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
986 changed = true;
987 }
988
989 if (changed)
990 {
991 /* Replace the attribute with new values. */
992 pos = NULL_TREE;
993 for (ix = GOMP_DIM_MAX; ix--;)
25651634
TS
994 pos = tree_cons (purpose[ix],
995 build_int_cst (integer_type_node, dims[ix]), pos);
629b3d75
MJ
996 oacc_replace_fn_attrib (fn, pos);
997 }
998}
999
1000/* Create an empty OpenACC loop structure at LOC. */
1001
1002static oacc_loop *
1003new_oacc_loop_raw (oacc_loop *parent, location_t loc)
1004{
1005 oacc_loop *loop = XCNEW (oacc_loop);
1006
1007 loop->parent = parent;
629b3d75
MJ
1008
1009 if (parent)
1010 {
1011 loop->sibling = parent->child;
1012 parent->child = loop;
1013 }
1014
1015 loop->loc = loc;
629b3d75
MJ
1016 return loop;
1017}
1018
1019/* Create an outermost, dummy OpenACC loop for offloaded function
1020 DECL. */
1021
1022static oacc_loop *
1023new_oacc_loop_outer (tree decl)
1024{
1025 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
1026}
1027
1028/* Start a new OpenACC loop structure beginning at head marker HEAD.
1029 Link into PARENT loop. Return the new loop. */
1030
1031static oacc_loop *
1032new_oacc_loop (oacc_loop *parent, gcall *marker)
1033{
1034 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
1035
1036 loop->marker = marker;
1037
1038 /* TODO: This is where device_type flattening would occur for the loop
01914336 1039 flags. */
629b3d75
MJ
1040
1041 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
1042
1043 tree chunk_size = integer_zero_node;
1044 if (loop->flags & OLF_GANG_STATIC)
1045 chunk_size = gimple_call_arg (marker, 4);
1046 loop->chunk_size = chunk_size;
1047
1048 return loop;
1049}
1050
1051/* Create a dummy loop encompassing a call to a openACC routine.
1052 Extract the routine's partitioning requirements. */
1053
1054static void
1055new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
1056{
1057 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
1058 int level = oacc_fn_attrib_level (attrs);
1059
1060 gcc_assert (level >= 0);
1061
1062 loop->marker = call;
1063 loop->routine = decl;
1064 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
1065 ^ (GOMP_DIM_MASK (level) - 1));
1066}
1067
1068/* Finish off the current OpenACC loop ending at tail marker TAIL.
1069 Return the parent loop. */
1070
1071static oacc_loop *
1072finish_oacc_loop (oacc_loop *loop)
1073{
1074 /* If the loop has been collapsed, don't partition it. */
02889d23 1075 if (loop->ifns.is_empty ())
629b3d75
MJ
1076 loop->mask = loop->flags = 0;
1077 return loop->parent;
1078}
1079
1080/* Free all OpenACC loop structures within LOOP (inclusive). */
1081
1082static void
1083free_oacc_loop (oacc_loop *loop)
1084{
1085 if (loop->sibling)
1086 free_oacc_loop (loop->sibling);
1087 if (loop->child)
1088 free_oacc_loop (loop->child);
1089
622f6b64 1090 loop->ifns.release ();
629b3d75
MJ
1091 free (loop);
1092}
1093
1094/* Dump out the OpenACC loop head or tail beginning at FROM. */
1095
1096static void
1097dump_oacc_loop_part (FILE *file, gcall *from, int depth,
1098 const char *title, int level)
1099{
1100 enum ifn_unique_kind kind
1101 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1102
1103 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
1104 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1105 {
1106 gimple *stmt = gsi_stmt (gsi);
1107
1108 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1109 {
1110 enum ifn_unique_kind k
1111 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
1112 (gimple_call_arg (stmt, 0)));
1113
1114 if (k == kind && stmt != from)
1115 break;
1116 }
ef6cb4c7 1117 print_gimple_stmt (file, stmt, depth * 2 + 2);
629b3d75
MJ
1118
1119 gsi_next (&gsi);
1120 while (gsi_end_p (gsi))
1121 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1122 }
1123}
1124
5d390fd3 1125/* Dump OpenACC loop LOOP, its children, and its siblings. */
629b3d75
MJ
1126
1127static void
1128dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
1129{
1130 int ix;
1131
1132 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
1133 loop->flags, loop->mask,
1134 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
1135
1136 if (loop->marker)
ef6cb4c7 1137 print_gimple_stmt (file, loop->marker, depth * 2);
629b3d75
MJ
1138
1139 if (loop->routine)
1140 fprintf (file, "%*sRoutine %s:%u:%s\n",
1141 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
1142 DECL_SOURCE_LINE (loop->routine),
1143 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
1144
1145 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
1146 if (loop->heads[ix])
1147 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
1148 for (ix = GOMP_DIM_MAX; ix--;)
1149 if (loop->tails[ix])
1150 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
1151
1152 if (loop->child)
1153 dump_oacc_loop (file, loop->child, depth + 1);
1154 if (loop->sibling)
1155 dump_oacc_loop (file, loop->sibling, depth);
1156}
1157
1158void debug_oacc_loop (oacc_loop *);
1159
1160/* Dump loops to stderr. */
1161
1162DEBUG_FUNCTION void
1163debug_oacc_loop (oacc_loop *loop)
1164{
1165 dump_oacc_loop (stderr, loop, 0);
1166}
1167
5d390fd3
TS
1168/* Provide diagnostics on OpenACC loop LOOP, its children, and its
1169 siblings. */
1170
1171static void
1172inform_oacc_loop (const oacc_loop *loop)
1173{
1174 const char *gang
1175 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
1176 const char *worker
1177 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
1178 const char *vector
1179 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
1180 const char *seq = loop->mask == 0 ? " seq" : "";
1181 const dump_user_location_t loc
1182 = dump_user_location_t::from_location_t (loop->loc);
1183 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1184 "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
1185 vector, seq);
1186
1187 if (loop->child)
1188 inform_oacc_loop (loop->child);
1189 if (loop->sibling)
1190 inform_oacc_loop (loop->sibling);
1191}
1192
629b3d75
MJ
1193/* DFS walk of basic blocks BB onwards, creating OpenACC loop
1194 structures as we go. By construction these loops are properly
1195 nested. */
1196
1197static void
1198oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
1199{
1200 int marker = 0;
1201 int remaining = 0;
1202
1203 if (bb->flags & BB_VISITED)
1204 return;
1205
1206 follow:
1207 bb->flags |= BB_VISITED;
1208
1209 /* Scan for loop markers. */
1210 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1211 gsi_next (&gsi))
1212 {
1213 gimple *stmt = gsi_stmt (gsi);
1214
1215 if (!is_gimple_call (stmt))
1216 continue;
1217
1218 gcall *call = as_a <gcall *> (stmt);
1219
1220 /* If this is a routine, make a dummy loop for it. */
1221 if (tree decl = gimple_call_fndecl (call))
1222 if (tree attrs = oacc_get_fn_attrib (decl))
1223 {
1224 gcc_assert (!marker);
1225 new_oacc_loop_routine (loop, call, decl, attrs);
1226 }
1227
1228 if (!gimple_call_internal_p (call))
1229 continue;
1230
1231 switch (gimple_call_internal_fn (call))
1232 {
1233 default:
1234 break;
1235
1236 case IFN_GOACC_LOOP:
02889d23
CLT
1237 case IFN_GOACC_TILE:
1238 /* Record the abstraction function, so we can manipulate it
1239 later. */
1240 loop->ifns.safe_push (call);
629b3d75
MJ
1241 break;
1242
1243 case IFN_UNIQUE:
1244 enum ifn_unique_kind kind
1245 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
1246 (gimple_call_arg (call, 0)));
1247 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
1248 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
1249 {
1250 if (gimple_call_num_args (call) == 2)
1251 {
1252 gcc_assert (marker && !remaining);
1253 marker = 0;
1254 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
1255 loop = finish_oacc_loop (loop);
1256 else
1257 loop->head_end = call;
1258 }
1259 else
1260 {
1261 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
1262
1263 if (!marker)
1264 {
1265 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
1266 loop = new_oacc_loop (loop, call);
1267 remaining = count;
1268 }
1269 gcc_assert (count == remaining);
1270 if (remaining)
1271 {
1272 remaining--;
1273 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
1274 loop->heads[marker] = call;
1275 else
1276 loop->tails[remaining] = call;
1277 }
1278 marker++;
1279 }
1280 }
1281 }
1282 }
1283 if (remaining || marker)
1284 {
1285 bb = single_succ (bb);
1286 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
1287 goto follow;
1288 }
1289
1290 /* Walk successor blocks. */
1291 edge e;
1292 edge_iterator ei;
1293
1294 FOR_EACH_EDGE (e, ei, bb->succs)
1295 oacc_loop_discover_walk (loop, e->dest);
1296}
1297
1298/* LOOP is the first sibling. Reverse the order in place and return
1299 the new first sibling. Recurse to child loops. */
1300
1301static oacc_loop *
1302oacc_loop_sibling_nreverse (oacc_loop *loop)
1303{
1304 oacc_loop *last = NULL;
1305 do
1306 {
1307 if (loop->child)
01914336 1308 loop->child = oacc_loop_sibling_nreverse (loop->child);
629b3d75
MJ
1309
1310 oacc_loop *next = loop->sibling;
1311 loop->sibling = last;
1312 last = loop;
1313 loop = next;
1314 }
1315 while (loop);
1316
1317 return last;
1318}
1319
1320/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1321 the current function. */
1322
1323static oacc_loop *
1324oacc_loop_discovery ()
1325{
1326 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1327 in the following. */
1328 clear_bb_flags ();
1329
1330 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1331 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1332
1333 /* The siblings were constructed in reverse order, reverse them so
1334 that diagnostics come out in an unsurprising order. */
1335 top = oacc_loop_sibling_nreverse (top);
1336
1337 return top;
1338}
1339
1340/* Transform the abstract internal function markers starting at FROM
1341 to be for partitioning level LEVEL. Stop when we meet another HEAD
1342 or TAIL marker. */
1343
1344static void
1345oacc_loop_xform_head_tail (gcall *from, int level)
1346{
1347 enum ifn_unique_kind kind
1348 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1349 tree replacement = build_int_cst (unsigned_type_node, level);
1350
1351 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1352 {
1353 gimple *stmt = gsi_stmt (gsi);
1354
1355 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1356 {
1357 enum ifn_unique_kind k
1358 = ((enum ifn_unique_kind)
1359 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1360
29a2f518
JB
1361 if (k == IFN_UNIQUE_OACC_FORK
1362 || k == IFN_UNIQUE_OACC_JOIN
1363 || k == IFN_UNIQUE_OACC_PRIVATE)
629b3d75
MJ
1364 *gimple_call_arg_ptr (stmt, 2) = replacement;
1365 else if (k == kind && stmt != from)
1366 break;
1367 }
1368 else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
1369 *gimple_call_arg_ptr (stmt, 3) = replacement;
1370
1371 gsi_next (&gsi);
1372 while (gsi_end_p (gsi))
1373 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1374 }
1375}
1376
629b3d75
MJ
1377/* Process the discovered OpenACC loops, setting the correct
1378 partitioning level etc. */
1379
1380static void
1381oacc_loop_process (oacc_loop *loop)
1382{
1383 if (loop->child)
1384 oacc_loop_process (loop->child);
1385
1386 if (loop->mask && !loop->routine)
1387 {
1388 int ix;
02889d23
CLT
1389 tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
1390 tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
629b3d75 1391 tree chunk_arg = loop->chunk_size;
02889d23
CLT
1392 gcall *call;
1393
1394 for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
1395 switch (gimple_call_internal_fn (call))
1396 {
1397 case IFN_GOACC_LOOP:
1398 {
1399 bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
1400 gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
1401 if (!is_e)
1402 gimple_call_set_arg (call, 4, chunk_arg);
1403 }
1404 break;
1405
1406 case IFN_GOACC_TILE:
1407 gimple_call_set_arg (call, 3, mask_arg);
1408 gimple_call_set_arg (call, 4, e_mask_arg);
1409 break;
629b3d75 1410
02889d23
CLT
1411 default:
1412 gcc_unreachable ();
1413 }
629b3d75 1414
02889d23
CLT
1415 unsigned dim = GOMP_DIM_GANG;
1416 unsigned mask = loop->mask | loop->e_mask;
629b3d75
MJ
1417 for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1418 {
1419 while (!(GOMP_DIM_MASK (dim) & mask))
1420 dim++;
1421
1422 oacc_loop_xform_head_tail (loop->heads[ix], dim);
1423 oacc_loop_xform_head_tail (loop->tails[ix], dim);
1424
1425 mask ^= GOMP_DIM_MASK (dim);
1426 }
1427 }
1428
1429 if (loop->sibling)
1430 oacc_loop_process (loop->sibling);
1431}
1432
1433/* Walk the OpenACC loop heirarchy checking and assigning the
1434 programmer-specified partitionings. OUTER_MASK is the partitioning
1435 this loop is contained within. Return mask of partitioning
1436 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1437 bit. */
1438
1439static unsigned
1440oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1441{
1442 unsigned this_mask = loop->mask;
1443 unsigned mask_all = 0;
1444 bool noisy = true;
1445
1446#ifdef ACCEL_COMPILER
1447 /* When device_type is supported, we want the device compiler to be
1448 noisy, if the loop parameters are device_type-specific. */
1449 noisy = false;
1450#endif
1451
1452 if (!loop->routine)
1453 {
1454 bool auto_par = (loop->flags & OLF_AUTO) != 0;
1455 bool seq_par = (loop->flags & OLF_SEQ) != 0;
02889d23
CLT
1456 bool tiling = (loop->flags & OLF_TILE) != 0;
1457
629b3d75
MJ
1458 this_mask = ((loop->flags >> OLF_DIM_BASE)
1459 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1460
02889d23
CLT
1461 /* Apply auto partitioning if this is a non-partitioned regular
1462 loop, or (no more than) single axis tiled loop. */
1463 bool maybe_auto
1464 = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
1465
629b3d75
MJ
1466 if ((this_mask != 0) + auto_par + seq_par > 1)
1467 {
1468 if (noisy)
1469 error_at (loop->loc,
1470 seq_par
324ff1a0
JJ
1471 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1472 : G_("%<auto%> conflicts with other OpenACC loop "
1473 "specifiers"));
02889d23 1474 maybe_auto = false;
629b3d75
MJ
1475 loop->flags &= ~OLF_AUTO;
1476 if (seq_par)
1477 {
01914336
MJ
1478 loop->flags
1479 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
629b3d75
MJ
1480 this_mask = 0;
1481 }
1482 }
02889d23
CLT
1483
1484 if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
1485 {
1486 loop->flags |= OLF_AUTO;
1487 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1488 }
629b3d75
MJ
1489 }
1490
1491 if (this_mask & outer_mask)
1492 {
1493 const oacc_loop *outer;
1494 for (outer = loop->parent; outer; outer = outer->parent)
02889d23 1495 if ((outer->mask | outer->e_mask) & this_mask)
629b3d75
MJ
1496 break;
1497
1498 if (noisy)
1499 {
1500 if (outer)
1501 {
1502 error_at (loop->loc,
efebb49e
DM
1503 loop->routine
1504 ? G_("routine call uses same OpenACC parallelism"
1505 " as containing loop")
1506 : G_("inner loop uses same OpenACC parallelism"
1507 " as containing loop"));
629b3d75
MJ
1508 inform (outer->loc, "containing loop here");
1509 }
1510 else
1511 error_at (loop->loc,
efebb49e
DM
1512 loop->routine
1513 ? G_("routine call uses OpenACC parallelism disallowed"
1514 " by containing routine")
1515 : G_("loop uses OpenACC parallelism disallowed"
1516 " by containing routine"));
629b3d75
MJ
1517
1518 if (loop->routine)
1519 inform (DECL_SOURCE_LOCATION (loop->routine),
1520 "routine %qD declared here", loop->routine);
1521 }
1522 this_mask &= ~outer_mask;
1523 }
1524 else
1525 {
1526 unsigned outermost = least_bit_hwi (this_mask);
1527
1528 if (outermost && outermost <= outer_mask)
1529 {
1530 if (noisy)
1531 {
1532 error_at (loop->loc,
1533 "incorrectly nested OpenACC loop parallelism");
1534
1535 const oacc_loop *outer;
1536 for (outer = loop->parent;
1537 outer->flags && outer->flags < outermost;
1538 outer = outer->parent)
1539 continue;
1540 inform (outer->loc, "containing loop here");
1541 }
1542
1543 this_mask &= ~outermost;
1544 }
1545 }
1546
629b3d75
MJ
1547 mask_all |= this_mask;
1548
02889d23
CLT
1549 if (loop->flags & OLF_TILE)
1550 {
1551 /* When tiling, vector goes to the element loop, and failing
1552 that we put worker there. The std doesn't contemplate
1553 specifying all three. We choose to put worker and vector on
1554 the element loops in that case. */
1555 unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
1556 if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1557 this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
1558
1559 loop->e_mask = this_e_mask;
1560 this_mask ^= this_e_mask;
1561 }
1562
1563 loop->mask = this_mask;
1564
1565 if (dump_file)
1566 fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
1567 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1568 loop->mask, loop->e_mask);
1569
629b3d75
MJ
1570 if (loop->child)
1571 {
02889d23
CLT
1572 unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
1573 loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
629b3d75
MJ
1574 mask_all |= loop->inner;
1575 }
1576
1577 if (loop->sibling)
1578 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1579
1580 return mask_all;
1581}
1582
1583/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
1584 OUTER_MASK is the partitioning this loop is contained within.
02889d23 1585 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
629b3d75
MJ
1586 Return the cumulative partitioning used by this loop, siblings and
1587 children. */
1588
1589static unsigned
02889d23
CLT
1590oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1591 bool outer_assign)
629b3d75
MJ
1592{
1593 bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1594 bool noisy = true;
02889d23 1595 bool tiling = loop->flags & OLF_TILE;
629b3d75
MJ
1596
1597#ifdef ACCEL_COMPILER
1598 /* When device_type is supported, we want the device compiler to be
1599 noisy, if the loop parameters are device_type-specific. */
1600 noisy = false;
1601#endif
1602
891ba5eb 1603 if (assign && (!outer_assign || loop->inner))
629b3d75 1604 {
02889d23
CLT
1605 /* Allocate outermost and non-innermost loops at the outermost
1606 non-innermost available level. */
1607 unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1608
1609 /* Find the first outermost available partition. */
1610 while (this_mask <= outer_mask)
1611 this_mask <<= 1;
1612
1613 /* Grab two axes if tiling, and we've not assigned anything */
1614 if (tiling && !(loop->mask | loop->e_mask))
1615 this_mask |= this_mask << 1;
1616
1617 /* Prohibit the innermost partitioning at the moment. */
1618 this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
629b3d75 1619
02889d23
CLT
1620 /* Don't use any dimension explicitly claimed by an inner loop. */
1621 this_mask &= ~loop->inner;
1622
1623 if (tiling && !loop->e_mask)
1624 {
1625 /* If we got two axes, allocate the inner one to the element
1626 loop. */
1627 loop->e_mask = this_mask & (this_mask << 1);
1628 this_mask ^= loop->e_mask;
1629 }
1630
1631 loop->mask |= this_mask;
629b3d75
MJ
1632 }
1633
1634 if (loop->child)
1635 {
02889d23
CLT
1636 unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1637 loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1638 outer_assign | assign);
629b3d75
MJ
1639 }
1640
02889d23 1641 if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
629b3d75 1642 {
02889d23
CLT
1643 /* Allocate the loop at the innermost available level. Note
1644 that we do this even if we already assigned this loop the
1645 outermost available level above. That way we'll partition
1646 this along 2 axes, if they are available. */
629b3d75
MJ
1647 unsigned this_mask = 0;
1648
01914336 1649 /* Determine the outermost partitioning used within this loop. */
629b3d75
MJ
1650 this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1651 this_mask = least_bit_hwi (this_mask);
1652
1653 /* Pick the partitioning just inside that one. */
1654 this_mask >>= 1;
1655
01914336 1656 /* And avoid picking one use by an outer loop. */
629b3d75
MJ
1657 this_mask &= ~outer_mask;
1658
02889d23
CLT
1659 /* If tiling and we failed completely above, grab the next one
1660 too. Making sure it doesn't hit an outer loop. */
1661 if (tiling)
1662 {
1663 this_mask &= ~(loop->e_mask | loop->mask);
1664 unsigned tile_mask = ((this_mask >> 1)
1665 & ~(outer_mask | loop->e_mask | loop->mask));
1666
1667 if (tile_mask || loop->mask)
1668 {
1669 loop->e_mask |= this_mask;
1670 this_mask = tile_mask;
1671 }
1672 if (!loop->e_mask && noisy)
1673 warning_at (loop->loc, 0,
1674 "insufficient partitioning available"
1675 " to parallelize element loop");
1676 }
629b3d75 1677
02889d23
CLT
1678 loop->mask |= this_mask;
1679 if (!loop->mask && noisy)
1680 warning_at (loop->loc, 0,
efebb49e
DM
1681 tiling
1682 ? G_("insufficient partitioning available"
1683 " to parallelize tile loop")
1684 : G_("insufficient partitioning available"
1685 " to parallelize loop"));
629b3d75
MJ
1686 }
1687
1688 if (assign && dump_file)
02889d23 1689 fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
629b3d75 1690 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
02889d23 1691 loop->mask, loop->e_mask);
629b3d75
MJ
1692
1693 unsigned inner_mask = 0;
1694
1695 if (loop->sibling)
02889d23
CLT
1696 inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1697 outer_mask, outer_assign);
629b3d75 1698
02889d23 1699 inner_mask |= loop->inner | loop->mask | loop->e_mask;
629b3d75
MJ
1700
1701 return inner_mask;
1702}
1703
1704/* Walk the OpenACC loop heirarchy to check and assign partitioning
1705 axes. Return mask of partitioning. */
1706
1707static unsigned
1708oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1709{
1710 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1711
1712 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1713 {
1714 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
02889d23 1715 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
629b3d75
MJ
1716 }
1717 return mask_all;
1718}
1719
1720/* Default fork/join early expander. Delete the function calls if
1721 there is no RTL expander. */
1722
1723bool
1724default_goacc_fork_join (gcall *ARG_UNUSED (call),
1725 const int *ARG_UNUSED (dims), bool is_fork)
1726{
1727 if (is_fork)
1728 return targetm.have_oacc_fork ();
1729 else
1730 return targetm.have_oacc_join ();
1731}
1732
1733/* Default goacc.reduction early expander.
1734
1735 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1736 If RES_PTR is not integer-zerop:
1737 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1738 TEARDOWN - emit '*RES_PTR = VAR'
1739 If LHS is not NULL
1740 emit 'LHS = VAR' */
1741
1742void
1743default_goacc_reduction (gcall *call)
1744{
1745 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1746 gimple_stmt_iterator gsi = gsi_for_stmt (call);
1747 tree lhs = gimple_call_lhs (call);
1748 tree var = gimple_call_arg (call, 2);
1749 gimple_seq seq = NULL;
1750
1751 if (code == IFN_GOACC_REDUCTION_SETUP
1752 || code == IFN_GOACC_REDUCTION_TEARDOWN)
1753 {
1754 /* Setup and Teardown need to copy from/to the receiver object,
1755 if there is one. */
1756 tree ref_to_res = gimple_call_arg (call, 1);
1757
1758 if (!integer_zerop (ref_to_res))
1759 {
1760 tree dst = build_simple_mem_ref (ref_to_res);
1761 tree src = var;
1762
1763 if (code == IFN_GOACC_REDUCTION_SETUP)
1764 {
1765 src = dst;
1766 dst = lhs;
1767 lhs = NULL;
1768 }
1769 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1770 }
1771 }
1772
1773 /* Copy VAR to LHS, if there is an LHS. */
1774 if (lhs)
1775 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1776
1777 gsi_replace_with_seq (&gsi, seq, true);
1778}
1779
29a2f518
JB
1780struct var_decl_rewrite_info
1781{
1782 gimple *stmt;
1783 hash_map<tree, tree> *adjusted_vars;
1784 bool avoid_pointer_conversion;
1785 bool modified;
1786};
1787
1788/* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
1789 themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
1790 the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
1791 gang-private variables in OpenACC offload regions to reside in GPU shared
1792 memory. */
1793
1794static tree
1795oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
1796{
1797 walk_stmt_info *wi = (walk_stmt_info *) data;
1798 var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
1799
1800 if (TREE_CODE (*tp) == ADDR_EXPR)
1801 {
1802 tree arg = TREE_OPERAND (*tp, 0);
1803 tree *new_arg = info->adjusted_vars->get (arg);
1804
1805 if (new_arg)
1806 {
1807 if (info->avoid_pointer_conversion)
1808 {
1809 *tp = build_fold_addr_expr (*new_arg);
1810 info->modified = true;
1811 *walk_subtrees = 0;
1812 }
1813 else
1814 {
1815 gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
1816 tree repl = build_fold_addr_expr (*new_arg);
1817 gimple *stmt1
1818 = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
1819 tree conv = convert_to_pointer (TREE_TYPE (*tp),
1820 gimple_assign_lhs (stmt1));
1821 gimple *stmt2
1822 = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
1823 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1824 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1825 *tp = gimple_assign_lhs (stmt2);
1826 info->modified = true;
1827 *walk_subtrees = 0;
1828 }
1829 }
1830 }
1831 else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
1832 {
1833 tree *base = &TREE_OPERAND (*tp, 0);
1834
1835 while (TREE_CODE (*base) == COMPONENT_REF
1836 || TREE_CODE (*base) == ARRAY_REF)
1837 base = &TREE_OPERAND (*base, 0);
1838
1839 if (TREE_CODE (*base) != VAR_DECL)
1840 return NULL;
1841
1842 tree *new_decl = info->adjusted_vars->get (*base);
1843 if (!new_decl)
1844 return NULL;
1845
1846 int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
1847 tree field = TREE_OPERAND (*tp, 1);
1848
1849 /* Adjust the type of the field. */
1850 int field_quals = TYPE_QUALS (TREE_TYPE (field));
1851 if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
1852 {
1853 tree *field_type = &TREE_TYPE (field);
1854 while (TREE_CODE (*field_type) == ARRAY_TYPE)
1855 field_type = &TREE_TYPE (*field_type);
1856 field_quals |= base_quals;
1857 *field_type = build_qualified_type (*field_type, field_quals);
1858 }
1859
1860 /* Adjust the type of the component ref itself. */
1861 tree comp_type = TREE_TYPE (*tp);
1862 int comp_quals = TYPE_QUALS (comp_type);
1863 if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
1864 {
1865 comp_quals |= base_quals;
1866 TREE_TYPE (*tp)
1867 = build_qualified_type (comp_type, comp_quals);
1868 }
1869
1870 *base = *new_decl;
1871 info->modified = true;
1872 }
1873 else if (TREE_CODE (*tp) == VAR_DECL)
1874 {
1875 tree *new_decl = info->adjusted_vars->get (*tp);
1876 if (new_decl)
1877 {
1878 *tp = *new_decl;
1879 info->modified = true;
1880 }
1881 }
1882
1883 return NULL_TREE;
1884}
1885
1886/* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
1887
1888static bool
1889is_sync_builtin_call (gcall *call)
1890{
1891 tree callee = gimple_call_fndecl (call);
1892
1893 if (callee != NULL_TREE
1894 && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
1895 switch (DECL_FUNCTION_CODE (callee))
1896 {
1897#undef DEF_SYNC_BUILTIN
1898#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
1899#include "sync-builtins.def"
1900#undef DEF_SYNC_BUILTIN
1901 return true;
1902
1903 default:
1904 ;
1905 }
1906
1907 return false;
1908}
1909
629b3d75
MJ
1910/* Main entry point for oacc transformations which run on the device
1911 compiler after LTO, so we know what the target device is at this
1912 point (including the host fallback). */
1913
1914static unsigned int
1915execute_oacc_device_lower ()
1916{
1917 tree attrs = oacc_get_fn_attrib (current_function_decl);
1918
1919 if (!attrs)
1920 /* Not an offloaded function. */
1921 return 0;
1922
1923 /* Parse the default dim argument exactly once. */
1924 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1925 {
1926 oacc_parse_default_dims (flag_openacc_dims);
1927 flag_openacc_dims = (char *)&flag_openacc_dims;
1928 }
1929
703e4f86
TS
1930 bool is_oacc_parallel
1931 = (lookup_attribute ("oacc parallel",
1932 DECL_ATTRIBUTES (current_function_decl)) != NULL);
b0f271ce
TS
1933 bool is_oacc_kernels
1934 = (lookup_attribute ("oacc kernels",
1935 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1936 bool is_oacc_serial
1937 = (lookup_attribute ("oacc serial",
1938 DECL_ATTRIBUTES (current_function_decl)) != NULL);
e898ce79
GB
1939 bool is_oacc_parallel_kernels_parallelized
1940 = (lookup_attribute ("oacc parallel_kernels_parallelized",
1941 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1942 bool is_oacc_parallel_kernels_gang_single
1943 = (lookup_attribute ("oacc parallel_kernels_gang_single",
1944 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1945 int fn_level = oacc_fn_attrib_level (attrs);
1946 bool is_oacc_routine = (fn_level >= 0);
1947 gcc_checking_assert (is_oacc_parallel
1948 + is_oacc_kernels
1949 + is_oacc_serial
e898ce79
GB
1950 + is_oacc_parallel_kernels_parallelized
1951 + is_oacc_parallel_kernels_gang_single
703e4f86
TS
1952 + is_oacc_routine
1953 == 1);
1954
b0f271ce
TS
1955 bool is_oacc_kernels_parallelized
1956 = (lookup_attribute ("oacc kernels parallelized",
1957 DECL_ATTRIBUTES (current_function_decl)) != NULL);
703e4f86
TS
1958 if (is_oacc_kernels_parallelized)
1959 gcc_checking_assert (is_oacc_kernels);
1960
1961 if (dump_file)
1962 {
1963 if (is_oacc_parallel)
1964 fprintf (dump_file, "Function is OpenACC parallel offload\n");
1965 else if (is_oacc_kernels)
1966 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1967 (is_oacc_kernels_parallelized
1968 ? "parallelized" : "unparallelized"));
1969 else if (is_oacc_serial)
1970 fprintf (dump_file, "Function is OpenACC serial offload\n");
e898ce79
GB
1971 else if (is_oacc_parallel_kernels_parallelized)
1972 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1973 "parallel_kernels_parallelized");
1974 else if (is_oacc_parallel_kernels_gang_single)
1975 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1976 "parallel_kernels_gang_single");
703e4f86
TS
1977 else if (is_oacc_routine)
1978 fprintf (dump_file, "Function is OpenACC routine level %d\n",
1979 fn_level);
1980 else
1981 gcc_unreachable ();
1982 }
b0f271ce 1983
fd71a9a2
TS
1984 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1985 kernels, so remove the parallelism dimensions function attributes
1986 potentially set earlier on. */
1987 if (is_oacc_kernels && !is_oacc_kernels_parallelized)
1988 {
1989 oacc_set_fn_attrib (current_function_decl, NULL, NULL);
1990 attrs = oacc_get_fn_attrib (current_function_decl);
1991 }
1992
629b3d75
MJ
1993 /* Discover, partition and process the loops. */
1994 oacc_loop *loops = oacc_loop_discovery ();
629b3d75 1995
703e4f86
TS
1996 unsigned outer_mask = 0;
1997 if (is_oacc_routine)
1998 outer_mask = GOMP_DIM_MASK (fn_level) - 1;
629b3d75 1999 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
b0f271ce
TS
2000 /* OpenACC kernels constructs are special: they currently don't use the
2001 generic oacc_loop infrastructure and attribute/dimension processing. */
2002 if (is_oacc_kernels && is_oacc_kernels_parallelized)
2003 {
2004 /* Parallelized OpenACC kernels constructs use gang parallelism. See
2005 also tree-parloops.c:create_parallel_loop. */
2006 used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2007 }
629b3d75 2008
b0f271ce 2009 int dims[GOMP_DIM_MAX];
629b3d75
MJ
2010 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
2011
2012 if (dump_file)
2013 {
2014 const char *comma = "Compute dimensions [";
2015 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
2016 fprintf (dump_file, "%s%d", comma, dims[ix]);
2017 fprintf (dump_file, "]\n");
2018 }
2019
e898ce79
GB
2020 /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
2021 a single gang only. */
2022 if (is_oacc_parallel_kernels_gang_single)
2023 gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
2024
629b3d75
MJ
2025 oacc_loop_process (loops);
2026 if (dump_file)
2027 {
2028 fprintf (dump_file, "OpenACC loops\n");
2029 dump_oacc_loop (dump_file, loops, 0);
2030 fprintf (dump_file, "\n");
2031 }
5d390fd3
TS
2032 if (dump_enabled_p ())
2033 {
2034 oacc_loop *l = loops;
2035 /* OpenACC kernels constructs are special: they currently don't use the
2036 generic oacc_loop infrastructure. */
2037 if (is_oacc_kernels)
2038 {
2039 /* Create a fake oacc_loop for diagnostic purposes. */
2040 l = new_oacc_loop_raw (NULL,
2041 DECL_SOURCE_LOCATION (current_function_decl));
2042 l->mask = used_mask;
2043 }
2044 else
2045 {
2046 /* Skip the outermost, dummy OpenACC loop */
2047 l = l->child;
2048 }
2049 if (l)
2050 inform_oacc_loop (l);
2051 if (is_oacc_kernels)
2052 free_oacc_loop (l);
2053 }
629b3d75
MJ
2054
2055 /* Offloaded targets may introduce new basic blocks, which require
2056 dominance information to update SSA. */
2057 calculate_dominance_info (CDI_DOMINATORS);
2058
29a2f518
JB
2059 hash_map<tree, tree> adjusted_vars;
2060
629b3d75
MJ
2061 /* Now lower internal loop functions to target-specific code
2062 sequences. */
2063 basic_block bb;
2064 FOR_ALL_BB_FN (bb, cfun)
2065 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2066 {
2067 gimple *stmt = gsi_stmt (gsi);
2068 if (!is_gimple_call (stmt))
2069 {
2070 gsi_next (&gsi);
2071 continue;
2072 }
2073
2074 gcall *call = as_a <gcall *> (stmt);
2075 if (!gimple_call_internal_p (call))
2076 {
2077 gsi_next (&gsi);
2078 continue;
2079 }
2080
2081 /* Rewind to allow rescan. */
2082 gsi_prev (&gsi);
2083 bool rescan = false, remove = false;
2084 enum internal_fn ifn_code = gimple_call_internal_fn (call);
2085
2086 switch (ifn_code)
2087 {
2088 default: break;
2089
02889d23
CLT
2090 case IFN_GOACC_TILE:
2091 oacc_xform_tile (call);
2092 rescan = true;
2093 break;
2094
629b3d75
MJ
2095 case IFN_GOACC_LOOP:
2096 oacc_xform_loop (call);
2097 rescan = true;
2098 break;
2099
2100 case IFN_GOACC_REDUCTION:
2101 /* Mark the function for SSA renaming. */
2102 mark_virtual_operands_for_renaming (cfun);
2103
2104 /* If the level is -1, this ended up being an unused
2105 axis. Handle as a default. */
2106 if (integer_minus_onep (gimple_call_arg (call, 3)))
2107 default_goacc_reduction (call);
2108 else
2109 targetm.goacc.reduction (call);
2110 rescan = true;
2111 break;
2112
2113 case IFN_UNIQUE:
2114 {
2115 enum ifn_unique_kind kind
2116 = ((enum ifn_unique_kind)
2117 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
2118
2119 switch (kind)
2120 {
2121 default:
02889d23 2122 break;
629b3d75
MJ
2123
2124 case IFN_UNIQUE_OACC_FORK:
2125 case IFN_UNIQUE_OACC_JOIN:
2126 if (integer_minus_onep (gimple_call_arg (call, 2)))
2127 remove = true;
2128 else if (!targetm.goacc.fork_join
2129 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
2130 remove = true;
2131 break;
2132
2133 case IFN_UNIQUE_OACC_HEAD_MARK:
2134 case IFN_UNIQUE_OACC_TAIL_MARK:
2135 remove = true;
2136 break;
29a2f518
JB
2137
2138 case IFN_UNIQUE_OACC_PRIVATE:
2139 {
2140 HOST_WIDE_INT level
2141 = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
ff451ea7
TS
2142 gcc_checking_assert (level == -1
2143 || (level >= 0
2144 && level < GOMP_DIM_MAX));
29a2f518
JB
2145 for (unsigned i = 3;
2146 i < gimple_call_num_args (call);
2147 i++)
2148 {
2149 tree arg = gimple_call_arg (call, i);
2150 gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
2151 tree decl = TREE_OPERAND (arg, 0);
2152 if (dump_file && (dump_flags & TDF_DETAILS))
2153 {
2154 static char const *const axes[] =
2155 /* Must be kept in sync with GOMP_DIM
2156 enumeration. */
2157 { "gang", "worker", "vector" };
2158 fprintf (dump_file, "Decl UID %u has %s "
2159 "partitioning:", DECL_UID (decl),
ff451ea7 2160 (level == -1 ? "UNKNOWN" : axes[level]));
29a2f518
JB
2161 print_generic_decl (dump_file, decl, TDF_SLIM);
2162 fputc ('\n', dump_file);
2163 }
ff451ea7
TS
2164 if (level != -1
2165 && targetm.goacc.adjust_private_decl)
29a2f518
JB
2166 {
2167 tree oldtype = TREE_TYPE (decl);
2168 tree newdecl
2169 = targetm.goacc.adjust_private_decl (decl, level);
2170 if (TREE_TYPE (newdecl) != oldtype
2171 || newdecl != decl)
2172 adjusted_vars.put (decl, newdecl);
2173 }
2174 }
2175 remove = true;
2176 }
2177 break;
629b3d75
MJ
2178 }
2179 break;
2180 }
2181 }
2182
2183 if (gsi_end_p (gsi))
2184 /* We rewound past the beginning of the BB. */
2185 gsi = gsi_start_bb (bb);
2186 else
2187 /* Undo the rewind. */
2188 gsi_next (&gsi);
2189
2190 if (remove)
2191 {
2192 if (gimple_vdef (call))
2193 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
2194 if (gimple_call_lhs (call))
2195 {
2196 /* Propagate the data dependency var. */
2197 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
2198 gimple_call_arg (call, 1));
2199 gsi_replace (&gsi, ass, false);
2200 }
2201 else
2202 gsi_remove (&gsi, true);
2203 }
2204 else if (!rescan)
2205 /* If not rescanning, advance over the call. */
2206 gsi_next (&gsi);
2207 }
2208
29a2f518
JB
2209 /* Make adjustments to gang-private local variables if required by the
2210 target, e.g. forcing them into a particular address space. Afterwards,
2211 ADDR_EXPR nodes which have adjusted variables as their argument need to
2212 be modified in one of two ways:
2213
2214 1. They can be recreated, making a pointer to the variable in the new
2215 address space, or
2216
2217 2. The address of the variable in the new address space can be taken,
2218 converted to the default (original) address space, and the result of
2219 that conversion subsituted in place of the original ADDR_EXPR node.
2220
2221 Which of these is done depends on the gimple statement being processed.
2222 At present atomic operations and inline asms use (1), and everything else
2223 uses (2). At least on AMD GCN, there are atomic operations that work
2224 directly in the LDS address space.
2225
2226 COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
2227 the new decl, adjusting types of appropriate tree nodes as necessary. */
2228
2229 if (targetm.goacc.adjust_private_decl)
2230 {
2231 FOR_ALL_BB_FN (bb, cfun)
2232 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2233 !gsi_end_p (gsi);
2234 gsi_next (&gsi))
2235 {
2236 gimple *stmt = gsi_stmt (gsi);
2237 walk_stmt_info wi;
2238 var_decl_rewrite_info info;
2239
2240 info.avoid_pointer_conversion
2241 = (is_gimple_call (stmt)
2242 && is_sync_builtin_call (as_a <gcall *> (stmt)))
2243 || gimple_code (stmt) == GIMPLE_ASM;
2244 info.stmt = stmt;
2245 info.modified = false;
2246 info.adjusted_vars = &adjusted_vars;
2247
2248 memset (&wi, 0, sizeof (wi));
2249 wi.info = &info;
2250
2251 walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
2252
2253 if (info.modified)
2254 update_stmt (stmt);
2255 }
2256 }
2257
629b3d75
MJ
2258 free_oacc_loop (loops);
2259
2260 return 0;
2261}
2262
2263/* Default launch dimension validator. Force everything to 1. A
2264 backend that wants to provide larger dimensions must override this
2265 hook. */
2266
2267bool
2268default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
46dedae6
TV
2269 int ARG_UNUSED (fn_level),
2270 unsigned ARG_UNUSED (used))
629b3d75
MJ
2271{
2272 bool changed = false;
2273
2274 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
2275 {
2276 if (dims[ix] != 1)
2277 {
2278 dims[ix] = 1;
2279 changed = true;
2280 }
2281 }
2282
2283 return changed;
2284}
2285
01914336 2286/* Default dimension bound is unknown on accelerator and 1 on host. */
629b3d75
MJ
2287
2288int
2289default_goacc_dim_limit (int ARG_UNUSED (axis))
2290{
2291#ifdef ACCEL_COMPILER
2292 return 0;
2293#else
2294 return 1;
2295#endif
2296}
2297
2298namespace {
2299
2300const pass_data pass_data_oacc_device_lower =
2301{
2302 GIMPLE_PASS, /* type */
2303 "oaccdevlow", /* name */
fd2b8c8b 2304 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2305 TV_NONE, /* tv_id */
2306 PROP_cfg, /* properties_required */
2307 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2308 0, /* properties_destroyed */
2309 0, /* todo_flags_start */
2310 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
2311};
2312
2313class pass_oacc_device_lower : public gimple_opt_pass
2314{
2315public:
2316 pass_oacc_device_lower (gcc::context *ctxt)
2317 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
2318 {}
2319
2320 /* opt_pass methods: */
2321 virtual bool gate (function *) { return flag_openacc; };
2322
2323 virtual unsigned int execute (function *)
2324 {
2325 return execute_oacc_device_lower ();
2326 }
2327
2328}; // class pass_oacc_device_lower
2329
2330} // anon namespace
2331
2332gimple_opt_pass *
2333make_pass_oacc_device_lower (gcc::context *ctxt)
2334{
2335 return new pass_oacc_device_lower (ctxt);
2336}
2337
0c6b03b5
AM
2338\f
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  /* The first argument of the alloc call is the SSA name defined by the
     matching GOMP_SIMT_ENTER call.  */
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type to gather all privatized variables;
     SIMTREC is retyped to point at it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Arguments 1..N of the enter call are ADDR_EXPRs of the privatized
     variables (or null_pointer_node placeholders, which are skipped).  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Create a field mirroring the variable's name, type, alignment
	 and volatility.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect accesses to VAR through (*SIMTREC).FIELD via a
	 DECL_VALUE_EXPR.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the alloc call with one carrying the now-known size and
     alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  /* The GOMP_SIMT_ENTER call is no longer needed; keep SIMDUID's
     definition by degrading the call to a plain assignment from its
     first argument.  */
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Emit a clobber of the record just before the matching
	 GOMP_SIMT_EXIT, marking the storage as dead there.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_clobber (rectype);
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
2405
2406/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2407
2408static tree
2409find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2410{
2411 tree t = *tp;
2412
2413 if (VAR_P (t)
2414 && DECL_HAS_VALUE_EXPR_P (t)
2415 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2416 {
2417 *walk_subtrees = 0;
2418 return t;
2419 }
2420 return NULL_TREE;
2421}
2422
629b3d75
MJ
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* VF == 1 means "not a SIMT target": all SIMT placeholders fold away.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  bool calls_declare_variant_alt
    = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  continue;
	if (!gimple_call_internal_p (stmt))
	  {
	    /* Non-internal call: possibly redirect it to the resolved
	       'declare variant' alternative.  */
	    if (calls_declare_variant_alt)
	      if (tree fndecl = gimple_call_fndecl (stmt))
		{
		  tree new_fndecl = omp_resolve_declare_variant (fndecl);
		  if (new_fndecl != fndecl)
		    {
		      gimple_call_set_fndecl (stmt, new_fndecl);
		      update_stmt (stmt);
		    }
		}
	    continue;
	  }
	/* RHS stays NULL_TREE when the call must be kept for later
	   expansion; otherwise the call is replaced by LHS = RHS (or
	   a nop when there is no LHS).  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    /* On non-SIMT targets the enter call degrades to copying
	       its first argument.  */
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    /* On SIMT targets, fill in the record size/alignment now
	       that privatized variables are known.  */
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    /* Shared tail for ENTER/ENTER_ALLOC/EXIT: keep the call on
	       SIMT targets, drop its vdef otherwise.  */
	    if (vf != 1)
	      continue;
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    /* VF is always foldable to a constant here.  */
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD placeholders fold the opposite way: they simplify on
	       SIMT targets.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  /* Privatized variables got a DECL_VALUE_EXPR in
     ompdevlow_adjust_simt_enter; regimplify their uses.  Walk backwards
     so clobber removal does not invalidate the iterator.  */
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
2525
namespace {

/* Pass descriptor for the OpenMP device lowering pass below.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Gimple pass wrapper around execute_omp_device_lower.  */

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Run if device lowering has not happened yet, or (under OpenMP) when
     the function still calls 'declare variant' alternatives that need
     resolving.  */
  virtual bool gate (function *fun)
  {
    return (!(fun->curr_properties & PROP_gimple_lomp_dev)
	    || (flag_openmp
		&& cgraph_node::get (fun->decl)->calls_declare_variant_alt));
  }
  virtual unsigned int execute (function *)
  {
    return execute_omp_device_lower ();
  }

}; // class pass_omp_device_lower

} // anon namespace
2563
2564gimple_opt_pass *
2565make_pass_omp_device_lower (gcc::context *ctxt)
2566{
2567 return new pass_omp_device_lower (ctxt);
2568}
2569
2570/* "omp declare target link" handling pass. */
2571
2572namespace {
2573
2574const pass_data pass_data_omp_target_link =
2575{
2576 GIMPLE_PASS, /* type */
2577 "omptargetlink", /* name */
fd2b8c8b 2578 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2579 TV_NONE, /* tv_id */
2580 PROP_ssa, /* properties_required */
2581 0, /* properties_provided */
2582 0, /* properties_destroyed */
2583 0, /* todo_flags_start */
2584 TODO_update_ssa, /* todo_flags_finish */
2585};
2586
2587class pass_omp_target_link : public gimple_opt_pass
2588{
2589public:
2590 pass_omp_target_link (gcc::context *ctxt)
2591 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
2592 {}
2593
2594 /* opt_pass methods: */
2595 virtual bool gate (function *fun)
2596 {
2597#ifdef ACCEL_COMPILER
46dbeb40 2598 return offloading_function_p (fun->decl);
629b3d75
MJ
2599#else
2600 (void) fun;
2601 return false;
2602#endif
2603 }
2604
2605 virtual unsigned execute (function *);
2606};
2607
2608/* Callback for walk_gimple_stmt used to scan for link var operands. */
2609
2610static tree
2611find_link_var_op (tree *tp, int *walk_subtrees, void *)
2612{
2613 tree t = *tp;
2614
56f71478
JJ
2615 if (VAR_P (t)
2616 && DECL_HAS_VALUE_EXPR_P (t)
2617 && is_global_var (t)
629b3d75
MJ
2618 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2619 {
2620 *walk_subtrees = 0;
2621 return t;
2622 }
2623
2624 return NULL_TREE;
2625}
2626
2627unsigned
2628pass_omp_target_link::execute (function *fun)
2629{
2630 basic_block bb;
2631 FOR_EACH_BB_FN (bb, fun)
2632 {
2633 gimple_stmt_iterator gsi;
2634 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2635 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2636 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2637 }
2638
2639 return 0;
2640}
2641
2642} // anon namespace
2643
2644gimple_opt_pass *
2645make_pass_omp_target_link (gcc::context *ctxt)
2646{
2647 return new pass_omp_target_link (ctxt);
2648}