/* Bits of OpenMP and OpenACC handling that is specific to device offloading
   and a lowering pass for OpenACC device directives.

   Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "internal-fn.h"
#include "langhooks.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "common/common-target.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "intl.h"
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
#include "context.h"

/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX]; /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions.  */

  tree routine; /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};

/* Holds offload tables with decls.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;

/* Return level at which oacc routine may spawn a partitioned loop, or
   -1 if it is not a routine (i.e. is an offload fn).  */

int
oacc_fn_attrib_level (tree attr)
{
  tree pos = TREE_VALUE (attr);

  if (!TREE_PURPOSE (pos))
    return -1;

  int ix = 0;
  for (ix = 0; ix != GOMP_DIM_MAX;
       ix++, pos = TREE_CHAIN (pos))
    if (!integer_zerop (TREE_PURPOSE (pos)))
      break;

  return ix;
}

/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
                                         vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      bool is_link_var
        = is_var
#ifdef ACCEL_COMPILER
          && DECL_HAS_VALUE_EXPR_P (it)
#endif
          && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* See also omp_finish_file and output_offload_tables in lto-cgraph.c.  */
      if (!in_lto_p && !symtab_node::get (it))
        continue;

      tree size = NULL_TREE;
      if (is_var)
        size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
        addr = build_fold_addr_expr (it);
      else
        {
#ifdef ACCEL_COMPILER
          /* For "omp declare target link" vars add address of the pointer to
             the target table, instead of address of the var.  */
          tree value_expr = DECL_VALUE_EXPR (it);
          tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
          varpool_node::finalize_decl (link_ptr_decl);
          addr = build_fold_addr_expr (link_ptr_decl);
#else
          addr = build_fold_addr_expr (it);
#endif

          /* Most significant bit of the size marks "omp declare target link"
             vars in host and target tables.  */
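          /* E.g. assuming 64-bit pointers, a 16-byte link variable would be
             recorded with size 0x8000000000000010 (illustrative value).  */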
          unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
          isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
                            * BITS_PER_UNIT - 1);
          size = wide_int_to_tree (const_ptr_type_node, isize);
        }

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
        CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}

/* Return true if DECL is a function for which its references should be
   analyzed.  */

static bool
omp_declare_target_fn_p (tree decl)
{
  return (TREE_CODE (decl) == FUNCTION_DECL
          && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
          && !lookup_attribute ("omp declare target host",
                                DECL_ATTRIBUTES (decl))
          && (!flag_openacc
              || oacc_get_fn_attrib (decl) == NULL_TREE));
}

/* Return true if DECL is a variable for which its initializer references
   should be analyzed.  */

static bool
omp_declare_target_var_p (tree decl)
{
  return (VAR_P (decl)
          && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
          && !lookup_attribute ("omp declare target link",
                                DECL_ATTRIBUTES (decl)));
}

/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to.  */

static tree
omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == FUNCTION_DECL
      && !omp_declare_target_fn_p (*tp)
      && !lookup_attribute ("omp declare target host", DECL_ATTRIBUTES (*tp)))
    {
      tree id = get_identifier ("omp declare target");
      if (!DECL_EXTERNAL (*tp) && DECL_SAVED_TREE (*tp))
        ((vec<tree> *) data)->safe_push (*tp);
      DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL)
        {
          node->offloadable = 1;
          if (ENABLE_OFFLOADING)
            g->have_offload = true;
        }
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  /* else if (TREE_CODE (*tp) == OMP_TARGET)
       {
         if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
           if (OMP_DEVICE_ANCESTOR (dev))
             *walk_subtrees = 0;
       } */
  return NULL_TREE;
}

/* Similarly, but ignore references outside of OMP_TARGET regions.  */

static tree
omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == OMP_TARGET)
    {
      /* And not OMP_DEVICE_ANCESTOR.  */
      walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
                                    omp_discover_declare_target_tgt_fn_r,
                                    data);
      *walk_subtrees = 0;
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Helper function for omp_discover_implicit_declare_target, called through
   walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
   declare target to.  */

static tree
omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
{
  if (TREE_CODE (*tp) == FUNCTION_DECL)
    return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
  else if (VAR_P (*tp)
           && is_global_var (*tp)
           && !omp_declare_target_var_p (*tp))
    {
      tree id = get_identifier ("omp declare target");
      if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
        {
          error_at (DECL_SOURCE_LOCATION (*tp),
                    "%qD specified both in declare target %<link%> and "
                    "implicitly in %<to%> clauses", *tp);
          DECL_ATTRIBUTES (*tp)
            = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
        }
      if (TREE_STATIC (*tp) && DECL_INITIAL (*tp))
        ((vec<tree> *) data)->safe_push (*tp);
      DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
      symtab_node *node = symtab_node::get (*tp);
      if (node != NULL && !node->offloadable)
        {
          node->offloadable = 1;
          if (ENABLE_OFFLOADING)
            {
              g->have_offload = true;
              if (is_a <varpool_node *> (node))
                vec_safe_push (offload_vars, node->decl);
            }
        }
    }
  else if (TYPE_P (*tp))
    *walk_subtrees = 0;
  return NULL_TREE;
}

/* Perform the OpenMP implicit declare target to discovery.  */

void
omp_discover_implicit_declare_target (void)
{
  cgraph_node *node;
  varpool_node *vnode;
  auto_vec<tree> worklist;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (DECL_SAVED_TREE (node->decl))
      {
        if (omp_declare_target_fn_p (node->decl))
          worklist.safe_push (node->decl);
        else if (DECL_STRUCT_FUNCTION (node->decl)
                 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
          worklist.safe_push (node->decl);
      }
  FOR_EACH_STATIC_INITIALIZER (vnode)
    if (omp_declare_target_var_p (vnode->decl))
      worklist.safe_push (vnode->decl);
  while (!worklist.is_empty ())
    {
      tree decl = worklist.pop ();
      if (VAR_P (decl))
        walk_tree_without_duplicates (&DECL_INITIAL (decl),
                                      omp_discover_declare_target_var_r,
                                      &worklist);
      else if (omp_declare_target_fn_p (decl))
        walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
                                      omp_discover_declare_target_tgt_fn_r,
                                      &worklist);
      else
        walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
                                      omp_discover_declare_target_fn_r,
                                      &worklist);
    }
}


/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
                                                    vec_safe_length (v_v));
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
                                                     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                    get_identifier (".offload_func_table"),
                                    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                   get_identifier (".offload_var_table"),
                                   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
         otherwise a joint table in a binary will contain padding between
         tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      for (unsigned i = 0; i < num_funcs; i++)
        {
          tree it = (*offload_funcs)[i];
          /* See also add_decls_addresses_to_decl_constructor
             and output_offload_tables in lto-cgraph.c.  */
          if (!in_lto_p && !symtab_node::get (it))
            continue;
          targetm.record_offload_symbol (it);
        }
      for (unsigned i = 0; i < num_vars; i++)
        {
          tree it = (*offload_vars)[i];
          if (!in_lto_p && !symtab_node::get (it))
            continue;
#ifdef ACCEL_COMPILER
          if (DECL_HAS_VALUE_EXPR_P (it)
              && lookup_attribute ("omp declare target link",
                                   DECL_ATTRIBUTES (it)))
            {
              tree value_expr = DECL_VALUE_EXPR (it);
              tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
              targetm.record_offload_symbol (link_ptr_decl);
              varpool_node::finalize_decl (link_ptr_decl);
            }
          else
#endif
            targetm.record_offload_symbol (it);
        }
    }
}

/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
   axis DIM.  Return a tmp var holding the result.  */

static tree
oacc_dim_call (bool pos, int dim, gimple_seq *seq)
{
  tree arg = build_int_cst (unsigned_type_node, dim);
  tree size = create_tmp_var (integer_type_node);
  enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
  gimple *call = gimple_build_call_internal (fn, 1, arg);

  gimple_call_set_lhs (call, size);
  gimple_seq_add_stmt (seq, call);

  return size;
}

/* Find the number of threads (POS = false), or thread number (POS =
   true) for an OpenACC region partitioned as MASK.  Setup code
   required for the calculation is added to SEQ.  */

static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
  unsigned ix;

  /* Start at gang level, and examine relevant dimension indices.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (GOMP_DIM_MASK (ix) & mask)
      {
        if (res)
          {
            /* We had an outer index, so scale that by the size of
               this dimension.  */
            tree n = oacc_dim_call (false, ix, seq);
            res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
          }
        if (pos)
          {
            /* Determine index in this dimension.  */
            tree id = oacc_dim_call (true, ix, seq);
            if (res)
              res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
            else
              res = id;
          }
      }

  if (res == NULL_TREE)
    res = integer_zero_node;

  return res;
}
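
/* For instance, for MASK = gang|vector and POS = true the sequence built
   above computes gang_pos * vector_size + vector_pos, i.e. a linearized
   position across the selected dimensions.  */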

/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = NULL_TREE;
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
  if (!lhs)
    {
      gsi_replace_with_seq (&gsi, seq, true);
      return;
    }

  type = TREE_TYPE (lhs);

#ifdef ACCEL_COMPILER
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
         contiguous run of iterations.  Otherwise we want each element
         to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
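  /* As an illustrative example: with range 1000, step 1, 10 threads in the
     selected dimensions and chunk_size 25, the chunked non-striding case
     yields ceil (1000 / (25 * 10 * 1)) = 4 chunks.  */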
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
        r = build_int_cst (type, 1);
      else
        {
          /* chunk_max
             = (range - dir) / (chunks * step * num_threads) + dir  */
          tree per = oacc_thread_numbers (false, mask, &seq);
          per = fold_convert (type, per);
          chunk_size = fold_convert (type, chunk_size);
          per = fold_build2 (MULT_EXPR, type, per, chunk_size);
          per = fold_build2 (MULT_EXPR, type, per, step);
          r = build2 (MINUS_EXPR, type, range, dir);
          r = build2 (PLUS_EXPR, type, r, per);
          r = build2 (TRUNC_DIV_EXPR, type, r, per);
        }
      break;

    case IFN_GOACC_LOOP_STEP:
      {
        /* If striding, step by the entire compute volume, otherwise
           step by the inner volume.  */
        unsigned volume = striding ? mask : inner_mask;

        r = oacc_thread_numbers (false, volume, &seq);
        r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
          && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
          /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
             the loop.  */
          && (flag_tree_loop_vectorize
              || !global_options_set.x_flag_tree_loop_vectorize))
        {
          basic_block bb = gsi_bb (gsi);
          class loop *parent = bb->loop_father;
          class loop *body = parent->inner;

          parent->force_vectorize = true;
          parent->safelen = INT_MAX;

          /* "Chunking loops" may have inner loops.  */
          if (parent->inner)
            {
              body->force_vectorize = true;
              body->safelen = INT_MAX;
            }

          cfun->has_force_vectorize_loops = true;
        }
      if (striding)
        {
          r = oacc_thread_numbers (true, mask, &seq);
          r = fold_convert (diff_type, r);
        }
      else
        {
          tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
          tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
          tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
                                     inner_size, outer_size);

          volume = fold_convert (diff_type, volume);
          if (chunking)
            chunk_size = fold_convert (diff_type, chunk_size);
          else
            {
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

              chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
              chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
              chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
            }

          tree span = build2 (MULT_EXPR, diff_type, chunk_size,
                              fold_convert (diff_type, inner_size));
          r = oacc_thread_numbers (true, outer_mask, &seq);
          r = fold_convert (diff_type, r);
          r = build2 (MULT_EXPR, diff_type, r, span);

          tree inner = oacc_thread_numbers (true, inner_mask, &seq);
          inner = fold_convert (diff_type, inner);
          r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

          if (chunking)
            {
              tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
              tree per
                = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
              per = build2 (MULT_EXPR, diff_type, per, chunk);

              r = build2 (PLUS_EXPR, diff_type, r, per);
            }
        }
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
        r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
        r = range;
      else
        {
          tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
          tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
          tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
                                     inner_size, outer_size);

          volume = fold_convert (diff_type, volume);
          if (chunking)
            chunk_size = fold_convert (diff_type, chunk_size);
          else
            {
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

              chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
              chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
              chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
            }

          tree span = build2 (MULT_EXPR, diff_type, chunk_size,
                              fold_convert (diff_type, inner_size));

          r = fold_build2 (MULT_EXPR, diff_type, span, step);

          tree offset = gimple_call_arg (call, 6);
          r = build2 (PLUS_EXPR, diff_type, r,
                      fold_convert (diff_type, offset));
          r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
                      diff_type, r, range);
        }
      if (diff_type != type)
        r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  gcc_assert (!(e_mask
                & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
                    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
         the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
        first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
        second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      if (!first_size)
        {
          first_size = second_size;
          second_size = NULL_TREE;
        }

      if (loop_no + 1 == collapse)
        {
          span = first_size;
          if (!loop_no && second_size)
            span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
                                span, second_size);
        }
      else if (loop_no + 2 == collapse)
        span = second_size;
      else
        span = NULL_TREE;

      if (!span)
        /* There's no obvious element size for this loop.  Options
           are 1, first_size or some non-unity constant (32 is my
           favourite).  We should gather some statistics.  */
        span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Default partitioned and minimum partitioned dimensions.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];

int
oacc_get_default_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_default_dims[dim];
}

int
oacc_get_min_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_min_dims[dim];
}

/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */
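
/* For example, "-fopenacc-dim=32:4:128" requests 32 gangs, 4 workers and a
   vector length of 128 (gang:worker:vector order), while "::128" overrides
   only the vector length; omitted positions are left at -1 for the backend
   to choose.  */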

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
        {
          if (ix)
            {
              if (*pos != ':')
                goto malformed;
              pos++;
            }

          if (*pos != ':')
            {
              long val;
              const char *eptr;

              errno = 0;
              val = strtol (pos, CONST_CAST (char **, &eptr), 10);
              if (errno || val <= 0 || (int) val != val)
                goto malformed;
              pos = eptr;
              oacc_default_dims[ix] = (int) val;
            }
        }
      if (*pos)
        {
        malformed:
          error_at (UNKNOWN_LOCATION,
                    "%<-fopenacc-dim%> operand is malformed at %qs", pos);
        }
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
}

/* Validate and update the dimensions for offloaded FN.  ATTRS is the
   raw attribute.  DIMS is an array of dimensions, which is filled in.
   LEVEL is the partitioning level of a routine, or -1 for an offload
   region itself.  USED is the mask of partitioned execution in the
   function.  */

static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level, used);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
        /* The OpenACC spec says 'If the [num_gangs] clause is not
           specified, an implementation-defined default will be used;
           the default may depend on the code within the construct.'
           (2.5.6).  Thus an implementation is free to choose
           non-unity default for a parallel region that doesn't have
           any gang-partitioned loops.  However, it appears that there
           is a sufficient body of user code that expects non-gang
           partitioned regions to not execute in gang-redundant mode.
           So we (a) don't warn about the non-portability and (b) pick
           the minimum permissible dimension size when there is no
           partitioned execution.  Otherwise we pick the global
           default for the dimension, which the user can control.  The
           same wording and logic applies to num_workers and
           vector_length, however the worker- or vector- single
           execution doesn't have the same impact as gang-redundant
           execution.  (If the minimum gang-level partitioning is not 1,
           the target is probably too confusing.)  */
        dims[ix] = (used & GOMP_DIM_MASK (ix)
                    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
        changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
        pos = tree_cons (purpose[ix],
                         build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}

/* Create an empty OpenACC loop structure at LOC.  */

static oacc_loop *
new_oacc_loop_raw (oacc_loop *parent, location_t loc)
{
  oacc_loop *loop = XCNEW (oacc_loop);

  loop->parent = parent;

  if (parent)
    {
      loop->sibling = parent->child;
      parent->child = loop;
    }

  loop->loc = loc;
  return loop;
}

/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}

/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}

/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
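  /* The routine may use every partitioning level from LEVEL inwards; e.g.
     for LEVEL == GOMP_DIM_WORKER the mask below covers the worker and
     vector dimensions.  */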
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
                ^ (GOMP_DIM_MASK (level) - 1));
}

/* Finish off the current OpenACC loop ending at tail marker TAIL.
   Return the parent loop.  */

static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
  /* If the loop has been collapsed, don't partition it.  */
  if (loop->ifns.is_empty ())
    loop->mask = loop->flags = 0;
  return loop->parent;
}

/* Free all OpenACC loop structures within LOOP (inclusive).  */

static void
free_oacc_loop (oacc_loop *loop)
{
  if (loop->sibling)
    free_oacc_loop (loop->sibling);
  if (loop->child)
    free_oacc_loop (loop->child);

  loop->ifns.release ();
  free (loop);
}

/* Dump out the OpenACC loop head or tail beginning at FROM.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
                     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind) TREE_INT_CST_LOW
               (gimple_call_arg (stmt, 0)));

          if (k == kind && stmt != from)
            break;
        }
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Dump OpenACC loop LOOP, its children, and its siblings.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
           loop->flags, loop->mask,
           LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
             depth * 2, "", DECL_SOURCE_FILE (loop->routine),
             DECL_SOURCE_LINE (loop->routine),
             IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}

void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}

/* Provide diagnostics on OpenACC loop LOOP, its children, and its
   siblings.  */

static void
inform_oacc_loop (const oacc_loop *loop)
{
  const char *gang
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
  const char *worker
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
  const char *vector
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
  const char *seq = loop->mask == 0 ? " seq" : "";
  const dump_user_location_t loc
    = dump_user_location_t::from_location_t (loop->loc);
  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
                   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
                   vector, seq);

  if (loop->child)
    inform_oacc_loop (loop->child);
  if (loop->sibling)
    inform_oacc_loop (loop->sibling);
}

/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
        continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
        if (tree attrs = oacc_get_fn_attrib (decl))
          {
            gcc_assert (!marker);
            new_oacc_loop_routine (loop, call, decl, attrs);
          }

      if (!gimple_call_internal_p (call))
        continue;

      switch (gimple_call_internal_fn (call))
        {
        default:
          break;

        case IFN_GOACC_LOOP:
        case IFN_GOACC_TILE:
          /* Record the abstraction function, so we can manipulate it
             later.  */
          loop->ifns.safe_push (call);
          break;

        case IFN_UNIQUE:
          enum ifn_unique_kind kind
            = (enum ifn_unique_kind) (TREE_INT_CST_LOW
                                      (gimple_call_arg (call, 0)));
          if (kind == IFN_UNIQUE_OACC_HEAD_MARK
              || kind == IFN_UNIQUE_OACC_TAIL_MARK)
            {
              if (gimple_call_num_args (call) == 2)
                {
                  gcc_assert (marker && !remaining);
                  marker = 0;
                  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
                    loop = finish_oacc_loop (loop);
                  else
                    loop->head_end = call;
                }
              else
                {
                  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

                  if (!marker)
                    {
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop = new_oacc_loop (loop, call);
                      remaining = count;
                    }
                  gcc_assert (count == remaining);
                  if (remaining)
                    {
                      remaining--;
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop->heads[marker] = call;
                      else
                        loop->tails[remaining] = call;
                    }
                  marker++;
                }
            }
        }
    }
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}

/* LOOP is the first sibling.  Reverse the order in place and return
   the new first sibling.  Recurse to child loops.  */

static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
  oacc_loop *last = NULL;
  do
    {
      if (loop->child)
        loop->child = oacc_loop_sibling_nreverse (loop->child);

      oacc_loop *next = loop->sibling;
      loop->sibling = last;
      last = loop;
      loop = next;
    }
  while (loop);

  return last;
}

/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're going to use
     in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}

/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind)
               TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

          if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
            *gimple_call_arg_ptr (stmt, 2) = replacement;
          else if (k == kind && stmt != from)
            break;
        }
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
        *gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
        switch (gimple_call_internal_fn (call))
          {
          case IFN_GOACC_LOOP:
            {
              bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
              gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
              if (!is_e)
                gimple_call_set_arg (call, 4, chunk_arg);
            }
            break;

          case IFN_GOACC_TILE:
            gimple_call_set_arg (call, 3, mask_arg);
            gimple_call_set_arg (call, 4, e_mask_arg);
            break;

          default:
            gcc_unreachable ();
          }

      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
        {
          while (!(GOMP_DIM_MASK (dim) & mask))
            dim++;

          oacc_loop_xform_head_tail (loop->heads[ix], dim);
          oacc_loop_xform_head_tail (loop->tails[ix], dim);

          mask ^= GOMP_DIM_MASK (dim);
        }
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}

/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
                   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
         loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
        = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      if ((this_mask != 0) + auto_par + seq_par > 1)
        {
          if (noisy)
            error_at (loop->loc,
                      seq_par
                      ? G_("%<seq%> overrides other OpenACC loop specifiers")
                      : G_("%<auto%> conflicts with other OpenACC loop "
                           "specifiers"));
          maybe_auto = false;
          loop->flags &= ~OLF_AUTO;
          if (seq_par)
            {
              loop->flags
                &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
              this_mask = 0;
            }
        }

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
        {
          loop->flags |= OLF_AUTO;
          mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
        }
    }

  if (this_mask & outer_mask)
    {
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
        if ((outer->mask | outer->e_mask) & this_mask)
          break;

      if (noisy)
        {
          if (outer)
            {
              error_at (loop->loc,
                        loop->routine
                        ? G_("routine call uses same OpenACC parallelism"
                             " as containing loop")
                        : G_("inner loop uses same OpenACC parallelism"
                             " as containing loop"));
              inform (outer->loc, "containing loop here");
            }
          else
            error_at (loop->loc,
                      loop->routine
                      ? G_("routine call uses OpenACC parallelism disallowed"
                           " by containing routine")
                      : G_("loop uses OpenACC parallelism disallowed"
                           " by containing routine"));

          if (loop->routine)
            inform (DECL_SOURCE_LOCATION (loop->routine),
                    "routine %qD declared here", loop->routine);
        }
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
        {
          if (noisy)
            {
              error_at (loop->loc,
                        "incorrectly nested OpenACC loop parallelism");

              const oacc_loop *outer;
              for (outer = loop->parent;
                   outer->flags && outer->flags < outermost;
                   outer = outer->parent)
                continue;
              inform (outer->loc, "containing loop here");
            }

          this_mask &= ~outermost;
        }
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
         that we put worker there.  The std doesn't contemplate
         specifying all three.  We choose to put worker and vector on
         the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
        this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
             LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
             loop->mask, loop->e_mask);

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}

/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
                           bool outer_assign)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
         non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
        this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
        this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
        {
          /* If we got two axes, allocate the inner one to the element
             loop.  */
          loop->e_mask = this_mask & (this_mask << 1);
          this_mask ^= loop->e_mask;
        }

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
                                               outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
         that we do this even if we already assigned this loop the
         outermost available level above.  That way we'll partition
         this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
         too, making sure it doesn't hit an outer loop.  */
      if (tiling)
        {
          this_mask &= ~(loop->e_mask | loop->mask);
          unsigned tile_mask = ((this_mask >> 1)
                                & ~(outer_mask | loop->e_mask | loop->mask));

          if (tile_mask || loop->mask)
            {
              loop->e_mask |= this_mask;
              this_mask = tile_mask;
            }
          if (!loop->e_mask && noisy)
            warning_at (loop->loc, 0,
                        "insufficient partitioning available"
                        " to parallelize element loop");
        }

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
        warning_at (loop->loc, 0,
                    tiling
                    ? G_("insufficient partitioning available"
                         " to parallelize tile loop")
                    : G_("insufficient partitioning available"
                         " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
             LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
             loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
                                             outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}

/* Walk the OpenACC loop hierarchy to check and assign partitioning
   axes.  Return mask of partitioning.  */

static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
  unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);

  if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
    {
      mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
      mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
    }
  return mask_all;
}

/* Default fork/join early expander.  Delete the function calls if
   there is no RTL expander.  */

bool
default_goacc_fork_join (gcall *ARG_UNUSED (call),
                         const int *ARG_UNUSED (dims), bool is_fork)
{
  if (is_fork)
    return targetm.have_oacc_fork ();
  else
    return targetm.have_oacc_join ();
}

/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'  */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
         if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
        {
          tree dst = build_simple_mem_ref (ref_to_res);
          tree src = var;

          if (code == IFN_GOACC_REDUCTION_SETUP)
            {
              src = dst;
              dst = lhs;
              lhs = NULL;
            }
          gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
        }
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  */
  if ((const void *) flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *) &flag_openacc_dims;
    }
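  /* flag_openacc_dims now points at itself; that is the marker tested
     above, so the parse only happens for the first offloaded function
     encountered.  */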
1704
b0f271ce
TS
1705 bool is_oacc_kernels
1706 = (lookup_attribute ("oacc kernels",
1707 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1708 bool is_oacc_kernels_parallelized
1709 = (lookup_attribute ("oacc kernels parallelized",
1710 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1711
fd71a9a2
TS
1712 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1713 kernels, so remove the parallelism dimensions function attributes
1714 potentially set earlier on. */
1715 if (is_oacc_kernels && !is_oacc_kernels_parallelized)
1716 {
1717 oacc_set_fn_attrib (current_function_decl, NULL, NULL);
1718 attrs = oacc_get_fn_attrib (current_function_decl);
1719 }
1720
629b3d75
MJ
1721 /* Discover, partition and process the loops. */
1722 oacc_loop *loops = oacc_loop_discovery ();
1723 int fn_level = oacc_fn_attrib_level (attrs);
1724
1725 if (dump_file)
25651634
TS
1726 {
1727 if (fn_level >= 0)
1728 fprintf (dump_file, "Function is OpenACC routine level %d\n",
1729 fn_level);
b0f271ce
TS
1730 else if (is_oacc_kernels)
1731 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1732 (is_oacc_kernels_parallelized
1733 ? "parallelized" : "unparallelized"));
25651634
TS
1734 else
1735 fprintf (dump_file, "Function is OpenACC parallel offload\n");
1736 }
629b3d75
MJ
1737
1738 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
1739 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
b0f271ce
TS
1740 /* OpenACC kernels constructs are special: they currently don't use the
1741 generic oacc_loop infrastructure and attribute/dimension processing. */
1742 if (is_oacc_kernels && is_oacc_kernels_parallelized)
1743 {
1744 /* Parallelized OpenACC kernels constructs use gang parallelism. See
1745 also tree-parloops.c:create_parallel_loop. */
1746 used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
1747 }
629b3d75 1748
b0f271ce 1749 int dims[GOMP_DIM_MAX];
629b3d75
MJ
1750 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
1751
1752 if (dump_file)
1753 {
1754 const char *comma = "Compute dimensions [";
1755 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
1756 fprintf (dump_file, "%s%d", comma, dims[ix]);
1757 fprintf (dump_file, "]\n");
1758 }
1759
1760 oacc_loop_process (loops);
1761 if (dump_file)
1762 {
1763 fprintf (dump_file, "OpenACC loops\n");
1764 dump_oacc_loop (dump_file, loops, 0);
1765 fprintf (dump_file, "\n");
1766 }
5d390fd3
TS
1767 if (dump_enabled_p ())
1768 {
1769 oacc_loop *l = loops;
1770 /* OpenACC kernels constructs are special: they currently don't use the
1771 generic oacc_loop infrastructure. */
1772 if (is_oacc_kernels)
1773 {
1774 /* Create a fake oacc_loop for diagnostic purposes. */
1775 l = new_oacc_loop_raw (NULL,
1776 DECL_SOURCE_LOCATION (current_function_decl));
1777 l->mask = used_mask;
1778 }
1779 else
1780 {
1781 /* Skip the outermost, dummy OpenACC loop */
1782 l = l->child;
1783 }
1784 if (l)
1785 inform_oacc_loop (l);
1786 if (is_oacc_kernels)
1787 free_oacc_loop (l);
1788 }
629b3d75
MJ
1789
1790 /* Offloaded targets may introduce new basic blocks, which require
1791 dominance information to update SSA. */
1792 calculate_dominance_info (CDI_DOMINATORS);
1793
1794 /* Now lower internal loop functions to target-specific code
1795 sequences. */
1796 basic_block bb;
1797 FOR_ALL_BB_FN (bb, cfun)
1798 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1799 {
1800 gimple *stmt = gsi_stmt (gsi);
1801 if (!is_gimple_call (stmt))
1802 {
1803 gsi_next (&gsi);
1804 continue;
1805 }
1806
1807 gcall *call = as_a <gcall *> (stmt);
1808 if (!gimple_call_internal_p (call))
1809 {
1810 gsi_next (&gsi);
1811 continue;
1812 }
1813
1814 /* Rewind to allow rescan. */
1815 gsi_prev (&gsi);
1816 bool rescan = false, remove = false;
1817 enum internal_fn ifn_code = gimple_call_internal_fn (call);
1818
1819 switch (ifn_code)
1820 {
1821 default: break;
1822
02889d23
CLT
1823 case IFN_GOACC_TILE:
1824 oacc_xform_tile (call);
1825 rescan = true;
1826 break;
1827
629b3d75
MJ
1828 case IFN_GOACC_LOOP:
1829 oacc_xform_loop (call);
1830 rescan = true;
1831 break;
1832
1833 case IFN_GOACC_REDUCTION:
1834 /* Mark the function for SSA renaming. */
1835 mark_virtual_operands_for_renaming (cfun);
1836
1837 /* If the level is -1, this ended up being an unused
1838 axis. Handle as a default. */
1839 if (integer_minus_onep (gimple_call_arg (call, 3)))
1840 default_goacc_reduction (call);
1841 else
1842 targetm.goacc.reduction (call);
1843 rescan = true;
1844 break;
1845
1846 case IFN_UNIQUE:
1847 {
1848 enum ifn_unique_kind kind
1849 = ((enum ifn_unique_kind)
1850 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1851
1852 switch (kind)
1853 {
1854 default:
02889d23 1855 break;
629b3d75
MJ
1856
1857 case IFN_UNIQUE_OACC_FORK:
1858 case IFN_UNIQUE_OACC_JOIN:
1859 if (integer_minus_onep (gimple_call_arg (call, 2)))
1860 remove = true;
1861 else if (!targetm.goacc.fork_join
1862 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
1863 remove = true;
1864 break;
1865
1866 case IFN_UNIQUE_OACC_HEAD_MARK:
1867 case IFN_UNIQUE_OACC_TAIL_MARK:
1868 remove = true;
1869 break;
1870 }
1871 break;
1872 }
1873 }
1874
1875 if (gsi_end_p (gsi))
1876 /* We rewound past the beginning of the BB. */
1877 gsi = gsi_start_bb (bb);
1878 else
1879 /* Undo the rewind. */
1880 gsi_next (&gsi);
1881
1882 if (remove)
1883 {
1884 if (gimple_vdef (call))
1885 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
1886 if (gimple_call_lhs (call))
1887 {
1888 /* Propagate the data dependency var. */
1889 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
1890 gimple_call_arg (call, 1));
1891 gsi_replace (&gsi, ass, false);
1892 }
1893 else
1894 gsi_remove (&gsi, true);
1895 }
1896 else if (!rescan)
1897 /* If not rescanning, advance over the call. */
1898 gsi_next (&gsi);
1899 }
1900
1901 free_oacc_loop (loops);
1902
1903 return 0;
1904}
1905
1906/* Default launch dimension validator. Force everything to 1. A
1907 backend that wants to provide larger dimensions must override this
1908 hook. */
1909
1910bool
1911default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
46dedae6
TV
1912 int ARG_UNUSED (fn_level),
1913 unsigned ARG_UNUSED (used))
629b3d75
MJ
1914{
1915 bool changed = false;
1916
1917 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1918 {
1919 if (dims[ix] != 1)
1920 {
1921 dims[ix] = 1;
1922 changed = true;
1923 }
1924 }
1925
1926 return changed;
1927}
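/* A backend provides larger dimensions by overriding this hook via
   TARGET_GOACC_VALIDATE_DIMS.  The sketch below is purely illustrative; the
   function name and the value 32 are invented, not taken from any real
   backend:

     static bool
     example_goacc_validate_dims (tree decl ATTRIBUTE_UNUSED, int *dims,
                                  int fn_level ATTRIBUTE_UNUSED,
                                  unsigned used ATTRIBUTE_UNUSED)
     {
       bool changed = false;
       if (dims[GOMP_DIM_VECTOR] < 1)
         {
           dims[GOMP_DIM_VECTOR] = 32;  // hypothetical hardware vector length
           changed = true;
         }
       return changed;
     }

     #undef TARGET_GOACC_VALIDATE_DIMS
     #define TARGET_GOACC_VALIDATE_DIMS example_goacc_validate_dims
*/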
1928
01914336 1929/* Default dimension bound is unknown on accelerator and 1 on host. */
629b3d75
MJ
1930
1931int
1932default_goacc_dim_limit (int ARG_UNUSED (axis))
1933{
1934#ifdef ACCEL_COMPILER
1935 return 0;
1936#else
1937 return 1;
1938#endif
1939}
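/* A return value of 0 is the "bound unknown" encoding referred to above;
   only a positive value is meant to be used as an actual per-axis limit.  */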
1940
1941namespace {
1942
1943const pass_data pass_data_oacc_device_lower =
1944{
1945 GIMPLE_PASS, /* type */
1946 "oaccdevlow", /* name */
fd2b8c8b 1947 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
1948 TV_NONE, /* tv_id */
1949 PROP_cfg, /* properties_required */
1950 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1951 0, /* properties_destroyed */
1952 0, /* todo_flags_start */
1953 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
1954};
1955
1956class pass_oacc_device_lower : public gimple_opt_pass
1957{
1958public:
1959 pass_oacc_device_lower (gcc::context *ctxt)
1960 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
1961 {}
1962
1963 /* opt_pass methods: */
1964 virtual bool gate (function *) { return flag_openacc; };
1965
1966 virtual unsigned int execute (function *)
1967 {
1968 return execute_oacc_device_lower ();
1969 }
1970
1971}; // class pass_oacc_device_lower
1972
1973} // anon namespace
1974
1975gimple_opt_pass *
1976make_pass_oacc_device_lower (gcc::context *ctxt)
1977{
1978 return new pass_oacc_device_lower (ctxt);
1979}
1980
0c6b03b5
AM
1981\f
1982/* Rewrite the GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
1983 GOMP_SIMT_ENTER call identifying the privatized variables, which are
1984 turned into structure fields and receive a DECL_VALUE_EXPR accordingly.
1985 Set *REGIMPLIFY to true if any privatized variables were seen.  */
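/* Roughly, for privatized variables 'a' and 'b' the rewrite below turns
   (illustrative pseudo-GIMPLE, SSA names invented):

       simduid_2 = GOMP_SIMT_ENTER (simduid_1, &a, &b);
       simtrec_3 = GOMP_SIMT_ENTER_ALLOC (simduid_2);
       ...
       GOMP_SIMT_EXIT (simtrec_3);

   into

       simduid_2 = simduid_1;
       simtrec_3 = GOMP_SIMT_ENTER_ALLOC (<size of record>, <align of record>);
       ...
       *simtrec_3 = {CLOBBER};
       GOMP_SIMT_EXIT (simtrec_3);

   where 'a' and 'b' become fields of a new record type and get
   DECL_VALUE_EXPRs (*simtrec_3).a and (*simtrec_3).b respectively.  */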
1986
1987static void
1988ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
1989{
1990 gimple *alloc_stmt = gsi_stmt (*gsi);
1991 tree simtrec = gimple_call_lhs (alloc_stmt);
1992 tree simduid = gimple_call_arg (alloc_stmt, 0);
1993 gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
1994 gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
1995 tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
1996 TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
1997 TREE_ADDRESSABLE (rectype) = 1;
1998 TREE_TYPE (simtrec) = build_pointer_type (rectype);
1999 for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
2000 {
2001 tree *argp = gimple_call_arg_ptr (enter_stmt, i);
2002 if (*argp == null_pointer_node)
2003 continue;
2004 gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
2005 && VAR_P (TREE_OPERAND (*argp, 0)));
2006 tree var = TREE_OPERAND (*argp, 0);
2007
2008 tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
2009 DECL_NAME (var), TREE_TYPE (var));
2010 SET_DECL_ALIGN (field, DECL_ALIGN (var));
2011 DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
2012 TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
2013
2014 insert_field_into_struct (rectype, field);
2015
2016 tree t = build_simple_mem_ref (simtrec);
2017 t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
2018 TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
2019 SET_DECL_VALUE_EXPR (var, t);
2020 DECL_HAS_VALUE_EXPR_P (var) = 1;
2021 *regimplify = true;
2022 }
2023 layout_type (rectype);
2024 tree size = TYPE_SIZE_UNIT (rectype);
2025 tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
2026
2027 alloc_stmt
2028 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
2029 gimple_call_set_lhs (alloc_stmt, simtrec);
2030 gsi_replace (gsi, alloc_stmt, false);
2031 gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
2032 enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
2033 gsi_replace (&enter_gsi, enter_stmt, false);
2034
2035 use_operand_p use;
2036 gimple *exit_stmt;
2037 if (single_imm_use (simtrec, &use, &exit_stmt))
2038 {
2039 gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
2040 gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
25b45c7c 2041 tree clobber = build_clobber (rectype);
0c6b03b5
AM
2042 exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
2043 gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
2044 }
2045 else
2046 gcc_checking_assert (has_zero_uses (simtrec));
2047}
2048
2049/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2050
2051static tree
2052find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2053{
2054 tree t = *tp;
2055
2056 if (VAR_P (t)
2057 && DECL_HAS_VALUE_EXPR_P (t)
2058 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2059 {
2060 *walk_subtrees = 0;
2061 return t;
2062 }
2063 return NULL_TREE;
2064}
2065
629b3d75
MJ
2066/* Clean up uses of SIMT placeholder internal functions: on non-SIMT targets,
2067 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2068 LANE is kept to be expanded to RTL later on.  Also clean up all other SIMT
2069 internal functions on non-SIMT targets, and likewise some SIMD internal
2070 functions on SIMT targets.  */
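/* For reference, the substitutions performed below are, in outline (see the
   switch statement for the exact rules):

                            non-SIMT target (VF == 1)   SIMT target (VF > 1)
     GOMP_USE_SIMT          0                           1
     GOMP_SIMT_VF           1                           VF
     GOMP_SIMT_LANE,
     GOMP_SIMT_LAST_LANE    0                           kept for RTL expansion
     GOMP_SIMT_VOTE_ANY,
     GOMP_SIMT_XCHG_*       first argument              kept
     GOMP_SIMD_LANE,
     GOMP_SIMD_LAST_LANE    kept                        0
     GOMP_SIMD_VF           kept                        1
*/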
2071
2072static unsigned int
2073execute_omp_device_lower ()
2074{
2075 int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
0c6b03b5 2076 bool regimplify = false;
629b3d75
MJ
2077 basic_block bb;
2078 gimple_stmt_iterator gsi;
7a50e708
JJ
2079 bool calls_declare_variant_alt
2080 = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
629b3d75
MJ
2081 FOR_EACH_BB_FN (bb, cfun)
2082 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2083 {
2084 gimple *stmt = gsi_stmt (gsi);
7a50e708 2085 if (!is_gimple_call (stmt))
629b3d75 2086 continue;
7a50e708
JJ
2087 if (!gimple_call_internal_p (stmt))
2088 {
2089 if (calls_declare_variant_alt)
2090 if (tree fndecl = gimple_call_fndecl (stmt))
2091 {
2092 tree new_fndecl = omp_resolve_declare_variant (fndecl);
2093 if (new_fndecl != fndecl)
2094 {
2095 gimple_call_set_fndecl (stmt, new_fndecl);
2096 update_stmt (stmt);
2097 }
2098 }
2099 continue;
2100 }
629b3d75
MJ
2101 tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
2102 tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
2103 switch (gimple_call_internal_fn (stmt))
2104 {
2105 case IFN_GOMP_USE_SIMT:
2106 rhs = vf == 1 ? integer_zero_node : integer_one_node;
2107 break;
0c6b03b5
AM
2108 case IFN_GOMP_SIMT_ENTER:
2109 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2110 goto simtreg_enter_exit;
2111 case IFN_GOMP_SIMT_ENTER_ALLOC:
2112 if (vf != 1)
2113 ompdevlow_adjust_simt_enter (&gsi, &regimplify);
2114 rhs = vf == 1 ? null_pointer_node : NULL_TREE;
2115 goto simtreg_enter_exit;
2116 case IFN_GOMP_SIMT_EXIT:
2117 simtreg_enter_exit:
2118 if (vf != 1)
2119 continue;
2120 unlink_stmt_vdef (stmt);
2121 break;
629b3d75
MJ
2122 case IFN_GOMP_SIMT_LANE:
2123 case IFN_GOMP_SIMT_LAST_LANE:
2124 rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
2125 break;
2126 case IFN_GOMP_SIMT_VF:
2127 rhs = build_int_cst (type, vf);
2128 break;
2129 case IFN_GOMP_SIMT_ORDERED_PRED:
2130 rhs = vf == 1 ? integer_zero_node : NULL_TREE;
2131 if (rhs || !lhs)
2132 unlink_stmt_vdef (stmt);
2133 break;
2134 case IFN_GOMP_SIMT_VOTE_ANY:
2135 case IFN_GOMP_SIMT_XCHG_BFLY:
2136 case IFN_GOMP_SIMT_XCHG_IDX:
2137 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2138 break;
2139 case IFN_GOMP_SIMD_LANE:
2140 case IFN_GOMP_SIMD_LAST_LANE:
2141 rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
2142 break;
2143 case IFN_GOMP_SIMD_VF:
2144 rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
2145 break;
2146 default:
2147 continue;
2148 }
2149 if (lhs && !rhs)
2150 continue;
2151 stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
2152 gsi_replace (&gsi, stmt, false);
2153 }
0c6b03b5
AM
2154 if (regimplify)
2155 FOR_EACH_BB_REVERSE_FN (bb, cfun)
2156 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2157 if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
2158 {
2159 if (gimple_clobber_p (gsi_stmt (gsi)))
2160 gsi_remove (&gsi, true);
2161 else
2162 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2163 }
629b3d75
MJ
2164 if (vf != 1)
2165 cfun->has_force_vectorize_loops = false;
2166 return 0;
2167}
2168
2169namespace {
2170
2171const pass_data pass_data_omp_device_lower =
2172{
2173 GIMPLE_PASS, /* type */
2174 "ompdevlow", /* name */
fd2b8c8b 2175 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2176 TV_NONE, /* tv_id */
2177 PROP_cfg, /* properties_required */
2178 PROP_gimple_lomp_dev, /* properties_provided */
2179 0, /* properties_destroyed */
2180 0, /* todo_flags_start */
2181 TODO_update_ssa, /* todo_flags_finish */
2182};
2183
2184class pass_omp_device_lower : public gimple_opt_pass
2185{
2186public:
2187 pass_omp_device_lower (gcc::context *ctxt)
2188 : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
2189 {}
2190
2191 /* opt_pass methods: */
4cea8675 2192 virtual bool gate (function *fun)
629b3d75 2193 {
7a50e708
JJ
2194 return (!(fun->curr_properties & PROP_gimple_lomp_dev)
2195 || (flag_openmp
2196 && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
629b3d75
MJ
2197 }
2198 virtual unsigned int execute (function *)
2199 {
2200 return execute_omp_device_lower ();
2201 }
2202
2203}; // class pass_omp_device_lower
2204
2205} // anon namespace
2206
2207gimple_opt_pass *
2208make_pass_omp_device_lower (gcc::context *ctxt)
2209{
2210 return new pass_omp_device_lower (ctxt);
2211}
2212
2213/* "omp declare target link" handling pass. */
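/* On the accelerator compiler, a global variable carrying the
   "omp declare target link" attribute is not accessed directly: by this
   point it has a DECL_VALUE_EXPR redirecting the access (the real storage is
   only provided once the data is mapped).  The pass below regimplifies every
   statement that still mentions such a variable so that the value expression
   is expanded into the IL.  */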
2214
2215namespace {
2216
2217const pass_data pass_data_omp_target_link =
2218{
2219 GIMPLE_PASS, /* type */
2220 "omptargetlink", /* name */
fd2b8c8b 2221 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
2222 TV_NONE, /* tv_id */
2223 PROP_ssa, /* properties_required */
2224 0, /* properties_provided */
2225 0, /* properties_destroyed */
2226 0, /* todo_flags_start */
2227 TODO_update_ssa, /* todo_flags_finish */
2228};
2229
2230class pass_omp_target_link : public gimple_opt_pass
2231{
2232public:
2233 pass_omp_target_link (gcc::context *ctxt)
2234 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
2235 {}
2236
2237 /* opt_pass methods: */
2238 virtual bool gate (function *fun)
2239 {
2240#ifdef ACCEL_COMPILER
46dbeb40 2241 return offloading_function_p (fun->decl);
629b3d75
MJ
2242#else
2243 (void) fun;
2244 return false;
2245#endif
2246 }
2247
2248 virtual unsigned execute (function *);
2249};
2250
2251/* Callback for walk_gimple_stmt used to scan for link var operands. */
2252
2253static tree
2254find_link_var_op (tree *tp, int *walk_subtrees, void *)
2255{
2256 tree t = *tp;
2257
56f71478
JJ
2258 if (VAR_P (t)
2259 && DECL_HAS_VALUE_EXPR_P (t)
2260 && is_global_var (t)
629b3d75
MJ
2261 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2262 {
2263 *walk_subtrees = 0;
2264 return t;
2265 }
2266
2267 return NULL_TREE;
2268}
2269
2270unsigned
2271pass_omp_target_link::execute (function *fun)
2272{
2273 basic_block bb;
2274 FOR_EACH_BB_FN (bb, fun)
2275 {
2276 gimple_stmt_iterator gsi;
2277 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2279 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2280 }
2281
2282 return 0;
2283}
2284
2285} // anon namespace
2286
2287gimple_opt_pass *
2288make_pass_omp_target_link (gcc::context *ctxt)
2289{
2290 return new pass_omp_target_link (ctxt);
2291}