/* Bits of OpenMP and OpenACC handling that is specific to device offloading
   and a lowering pass for OpenACC device directives.

   Copyright (C) 2005-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "internal-fn.h"
#include "langhooks.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "common/common-target.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "intl.h"
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"

/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent;  /* Containing loop.  */

  oacc_loop *child;  /* First inner loop.  */

  oacc_loop *sibling;  /* Next loop within same parent.  */

  location_t loc;  /* Location of the loop start.  */

  gcall *marker;  /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions.  */

  tree routine;  /* Pseudo-loop enclosing a routine.  */

  unsigned mask;    /* Partitioning mask.  */
  unsigned e_mask;  /* Partitioning of element loops (when tiling).  */
  unsigned inner;   /* Partitioning of inner loops.  */
  unsigned flags;   /* Partitioning flags.  */
  vec<gcall *> ifns;  /* Contained loop abstraction functions.  */
  tree chunk_size;  /* Chunk size.  */
  gcall *head_end;  /* Final marker of head sequence.  */
};
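
/* For example, an offloaded region containing a gang-partitioned loop with a
   vector-partitioned loop nested inside it is represented as the function's
   'NULL' loop, whose CHILD is the gang loop, whose CHILD in turn is the
   vector loop; loops that share a parent are chained through SIBLING.  */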

/* Holds offload tables with decls.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;

/* Return level at which oacc routine may spawn a partitioned loop, or
   -1 if it is not a routine (i.e. is an offload fn).  */

int
oacc_fn_attrib_level (tree attr)
{
  tree pos = TREE_VALUE (attr);

  if (!TREE_PURPOSE (pos))
    return -1;

  int ix = 0;
  for (ix = 0; ix != GOMP_DIM_MAX;
       ix++, pos = TREE_CHAIN (pos))
    if (!integer_zerop (TREE_PURPOSE (pos)))
      break;

  return ix;
}

/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
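
/* Thus the constructed vector holds one pointer-sized element per function
   (its address) and two per variable (its address followed by its size),
   with the top bit of the size element flagging "omp declare target link"
   variables.  */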

/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    num_vars * 2);
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  targetm.record_offload_symbol (it);
	}
    }
}

/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
   axis DIM.  Return a tmp var holding the result.  */

static tree
oacc_dim_call (bool pos, int dim, gimple_seq *seq)
{
  tree arg = build_int_cst (unsigned_type_node, dim);
  tree size = create_tmp_var (integer_type_node);
  enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
  gimple *call = gimple_build_call_internal (fn, 1, arg);

  gimple_call_set_lhs (call, size);
  gimple_seq_add_stmt (seq, call);

  return size;
}
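
/* For example, oacc_dim_call (false, GOMP_DIM_VECTOR, &seq) emits an
   IFN_GOACC_DIM_SIZE call whose result is the vector length, while passing
   true instead emits IFN_GOACC_DIM_POS, yielding the current position on
   that axis.  */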

/* Find the number of threads (POS = false), or thread number (POS =
   true) for an OpenACC region partitioned as MASK.  Setup code
   required for the calculation is added to SEQ.  */

static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
  unsigned ix;

  /* Start at gang level, and examine relevant dimension indices.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (GOMP_DIM_MASK (ix) & mask)
      {
	if (res)
	  {
	    /* We had an outer index, so scale that by the size of
	       this dimension.  */
	    tree n = oacc_dim_call (false, ix, seq);
	    res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
	  }
	if (pos)
	  {
	    /* Determine index in this dimension.  */
	    tree id = oacc_dim_call (true, ix, seq);
	    if (res)
	      res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
	    else
	      res = id;
	  }
      }

  if (res == NULL_TREE)
    res = integer_zero_node;

  return res;
}
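
/* As an illustration, for MASK covering gang and vector parallelism the
   sequence built above computes gang_size * vector_size when POS is false,
   and gang_pos * vector_size + vector_pos when POS is true.  */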

/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

#ifdef ACCEL_COMPILER
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
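  /* As a worked example of the chunking case: range=1000, step=1, dir=1,
     chunk_size=16 and 10 threads gives chunks = (999 + 160) / 160 = 7,
     i.e. ceil (1000 / (16 * 1 * 10)) chunks per thread.  */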
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	     = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  struct loop *parent = bb->loop_father;
	  struct loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).  We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Default partitioned and minimum partitioned dimensions.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];

int
oacc_get_default_dim (int dim)
{
  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
  return oacc_default_dims[dim];
}

/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */
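
/* For example, -fopenacc-dim=32:4:128 requests 32 gangs, 4 workers and a
   vector length of 128, while -fopenacc-dim=::128 constrains only the
   vector length; whatever is given here is still subject to the target's
   validate_dims hook below.  */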

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "-fopenacc-dim operand is malformed at '%s'", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
}

/* Validate and update the dimensions for offloaded FN.  ATTRS is the
   raw attribute.  DIMS is an array of dimensions, which is filled in.
   LEVEL is the partitioning level of a routine, or -1 for an offload
   region itself.  USED is the mask of partitioned execution in the
   function.  */

static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector- single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partitioning is not 1,
	   the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	pos = tree_cons (purpose[ix],
			 build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}

/* Create an empty OpenACC loop structure at LOC.  */

static oacc_loop *
new_oacc_loop_raw (oacc_loop *parent, location_t loc)
{
  oacc_loop *loop = XCNEW (oacc_loop);

  loop->parent = parent;

  if (parent)
    {
      loop->sibling = parent->child;
      parent->child = loop;
    }

  loop->loc = loc;
  return loop;
}

/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}

/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}

/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
		^ (GOMP_DIM_MASK (level) - 1));
}
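
/* The mask computed above covers LEVEL and everything inside it: e.g. for a
   worker-level routine (LEVEL == GOMP_DIM_WORKER) it is the worker and
   vector bits, i.e. the parallelism the routine itself may consume.  */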

/* Finish off the current OpenACC loop ending at tail marker TAIL.
   Return the parent loop.  */

static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
  /* If the loop has been collapsed, don't partition it.  */
  if (loop->ifns.is_empty ())
    loop->mask = loop->flags = 0;
  return loop->parent;
}

/* Free all OpenACC loop structures within LOOP (inclusive).  */

static void
free_oacc_loop (oacc_loop *loop)
{
  if (loop->sibling)
    free_oacc_loop (loop->sibling);
  if (loop->child)
    free_oacc_loop (loop->child);

  loop->ifns.release ();
  free (loop);
}

/* Dump out the OpenACC loop head or tail beginning at FROM.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Dump OpenACC loop LOOP, its children, and its siblings.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
	   loop->flags, loop->mask,
	   LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
	     depth * 2, "", DECL_SOURCE_FILE (loop->routine),
	     DECL_SOURCE_LINE (loop->routine),
	     IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}

void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}

/* Provide diagnostics on OpenACC loop LOOP, its children, and its
   siblings.  */

static void
inform_oacc_loop (const oacc_loop *loop)
{
  const char *gang
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
  const char *worker
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
  const char *vector
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
  const char *seq = loop->mask == 0 ? " seq" : "";
  const dump_user_location_t loc
    = dump_user_location_t::from_location_t (loop->loc);
  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
		   vector, seq);

  if (loop->child)
    inform_oacc_loop (loop->child);
  if (loop->sibling)
    inform_oacc_loop (loop->sibling);
}

/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}

/* LOOP is the first sibling.  Reverse the order in place and return
   the new first sibling.  Recurse to child loops.  */

static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
  oacc_loop *last = NULL;
  do
    {
      if (loop->child)
	loop->child = oacc_loop_sibling_nreverse (loop->child);

      oacc_loop *next = loop->sibling;
      loop->sibling = last;
      last = loop;
      loop = next;
    }
  while (loop);

  return last;
}

/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're going to use
     in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}

/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}

/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}

/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  loop->flags |= OLF_AUTO;
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  if (this_mask & outer_mask)
    {
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}

/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too, making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}

/* Walk the OpenACC loop hierarchy to check and assign partitioning
   axes.  Return mask of partitioning.  */

static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
  unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);

  if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
    {
      mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
      mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
    }
  return mask_all;
}
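
/* For a typical triply-nested independent loop nest in a parallel region,
   the fixed pass records any explicit gang/worker/vector clauses and the
   auto pass then hands out the remaining axes from the outside in, so such
   a nest usually ends up gang, worker, vector from outermost to innermost
   loop.  */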

/* Default fork/join early expander.  Delete the function calls if
   there is no RTL expander.  */

bool
default_goacc_fork_join (gcall *ARG_UNUSED (call),
			 const int *ARG_UNUSED (dims), bool is_fork)
{
  if (is_fork)
    return targetm.have_oacc_fork ();
  else
    return targetm.have_oacc_join ();
}

/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'  */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  gsi_replace_with_seq (&gsi, seq, true);
}

/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  */
  if ((const void *) flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *) &flag_openacc_dims;
    }

  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    {
      if (fn_level >= 0)
	fprintf (dump_file, "Function is OpenACC routine level %d\n",
		 fn_level);
      else if (is_oacc_kernels)
	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
		 (is_oacc_kernels_parallelized
		  ? "parallelized" : "unparallelized"));
      else
	fprintf (dump_file, "Function is OpenACC parallel offload\n");
    }

  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
	 also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }
  if (dump_enabled_p ())
    {
      oacc_loop *l = loops;
      /* OpenACC kernels constructs are special: they currently don't use the
	 generic oacc_loop infrastructure.  */
      if (is_oacc_kernels)
	{
	  /* Create a fake oacc_loop for diagnostic purposes.  */
	  l = new_oacc_loop_raw (NULL,
				 DECL_SOURCE_LOCATION (current_function_decl));
	  l->mask = used_mask;
	}
      else
	{
	  /* Skip the outermost, dummy OpenACC loop.  */
	  l = l->child;
	}
      if (l)
	inform_oacc_loop (l);
      if (is_oacc_kernels)
	free_oacc_loop (l);
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_TILE:
	    oacc_xform_tile (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  break;

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}

/* Default launch dimension validator.  Force everything to 1.  A
   backend that wants to provide larger dimensions must override this
   hook.  */

bool
default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
			     int ARG_UNUSED (fn_level))
{
  bool changed = false;

  for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      if (dims[ix] != 1)
	{
	  dims[ix] = 1;
	  changed = true;
	}
    }

  return changed;
}

/* Default dimension bound is unknown on accelerator and 1 on host.  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  return 0;
#else
  return 1;
#endif
}

namespace {

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}

/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_constructor (rectype, NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}

/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables.  */

static tree
find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  if (VAR_P (t)
      && DECL_HAS_VALUE_EXPR_P (t)
      && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
    {
      *walk_subtrees = 0;
      return t;
    }
  return NULL_TREE;
}

/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */
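
/* For instance, on a non-SIMT target vf is 1 below: GOMP_USE_SIMT folds to
   0, GOMP_SIMT_VF to 1 and GOMP_SIMT_LANE to 0, while the GOMP_SIMD_*
   functions are left alone; on a SIMT target the SIMD ones are folded
   instead and GOMP_SIMT_LANE is kept for RTL expansion.  */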

static unsigned int
execute_omp_device_lower ()
{
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    if (vf != 1)
	      continue;
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}

namespace {

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lomp_dev);
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}

/* "omp declare target link" handling pass.  */

namespace {

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};

/* Callback for walk_gimple_stmt used to scan for link var operands.  */

static tree
find_link_var_op (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  if (VAR_P (t)
      && DECL_HAS_VALUE_EXPR_P (t)
      && is_global_var (t)
      && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
    {
      *walk_subtrees = 0;
      return t;
    }

  return NULL_TREE;
}

unsigned
pass_omp_target_link::execute (function *fun)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}