1/* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
3
4 Copyright (C) 2005-2017 Free Software Foundation, Inc.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "tree.h"
28#include "gimple.h"
29#include "tree-pass.h"
30#include "ssa.h"
31#include "cgraph.h"
32#include "pretty-print.h"
33#include "diagnostic-core.h"
34#include "fold-const.h"
35#include "internal-fn.h"
36#include "langhooks.h"
37#include "gimplify.h"
38#include "gimple-iterator.h"
39#include "gimplify-me.h"
40#include "gimple-walk.h"
41#include "tree-cfg.h"
42#include "tree-into-ssa.h"
43#include "tree-nested.h"
44#include "stor-layout.h"
45#include "common/common-target.h"
46#include "omp-general.h"
47#include "omp-offload.h"
48#include "lto-section-names.h"
49#include "gomp-constants.h"
50#include "gimple-pretty-print.h"
51#include "intl.h"
52
53/* Describe the OpenACC looping structure of a function. The entire
54 function is held in a 'NULL' loop. */
55
56struct oacc_loop
57{
58 oacc_loop *parent; /* Containing loop. */
59
60 oacc_loop *child; /* First inner loop. */
61
62 oacc_loop *sibling; /* Next loop within same parent. */
63
64 location_t loc; /* Location of the loop start. */
65
66 gcall *marker; /* Initial head marker. */
67
68 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
69 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
70
71 tree routine; /* Pseudo-loop enclosing a routine. */
72
73 unsigned mask; /* Partitioning mask. */
74 unsigned e_mask; /* Partitioning of element loops (when tiling). */
75 unsigned inner; /* Partitioning of inner loops. */
76 unsigned flags; /* Partitioning flags. */
77 vec<gcall *> ifns; /* Contained loop abstraction functions. */
78 tree chunk_size; /* Chunk size. */
79 gcall *head_end; /* Final marker of head sequence. */
80};
81
82/* Holds offload tables with decls. */
83vec<tree, va_gc> *offload_funcs, *offload_vars;
84
85/* Return level at which oacc routine may spawn a partitioned loop, or
86 -1 if it is not a routine (i.e. is an offload fn). */
87
88static int
89oacc_fn_attrib_level (tree attr)
90{
91 tree pos = TREE_VALUE (attr);
92
93 if (!TREE_PURPOSE (pos))
94 return -1;
95
96 int ix = 0;
97 for (ix = 0; ix != GOMP_DIM_MAX;
98 ix++, pos = TREE_CHAIN (pos))
99 if (!integer_zerop (TREE_PURPOSE (pos)))
100 break;
101
102 return ix;
103}
104
105/* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
106 adds their addresses and sizes to constructor-vector V_CTOR. */
107
108static void
109add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
110 vec<constructor_elt, va_gc> *v_ctor)
111{
112 unsigned len = vec_safe_length (v_decls);
113 for (unsigned i = 0; i < len; i++)
114 {
115 tree it = (*v_decls)[i];
116 bool is_var = VAR_P (it);
117 bool is_link_var
118 = is_var
119#ifdef ACCEL_COMPILER
120 && DECL_HAS_VALUE_EXPR_P (it)
121#endif
122 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
123
124 tree size = NULL_TREE;
125 if (is_var)
126 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
127
128 tree addr;
129 if (!is_link_var)
130 addr = build_fold_addr_expr (it);
131 else
132 {
133#ifdef ACCEL_COMPILER
134 /* For "omp declare target link" vars add address of the pointer to
135 the target table, instead of address of the var. */
136 tree value_expr = DECL_VALUE_EXPR (it);
137 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
138 varpool_node::finalize_decl (link_ptr_decl);
139 addr = build_fold_addr_expr (link_ptr_decl);
140#else
141 addr = build_fold_addr_expr (it);
142#endif
143
144 /* Most significant bit of the size marks "omp declare target link"
145 vars in host and target tables. */
146 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
147 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
148 * BITS_PER_UNIT - 1);
149 size = wide_int_to_tree (const_ptr_type_node, isize);
150 }
151
152 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
153 if (is_var)
154 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
155 }
156}
157
158/* Create new symbols containing (address, size) pairs for global variables,
159 marked with "omp declare target" attribute, as well as addresses for the
160 functions, which are outlined offloading regions. */
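/* Rough illustration (not a literal dump): with two offload functions F1 and
   F2 and one 8-byte "omp declare target" variable V, the emitted tables look
   approximately like

     .offload_func_table: { &F1, &F2 }
     .offload_var_table:  { &V, 8 }

   while an "omp declare target link" variable would instead contribute the
   address of its link pointer, with the most significant bit of the size
   entry set as a marker.  */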
161void
162omp_finish_file (void)
163{
164 unsigned num_funcs = vec_safe_length (offload_funcs);
165 unsigned num_vars = vec_safe_length (offload_vars);
166
167 if (num_funcs == 0 && num_vars == 0)
168 return;
169
170 if (targetm_common.have_named_sections)
171 {
172 vec<constructor_elt, va_gc> *v_f, *v_v;
173 vec_alloc (v_f, num_funcs);
174 vec_alloc (v_v, num_vars * 2);
175
176 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
177 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
178
179 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
180 num_vars * 2);
181 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
182 num_funcs);
183 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
184 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
185 tree ctor_v = build_constructor (vars_decl_type, v_v);
186 tree ctor_f = build_constructor (funcs_decl_type, v_f);
187 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
188 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
189 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
190 get_identifier (".offload_func_table"),
191 funcs_decl_type);
192 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
193 get_identifier (".offload_var_table"),
194 vars_decl_type);
195 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
196 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
197 otherwise a joint table in a binary will contain padding between
198 tables from multiple object files. */
199 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
200 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
201 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
202 DECL_INITIAL (funcs_decl) = ctor_f;
203 DECL_INITIAL (vars_decl) = ctor_v;
204 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
205 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
206
207 varpool_node::finalize_decl (vars_decl);
208 varpool_node::finalize_decl (funcs_decl);
209 }
210 else
211 {
212 for (unsigned i = 0; i < num_funcs; i++)
213 {
214 tree it = (*offload_funcs)[i];
215 targetm.record_offload_symbol (it);
216 }
217 for (unsigned i = 0; i < num_vars; i++)
218 {
219 tree it = (*offload_vars)[i];
220 targetm.record_offload_symbol (it);
221 }
222 }
223}
224
225/* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
226 axis DIM. Return a tmp var holding the result. */
227
228static tree
229oacc_dim_call (bool pos, int dim, gimple_seq *seq)
230{
231 tree arg = build_int_cst (unsigned_type_node, dim);
232 tree size = create_tmp_var (integer_type_node);
233 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
234 gimple *call = gimple_build_call_internal (fn, 1, arg);
235
236 gimple_call_set_lhs (call, size);
237 gimple_seq_add_stmt (seq, call);
238
239 return size;
240}
241
242/* Find the number of threads (POS = false), or thread number (POS =
243 true) for an OpenACC region partitioned as MASK. Setup code
244 required for the calculation is added to SEQ. */
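/* Rough illustration: for a MASK covering gang and worker, the sequence
   built below computes approximately

     POS == false:  num_gangs * num_workers
     POS == true:   gang_pos * num_workers + worker_pos

   i.e. the selected dimensions are linearized outermost-first.  */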
245
246static tree
247oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
248{
249 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
250 unsigned ix;
251
252 /* Start at gang level, and examine relevant dimension indices. */
253 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
254 if (GOMP_DIM_MASK (ix) & mask)
255 {
256 if (res)
257 {
258 /* We had an outer index, so scale that by the size of
259 this dimension. */
260 tree n = oacc_dim_call (false, ix, seq);
261 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
262 }
263 if (pos)
264 {
265 /* Determine index in this dimension. */
266 tree id = oacc_dim_call (true, ix, seq);
267 if (res)
268 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
269 else
270 res = id;
271 }
272 }
273
274 if (res == NULL_TREE)
275 res = integer_zero_node;
276
277 return res;
278}
279
280/* Transform IFN_GOACC_LOOP calls to actual code. See
281 expand_oacc_for for where these are generated. At the vector
282 level, we stride loops, such that each member of a warp will
283 operate on adjacent iterations. At the worker and gang level,
284 each gang/warp executes a set of contiguous iterations. Chunking
285 can override this such that each iteration engine executes a
286 contiguous chunk, and then moves on to stride to the next chunk. */
287
288static void
289oacc_xform_loop (gcall *call)
290{
291 gimple_stmt_iterator gsi = gsi_for_stmt (call);
292 enum ifn_goacc_loop_kind code
293 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
294 tree dir = gimple_call_arg (call, 1);
295 tree range = gimple_call_arg (call, 2);
296 tree step = gimple_call_arg (call, 3);
297 tree chunk_size = NULL_TREE;
298 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
299 tree lhs = gimple_call_lhs (call);
300 tree type = TREE_TYPE (lhs);
301 tree diff_type = TREE_TYPE (range);
302 tree r = NULL_TREE;
303 gimple_seq seq = NULL;
304 bool chunking = false, striding = true;
305 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
306 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
307
308#ifdef ACCEL_COMPILER
309 chunk_size = gimple_call_arg (call, 4);
310 if (integer_minus_onep (chunk_size) /* Force static allocation. */
311 || integer_zerop (chunk_size)) /* Default (also static). */
312 {
313 /* If we're at the gang level, we want each to execute a
314 contiguous run of iterations. Otherwise we want each element
315 to stride. */
316 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
317 chunking = false;
318 }
319 else
320 {
321 /* Chunk of size 1 is striding. */
322 striding = integer_onep (chunk_size);
323 chunking = !striding;
324 }
325#endif
326
327 /* striding=true, chunking=true
328 -> invalid.
329 striding=true, chunking=false
330 -> chunks=1
331 striding=false,chunking=true
332 -> chunks=ceil (range/(chunksize*threads*step))
333 striding=false,chunking=false
334 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
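  /* Rough worked example (illustrative numbers only): with range=100, step=1,
     dir=1, 10 threads and chunk_size=4, the chunking case below gives
     chunks = (100 - 1 + 40) / 40 = 3, so each thread executes up to 3 chunks
     of 4 contiguous iterations, advancing by 40 iterations between chunks.  */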
335 push_gimplify_context (true);
336
337 switch (code)
338 {
339 default: gcc_unreachable ();
340
341 case IFN_GOACC_LOOP_CHUNKS:
342 if (!chunking)
343 r = build_int_cst (type, 1);
344 else
345 {
346 /* chunk_max
347 = (range - dir) / (chunks * step * num_threads) + dir */
348 tree per = oacc_thread_numbers (false, mask, &seq);
349 per = fold_convert (type, per);
350 chunk_size = fold_convert (type, chunk_size);
351 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
352 per = fold_build2 (MULT_EXPR, type, per, step);
353 r = build2 (MINUS_EXPR, type, range, dir);
354 r = build2 (PLUS_EXPR, type, r, per);
355 r = build2 (TRUNC_DIV_EXPR, type, r, per);
356 }
357 break;
358
359 case IFN_GOACC_LOOP_STEP:
360 {
361 /* If striding, step by the entire compute volume, otherwise
362 step by the inner volume. */
363 unsigned volume = striding ? mask : inner_mask;
364
365 r = oacc_thread_numbers (false, volume, &seq);
366 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
367 }
368 break;
369
370 case IFN_GOACC_LOOP_OFFSET:
371 if (striding)
372 {
373 r = oacc_thread_numbers (true, mask, &seq);
374 r = fold_convert (diff_type, r);
375 }
376 else
377 {
378 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
379 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
380 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
381 inner_size, outer_size);
382
383 volume = fold_convert (diff_type, volume);
384 if (chunking)
385 chunk_size = fold_convert (diff_type, chunk_size);
386 else
387 {
388 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
389
390 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
391 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
392 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
393 }
394
395 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
396 fold_convert (diff_type, inner_size));
397 r = oacc_thread_numbers (true, outer_mask, &seq);
398 r = fold_convert (diff_type, r);
399 r = build2 (MULT_EXPR, diff_type, r, span);
400
401 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
402 inner = fold_convert (diff_type, inner);
403 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
404
405 if (chunking)
406 {
407 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
408 tree per
409 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
410 per = build2 (MULT_EXPR, diff_type, per, chunk);
411
412 r = build2 (PLUS_EXPR, diff_type, r, per);
413 }
414 }
415 r = fold_build2 (MULT_EXPR, diff_type, r, step);
416 if (type != diff_type)
417 r = fold_convert (type, r);
418 break;
419
420 case IFN_GOACC_LOOP_BOUND:
421 if (striding)
422 r = range;
423 else
424 {
425 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
426 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
427 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
428 inner_size, outer_size);
429
430 volume = fold_convert (diff_type, volume);
431 if (chunking)
432 chunk_size = fold_convert (diff_type, chunk_size);
433 else
434 {
435 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
436
437 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
438 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
439 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
440 }
441
442 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
443 fold_convert (diff_type, inner_size));
444
445 r = fold_build2 (MULT_EXPR, diff_type, span, step);
446
447 tree offset = gimple_call_arg (call, 6);
448 r = build2 (PLUS_EXPR, diff_type, r,
449 fold_convert (diff_type, offset));
450 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
451 diff_type, r, range);
452 }
453 if (diff_type != type)
454 r = fold_convert (type, r);
455 break;
456 }
457
458 gimplify_assign (lhs, r, &seq);
459
460 pop_gimplify_context (NULL);
461
462 gsi_replace_with_seq (&gsi, seq, true);
463}
464
465/* Transform a GOACC_TILE call. Determines the element loop span for
466 the specified loop of the nest. This is 1 if we're not tiling.
467
468 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
469
470static void
471oacc_xform_tile (gcall *call)
472{
473 gimple_stmt_iterator gsi = gsi_for_stmt (call);
474 unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
475 /* Inner loops have higher loop_nos. */
476 unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
477 tree tile_size = gimple_call_arg (call, 2);
478 unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
479 tree lhs = gimple_call_lhs (call);
480 tree type = TREE_TYPE (lhs);
481 gimple_seq seq = NULL;
482 tree span = build_int_cst (type, 1);
483
484 gcc_assert (!(e_mask
485 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
486 | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
487 push_gimplify_context (!seen_error ());
488
489#ifndef ACCEL_COMPILER
490 /* Partitioning disabled on host compilers. */
491 e_mask = 0;
492#endif
493 if (!e_mask)
494 /* Not partitioning. */
495 span = integer_one_node;
496 else if (!integer_zerop (tile_size))
497 /* User explicitly specified size. */
498 span = tile_size;
499 else
500 {
501 /* Pick a size based on the partitioning of the element loop and
502 the number of loop nests. */
503 tree first_size = NULL_TREE;
504 tree second_size = NULL_TREE;
505
506 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
507 first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
508 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
509 second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
510
511 if (!first_size)
512 {
513 first_size = second_size;
514 second_size = NULL_TREE;
515 }
516
517 if (loop_no + 1 == collapse)
518 {
519 span = first_size;
520 if (!loop_no && second_size)
521 span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
522 span, second_size);
523 }
524 else if (loop_no + 2 == collapse)
525 span = second_size;
526 else
527 span = NULL_TREE;
528
529 if (!span)
530 /* There's no obvious element size for this loop. Options
531 are 1, first_size or some non-unity constant (32 is my
532 favourite). We should gather some statistics. */
533 span = first_size;
534 }
535
536 span = fold_convert (type, span);
537 gimplify_assign (lhs, span, &seq);
538
539 pop_gimplify_context (NULL);
540
541 gsi_replace_with_seq (&gsi, seq, true);
542}
543
544/* Default partitioned and minimum partitioned dimensions. */
545
546static int oacc_default_dims[GOMP_DIM_MAX];
547static int oacc_min_dims[GOMP_DIM_MAX];
548
549/* Parse the default dimension parameter. This is a set of
550 :-separated optional compute dimensions. Each specified dimension
551 is a positive integer. When device type support is added, it is
552 planned to be a comma separated list of such compute dimensions,
553 with all but the first prefixed by the colon-terminated device
554 type. */
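/* For example (illustrative): "-fopenacc-dim=32:4:128" requests 32 gangs,
   4 workers and a vector length of 128, while "-fopenacc-dim=::64" only
   overrides the vector length; positions left empty keep the "unset"
   default of -1.  */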
555
556static void
557oacc_parse_default_dims (const char *dims)
558{
559 int ix;
560
561 for (ix = GOMP_DIM_MAX; ix--;)
562 {
563 oacc_default_dims[ix] = -1;
564 oacc_min_dims[ix] = 1;
565 }
566
567#ifndef ACCEL_COMPILER
568 /* Cannot be overridden on the host. */
569 dims = NULL;
570#endif
571 if (dims)
572 {
573 const char *pos = dims;
574
575 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
576 {
577 if (ix)
578 {
579 if (*pos != ':')
580 goto malformed;
581 pos++;
582 }
583
584 if (*pos != ':')
585 {
586 long val;
587 const char *eptr;
588
589 errno = 0;
590 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
591 if (errno || val <= 0 || (int) val != val)
592 goto malformed;
593 pos = eptr;
594 oacc_default_dims[ix] = (int) val;
595 }
596 }
597 if (*pos)
598 {
599 malformed:
600 error_at (UNKNOWN_LOCATION,
601 "-fopenacc-dim operand is malformed at '%s'", pos);
602 }
603 }
604
605 /* Allow the backend to validate the dimensions. */
606 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
607 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
608}
609
610/* Validate and update the dimensions for offloaded FN. ATTRS is the
611 raw attribute. DIMS is an array of dimensions, which is filled in.
612 LEVEL is the partitioning level of a routine, or -1 for an offload
613 region itself. USED is the mask of partitioned execution in the
614 function. */
615
616static void
617oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
618{
619 tree purpose[GOMP_DIM_MAX];
620 unsigned ix;
621 tree pos = TREE_VALUE (attrs);
622 bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
623
624 /* Make sure the attribute creator attached the dimension
625 information. */
626 gcc_assert (pos);
627
628 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
629 {
630 purpose[ix] = TREE_PURPOSE (pos);
631 tree val = TREE_VALUE (pos);
632 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
633 pos = TREE_CHAIN (pos);
634 }
635
636 bool changed = targetm.goacc.validate_dims (fn, dims, level);
637
638 /* Default anything left to 1 or a partitioned default. */
639 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
640 if (dims[ix] < 0)
641 {
642 /* The OpenACC spec says 'If the [num_gangs] clause is not
643 specified, an implementation-defined default will be used;
644 the default may depend on the code within the construct.'
645 (2.5.6). Thus an implementation is free to choose
646 non-unity default for a parallel region that doesn't have
647 any gang-partitioned loops. However, it appears that there
648 is a sufficient body of user code that expects non-gang
649 partitioned regions to not execute in gang-redundant mode.
650 So we (a) don't warn about the non-portability and (b) pick
651 the minimum permissible dimension size when there is no
652 partitioned execution. Otherwise we pick the global
653 default for the dimension, which the user can control. The
654 same wording and logic applies to num_workers and
655 vector_length, however the worker- or vector- single
656 execution doesn't have the same impact as gang-redundant
657 execution. (If the minimum gang-level partitioning is not 1,
658 the target is probably too confusing.) */
659 dims[ix] = (used & GOMP_DIM_MASK (ix)
660 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
661 changed = true;
662 }
663
664 if (changed)
665 {
666 /* Replace the attribute with new values. */
667 pos = NULL_TREE;
668 for (ix = GOMP_DIM_MAX; ix--;)
669 {
670 pos = tree_cons (purpose[ix],
671 build_int_cst (integer_type_node, dims[ix]),
672 pos);
673 if (is_kernel)
674 TREE_PUBLIC (pos) = 1;
675 }
676 oacc_replace_fn_attrib (fn, pos);
677 }
678}
679
680/* Create an empty OpenACC loop structure at LOC. */
681
682static oacc_loop *
683new_oacc_loop_raw (oacc_loop *parent, location_t loc)
684{
685 oacc_loop *loop = XCNEW (oacc_loop);
686
687 loop->parent = parent;
688
689 if (parent)
690 {
691 loop->sibling = parent->child;
692 parent->child = loop;
693 }
694
695 loop->loc = loc;
696 return loop;
697}
698
699/* Create an outermost, dummy OpenACC loop for offloaded function
700 DECL. */
701
702static oacc_loop *
703new_oacc_loop_outer (tree decl)
704{
705 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
706}
707
708/* Start a new OpenACC loop structure beginning at head marker HEAD.
709 Link into PARENT loop. Return the new loop. */
710
711static oacc_loop *
712new_oacc_loop (oacc_loop *parent, gcall *marker)
713{
714 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
715
716 loop->marker = marker;
717
718 /* TODO: This is where device_type flattening would occur for the loop
719 flags. */
720
721 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
722
723 tree chunk_size = integer_zero_node;
724 if (loop->flags & OLF_GANG_STATIC)
725 chunk_size = gimple_call_arg (marker, 4);
726 loop->chunk_size = chunk_size;
727
728 return loop;
729}
730
731/* Create a dummy loop encompassing a call to an OpenACC routine.
732 Extract the routine's partitioning requirements. */
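/* For illustration: a function declared "#pragma acc routine worker" has
   level == GOMP_DIM_WORKER, so the mask computed below covers the worker and
   vector dimensions, i.e. the parallelism the routine may itself use and
   which its caller therefore must not already occupy.  */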
733
734static void
735new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
736{
737 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
738 int level = oacc_fn_attrib_level (attrs);
739
740 gcc_assert (level >= 0);
741
742 loop->marker = call;
743 loop->routine = decl;
744 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
745 ^ (GOMP_DIM_MASK (level) - 1));
746}
747
748/* Finish off the current OpenACC loop ending at tail marker TAIL.
749 Return the parent loop. */
750
751static oacc_loop *
752finish_oacc_loop (oacc_loop *loop)
753{
754 /* If the loop has been collapsed, don't partition it. */
755 if (loop->ifns.is_empty ())
756 loop->mask = loop->flags = 0;
757 return loop->parent;
758}
759
760/* Free all OpenACC loop structures within LOOP (inclusive). */
761
762static void
763free_oacc_loop (oacc_loop *loop)
764{
765 if (loop->sibling)
766 free_oacc_loop (loop->sibling);
767 if (loop->child)
768 free_oacc_loop (loop->child);
769
770 loop->ifns.release ();
771 free (loop);
772}
773
774/* Dump out the OpenACC loop head or tail beginning at FROM. */
775
776static void
777dump_oacc_loop_part (FILE *file, gcall *from, int depth,
778 const char *title, int level)
779{
780 enum ifn_unique_kind kind
781 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
782
783 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
784 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
785 {
786 gimple *stmt = gsi_stmt (gsi);
787
788 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
789 {
790 enum ifn_unique_kind k
791 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
792 (gimple_call_arg (stmt, 0)));
793
794 if (k == kind && stmt != from)
795 break;
796 }
797 print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
798
799 gsi_next (&gsi);
800 while (gsi_end_p (gsi))
801 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
802 }
803}
804
805/* Dump OpenACC loops LOOP, its siblings and its children. */
806
807static void
808dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
809{
810 int ix;
811
812 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
813 loop->flags, loop->mask,
814 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
815
816 if (loop->marker)
817 print_gimple_stmt (file, loop->marker, depth * 2, 0);
818
819 if (loop->routine)
820 fprintf (file, "%*sRoutine %s:%u:%s\n",
821 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
822 DECL_SOURCE_LINE (loop->routine),
823 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
824
825 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
826 if (loop->heads[ix])
827 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
828 for (ix = GOMP_DIM_MAX; ix--;)
829 if (loop->tails[ix])
830 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
831
832 if (loop->child)
833 dump_oacc_loop (file, loop->child, depth + 1);
834 if (loop->sibling)
835 dump_oacc_loop (file, loop->sibling, depth);
836}
837
838void debug_oacc_loop (oacc_loop *);
839
840/* Dump loops to stderr. */
841
842DEBUG_FUNCTION void
843debug_oacc_loop (oacc_loop *loop)
844{
845 dump_oacc_loop (stderr, loop, 0);
846}
847
848/* DFS walk of basic blocks BB onwards, creating OpenACC loop
849 structures as we go. By construction these loops are properly
850 nested. */
851
852static void
853oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
854{
855 int marker = 0;
856 int remaining = 0;
857
858 if (bb->flags & BB_VISITED)
859 return;
860
861 follow:
862 bb->flags |= BB_VISITED;
863
864 /* Scan for loop markers. */
865 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
866 gsi_next (&gsi))
867 {
868 gimple *stmt = gsi_stmt (gsi);
869
870 if (!is_gimple_call (stmt))
871 continue;
872
873 gcall *call = as_a <gcall *> (stmt);
874
875 /* If this is a routine, make a dummy loop for it. */
876 if (tree decl = gimple_call_fndecl (call))
877 if (tree attrs = oacc_get_fn_attrib (decl))
878 {
879 gcc_assert (!marker);
880 new_oacc_loop_routine (loop, call, decl, attrs);
881 }
882
883 if (!gimple_call_internal_p (call))
884 continue;
885
886 switch (gimple_call_internal_fn (call))
887 {
888 default:
889 break;
890
891 case IFN_GOACC_LOOP:
892 case IFN_GOACC_TILE:
893 /* Record the abstraction function, so we can manipulate it
894 later. */
895 loop->ifns.safe_push (call);
896 break;
897
898 case IFN_UNIQUE:
899 enum ifn_unique_kind kind
900 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
901 (gimple_call_arg (call, 0)));
902 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
903 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
904 {
905 if (gimple_call_num_args (call) == 2)
906 {
907 gcc_assert (marker && !remaining);
908 marker = 0;
909 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
910 loop = finish_oacc_loop (loop);
911 else
912 loop->head_end = call;
913 }
914 else
915 {
916 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
917
918 if (!marker)
919 {
920 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
921 loop = new_oacc_loop (loop, call);
922 remaining = count;
923 }
924 gcc_assert (count == remaining);
925 if (remaining)
926 {
927 remaining--;
928 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
929 loop->heads[marker] = call;
930 else
931 loop->tails[remaining] = call;
932 }
933 marker++;
934 }
935 }
936 }
937 }
938 if (remaining || marker)
939 {
940 bb = single_succ (bb);
941 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
942 goto follow;
943 }
944
945 /* Walk successor blocks. */
946 edge e;
947 edge_iterator ei;
948
949 FOR_EACH_EDGE (e, ei, bb->succs)
950 oacc_loop_discover_walk (loop, e->dest);
951}
952
953/* LOOP is the first sibling. Reverse the order in place and return
954 the new first sibling. Recurse to child loops. */
955
956static oacc_loop *
957oacc_loop_sibling_nreverse (oacc_loop *loop)
958{
959 oacc_loop *last = NULL;
960 do
961 {
962 if (loop->child)
963 loop->child = oacc_loop_sibling_nreverse (loop->child);
964
965 oacc_loop *next = loop->sibling;
966 loop->sibling = last;
967 last = loop;
968 loop = next;
969 }
970 while (loop);
971
972 return last;
973}
974
975/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
976 the current function. */
977
978static oacc_loop *
979oacc_loop_discovery ()
980{
981 /* Clear basic block flags, in particular BB_VISITED which we're going to use
982 in the following. */
983 clear_bb_flags ();
984
985 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
986 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
987
988 /* The siblings were constructed in reverse order, reverse them so
989 that diagnostics come out in an unsurprising order. */
990 top = oacc_loop_sibling_nreverse (top);
991
992 return top;
993}
994
995/* Transform the abstract internal function markers starting at FROM
996 to be for partitioning level LEVEL. Stop when we meet another HEAD
997 or TAIL marker. */
998
999static void
1000oacc_loop_xform_head_tail (gcall *from, int level)
1001{
1002 enum ifn_unique_kind kind
1003 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1004 tree replacement = build_int_cst (unsigned_type_node, level);
1005
1006 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1007 {
1008 gimple *stmt = gsi_stmt (gsi);
1009
1010 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1011 {
1012 enum ifn_unique_kind k
1013 = ((enum ifn_unique_kind)
1014 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1015
1016 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
1017 *gimple_call_arg_ptr (stmt, 2) = replacement;
1018 else if (k == kind && stmt != from)
1019 break;
1020 }
1021 else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
1022 *gimple_call_arg_ptr (stmt, 3) = replacement;
1023
1024 gsi_next (&gsi);
1025 while (gsi_end_p (gsi))
1026 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1027 }
1028}
1029
1030/* Process the discovered OpenACC loops, setting the correct
1031 partitioning level etc. */
1032
1033static void
1034oacc_loop_process (oacc_loop *loop)
1035{
1036 if (loop->child)
1037 oacc_loop_process (loop->child);
1038
1039 if (loop->mask && !loop->routine)
1040 {
1041 int ix;
1042 tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
1043 tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
1044 tree chunk_arg = loop->chunk_size;
1045 gcall *call;
1046
1047 for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
1048 switch (gimple_call_internal_fn (call))
1049 {
1050 case IFN_GOACC_LOOP:
1051 {
1052 bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
1053 gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
1054 if (!is_e)
1055 gimple_call_set_arg (call, 4, chunk_arg);
1056 }
1057 break;
1058
1059 case IFN_GOACC_TILE:
1060 gimple_call_set_arg (call, 3, mask_arg);
1061 gimple_call_set_arg (call, 4, e_mask_arg);
1062 break;
1063
1064 default:
1065 gcc_unreachable ();
1066 }
1067
1068 unsigned dim = GOMP_DIM_GANG;
1069 unsigned mask = loop->mask | loop->e_mask;
1070 for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1071 {
1072 while (!(GOMP_DIM_MASK (dim) & mask))
1073 dim++;
1074
1075 oacc_loop_xform_head_tail (loop->heads[ix], dim);
1076 oacc_loop_xform_head_tail (loop->tails[ix], dim);
1077
1078 mask ^= GOMP_DIM_MASK (dim);
1079 }
1080 }
1081
1082 if (loop->sibling)
1083 oacc_loop_process (loop->sibling);
1084}
1085
1086/* Walk the OpenACC loop hierarchy checking and assigning the
1087 programmer-specified partitionings. OUTER_MASK is the partitioning
1088 this loop is contained within. Return mask of partitioning
1089 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1090 bit. */
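/* Rough example of the checking done below: in

     #pragma acc loop gang
     for (...)
       #pragma acc loop gang vector
       for (...)

   the inner loop's explicit "gang" repeats the parallelism of its containing
   loop; it is diagnosed and stripped, leaving only "vector" on the inner
   loop.  */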
1091
1092static unsigned
1093oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1094{
1095 unsigned this_mask = loop->mask;
1096 unsigned mask_all = 0;
1097 bool noisy = true;
1098
1099#ifdef ACCEL_COMPILER
1100 /* When device_type is supported, we want the device compiler to be
1101 noisy, if the loop parameters are device_type-specific. */
1102 noisy = false;
1103#endif
1104
1105 if (!loop->routine)
1106 {
1107 bool auto_par = (loop->flags & OLF_AUTO) != 0;
1108 bool seq_par = (loop->flags & OLF_SEQ) != 0;
1109 bool tiling = (loop->flags & OLF_TILE) != 0;
1110
1111 this_mask = ((loop->flags >> OLF_DIM_BASE)
1112 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1113
1114 /* Apply auto partitioning if this is a non-partitioned regular
1115 loop, or a tiled loop partitioned on no more than a single axis. */
1116 bool maybe_auto
1117 = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
1118
1119 if ((this_mask != 0) + auto_par + seq_par > 1)
1120 {
1121 if (noisy)
1122 error_at (loop->loc,
1123 seq_par
1124 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1125 : G_("%<auto%> conflicts with other OpenACC loop "
1126 "specifiers"));
1127 maybe_auto = false;
1128 loop->flags &= ~OLF_AUTO;
1129 if (seq_par)
1130 {
1131 loop->flags
1132 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
1133 this_mask = 0;
1134 }
1135 }
1136
1137 if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
1138 {
1139 loop->flags |= OLF_AUTO;
1140 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1141 }
1142 }
1143
1144 if (this_mask & outer_mask)
1145 {
1146 const oacc_loop *outer;
1147 for (outer = loop->parent; outer; outer = outer->parent)
1148 if ((outer->mask | outer->e_mask) & this_mask)
1149 break;
1150
1151 if (noisy)
1152 {
1153 if (outer)
1154 {
1155 error_at (loop->loc,
1156 loop->routine
1157 ? G_("routine call uses same OpenACC parallelism"
1158 " as containing loop")
1159 : G_("inner loop uses same OpenACC parallelism"
1160 " as containing loop"));
1161 inform (outer->loc, "containing loop here");
1162 }
1163 else
1164 error_at (loop->loc,
1165 loop->routine
1166 ? G_("routine call uses OpenACC parallelism disallowed"
1167 " by containing routine")
1168 : G_("loop uses OpenACC parallelism disallowed"
1169 " by containing routine"));
1170
1171 if (loop->routine)
1172 inform (DECL_SOURCE_LOCATION (loop->routine),
1173 "routine %qD declared here", loop->routine);
1174 }
1175 this_mask &= ~outer_mask;
1176 }
1177 else
1178 {
1179 unsigned outermost = least_bit_hwi (this_mask);
1180
1181 if (outermost && outermost <= outer_mask)
1182 {
1183 if (noisy)
1184 {
1185 error_at (loop->loc,
1186 "incorrectly nested OpenACC loop parallelism");
1187
1188 const oacc_loop *outer;
1189 for (outer = loop->parent;
1190 outer->flags && outer->flags < outermost;
1191 outer = outer->parent)
1192 continue;
1193 inform (outer->loc, "containing loop here");
1194 }
1195
1196 this_mask &= ~outermost;
1197 }
1198 }
1199
1200 mask_all |= this_mask;
1201
1202 if (loop->flags & OLF_TILE)
1203 {
1204 /* When tiling, vector goes to the element loop, and failing
1205 that we put worker there. The std doesn't contemplate
1206 specifying all three. We choose to put worker and vector on
1207 the element loops in that case. */
1208 unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
1209 if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1210 this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
1211
1212 loop->e_mask = this_e_mask;
1213 this_mask ^= this_e_mask;
1214 }
1215
1216 loop->mask = this_mask;
1217
1218 if (dump_file)
1219 fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
1220 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1221 loop->mask, loop->e_mask);
1222
1223 if (loop->child)
1224 {
1225 unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
1226 loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
1227 mask_all |= loop->inner;
1228 }
1229
1230 if (loop->sibling)
1231 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1232
1233 return mask_all;
1234}
1235
1236/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
1237 OUTER_MASK is the partitioning this loop is contained within.
1238 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1239 Return the cumulative partitioning used by this loop, siblings and
1240 children. */
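/* Rough example: for a two-deep "auto" + "independent" loop nest inside a
   parallel region, the first block below typically gives the outer loop the
   gang level, the recursion gives the inner loop the vector level, and the
   second block may then additionally assign worker to the outer loop,
   partitioning it along two axes.  */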
1241
1242static unsigned
1243oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1244 bool outer_assign)
1245{
1246 bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1247 bool noisy = true;
1248 bool tiling = loop->flags & OLF_TILE;
1249
1250#ifdef ACCEL_COMPILER
1251 /* When device_type is supported, we want the device compiler to be
1252 noisy, if the loop parameters are device_type-specific. */
1253 noisy = false;
1254#endif
1255
1256 if (assign && (!outer_assign || loop->inner))
1257 {
1258 /* Allocate outermost and non-innermost loops at the outermost
1259 non-innermost available level. */
1260 unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1261
1262 /* Find the first outermost available partition. */
1263 while (this_mask <= outer_mask)
1264 this_mask <<= 1;
1265
1266 /* Grab two axes if tiling, and we've not assigned anything. */
1267 if (tiling && !(loop->mask | loop->e_mask))
1268 this_mask |= this_mask << 1;
1269
1270 /* Prohibit the innermost partitioning at the moment. */
1271 this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
1272
1273 /* Don't use any dimension explicitly claimed by an inner loop. */
1274 this_mask &= ~loop->inner;
1275
1276 if (tiling && !loop->e_mask)
1277 {
1278 /* If we got two axes, allocate the inner one to the element
1279 loop. */
1280 loop->e_mask = this_mask & (this_mask << 1);
1281 this_mask ^= loop->e_mask;
1282 }
1283
1284 loop->mask |= this_mask;
1285 }
1286
1287 if (loop->child)
1288 {
1289 unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1290 loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1291 outer_assign | assign);
1292 }
1293
1294 if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
1295 {
1296 /* Allocate the loop at the innermost available level. Note
1297 that we do this even if we already assigned this loop the
1298 outermost available level above. That way we'll partition
1299 this along 2 axes, if they are available. */
1300 unsigned this_mask = 0;
1301
1302 /* Determine the outermost partitioning used within this loop. */
1303 this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1304 this_mask = least_bit_hwi (this_mask);
1305
1306 /* Pick the partitioning just inside that one. */
1307 this_mask >>= 1;
1308
1309 /* And avoid picking one used by an outer loop. */
1310 this_mask &= ~outer_mask;
1311
1312 /* If tiling and we failed completely above, grab the next one
1313 too, making sure it doesn't hit an outer loop. */
1314 if (tiling)
1315 {
1316 this_mask &= ~(loop->e_mask | loop->mask);
1317 unsigned tile_mask = ((this_mask >> 1)
1318 & ~(outer_mask | loop->e_mask | loop->mask));
1319
1320 if (tile_mask || loop->mask)
1321 {
1322 loop->e_mask |= this_mask;
1323 this_mask = tile_mask;
1324 }
1325 if (!loop->e_mask && noisy)
1326 warning_at (loop->loc, 0,
1327 "insufficient partitioning available"
1328 " to parallelize element loop");
1329 }
1330
1331 loop->mask |= this_mask;
1332 if (!loop->mask && noisy)
1333 warning_at (loop->loc, 0,
1334 tiling
1335 ? G_("insufficient partitioning available"
1336 " to parallelize tile loop")
1337 : G_("insufficient partitioning available"
1338 " to parallelize loop"));
1339 }
1340
1341 if (assign && dump_file)
1342 fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
1343 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1344 loop->mask, loop->e_mask);
1345
1346 unsigned inner_mask = 0;
1347
1348 if (loop->sibling)
1349 inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1350 outer_mask, outer_assign);
1351
1352 inner_mask |= loop->inner | loop->mask | loop->e_mask;
1353
1354 return inner_mask;
1355}
1356
1357/* Walk the OpenACC loop hierarchy to check and assign partitioning
1358 axes. Return mask of partitioning. */
1359
1360static unsigned
1361oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1362{
1363 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1364
1365 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1366 {
1367 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1368 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1369 }
1370 return mask_all;
1371}
1372
1373/* Default fork/join early expander. Delete the function calls if
1374 there is no RTL expander. */
1375
1376bool
1377default_goacc_fork_join (gcall *ARG_UNUSED (call),
1378 const int *ARG_UNUSED (dims), bool is_fork)
1379{
1380 if (is_fork)
1381 return targetm.have_oacc_fork ();
1382 else
1383 return targetm.have_oacc_join ();
1384}
1385
1386/* Default goacc.reduction early expander.
1387
1388 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1389 If RES_PTR is not integer-zerop:
1390 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1391 TEARDOWN - emit '*RES_PTR = VAR'
1392 If LHS is not NULL
1393 emit 'LHS = VAR' */
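/* Illustrative sketch: with a non-zero RES_PTR, a SETUP call such as
   'v_1 = GOACC_REDUCTION (SETUP, &res, v_0, ...)' becomes 'v_1 = res',
   and the matching TEARDOWN becomes 'res = v_0' followed by the optional
   'LHS = v_0' copy; with a zero RES_PTR both reduce to a plain copy of
   VAR.  */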
1394
1395void
1396default_goacc_reduction (gcall *call)
1397{
1398 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1399 gimple_stmt_iterator gsi = gsi_for_stmt (call);
1400 tree lhs = gimple_call_lhs (call);
1401 tree var = gimple_call_arg (call, 2);
1402 gimple_seq seq = NULL;
1403
1404 if (code == IFN_GOACC_REDUCTION_SETUP
1405 || code == IFN_GOACC_REDUCTION_TEARDOWN)
1406 {
1407 /* Setup and Teardown need to copy from/to the receiver object,
1408 if there is one. */
1409 tree ref_to_res = gimple_call_arg (call, 1);
1410
1411 if (!integer_zerop (ref_to_res))
1412 {
1413 tree dst = build_simple_mem_ref (ref_to_res);
1414 tree src = var;
1415
1416 if (code == IFN_GOACC_REDUCTION_SETUP)
1417 {
1418 src = dst;
1419 dst = lhs;
1420 lhs = NULL;
1421 }
1422 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1423 }
1424 }
1425
1426 /* Copy VAR to LHS, if there is an LHS. */
1427 if (lhs)
1428 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1429
1430 gsi_replace_with_seq (&gsi, seq, true);
1431}
1432
1433/* Main entry point for oacc transformations which run on the device
1434 compiler after LTO, so we know what the target device is at this
1435 point (including the host fallback). */
1436
1437static unsigned int
1438execute_oacc_device_lower ()
1439{
1440 tree attrs = oacc_get_fn_attrib (current_function_decl);
1441
1442 if (!attrs)
1443 /* Not an offloaded function. */
1444 return 0;
1445
1446 /* Parse the default dim argument exactly once. */
1447 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1448 {
1449 oacc_parse_default_dims (flag_openacc_dims);
1450 flag_openacc_dims = (char *)&flag_openacc_dims;
1451 }
1452
1453 /* Discover, partition and process the loops. */
1454 oacc_loop *loops = oacc_loop_discovery ();
1455 int fn_level = oacc_fn_attrib_level (attrs);
1456
1457 if (dump_file)
1458 fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
1459 ? "Function is kernels offload\n"
1460 : fn_level < 0 ? "Function is parallel offload\n"
1461 : "Function is routine level %d\n", fn_level);
1462
1463 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
1464 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
1465 int dims[GOMP_DIM_MAX];
1466
1467 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
1468
1469 if (dump_file)
1470 {
1471 const char *comma = "Compute dimensions [";
1472 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
1473 fprintf (dump_file, "%s%d", comma, dims[ix]);
1474 fprintf (dump_file, "]\n");
1475 }
1476
1477 oacc_loop_process (loops);
1478 if (dump_file)
1479 {
1480 fprintf (dump_file, "OpenACC loops\n");
1481 dump_oacc_loop (dump_file, loops, 0);
1482 fprintf (dump_file, "\n");
1483 }
1484
1485 /* Offloaded targets may introduce new basic blocks, which require
1486 dominance information to update SSA. */
1487 calculate_dominance_info (CDI_DOMINATORS);
1488
1489 /* Now lower internal loop functions to target-specific code
1490 sequences. */
1491 basic_block bb;
1492 FOR_ALL_BB_FN (bb, cfun)
1493 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1494 {
1495 gimple *stmt = gsi_stmt (gsi);
1496 if (!is_gimple_call (stmt))
1497 {
1498 gsi_next (&gsi);
1499 continue;
1500 }
1501
1502 gcall *call = as_a <gcall *> (stmt);
1503 if (!gimple_call_internal_p (call))
1504 {
1505 gsi_next (&gsi);
1506 continue;
1507 }
1508
1509 /* Rewind to allow rescan. */
1510 gsi_prev (&gsi);
1511 bool rescan = false, remove = false;
1512 enum internal_fn ifn_code = gimple_call_internal_fn (call);
1513
1514 switch (ifn_code)
1515 {
1516 default: break;
1517
1518 case IFN_GOACC_TILE:
1519 oacc_xform_tile (call);
1520 rescan = true;
1521 break;
1522
1523 case IFN_GOACC_LOOP:
1524 oacc_xform_loop (call);
1525 rescan = true;
1526 break;
1527
1528 case IFN_GOACC_REDUCTION:
1529 /* Mark the function for SSA renaming. */
1530 mark_virtual_operands_for_renaming (cfun);
1531
1532 /* If the level is -1, this ended up being an unused
1533 axis. Handle as a default. */
1534 if (integer_minus_onep (gimple_call_arg (call, 3)))
1535 default_goacc_reduction (call);
1536 else
1537 targetm.goacc.reduction (call);
1538 rescan = true;
1539 break;
1540
1541 case IFN_UNIQUE:
1542 {
1543 enum ifn_unique_kind kind
1544 = ((enum ifn_unique_kind)
1545 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1546
1547 switch (kind)
1548 {
1549 default:
1550 break;
1551
1552 case IFN_UNIQUE_OACC_FORK:
1553 case IFN_UNIQUE_OACC_JOIN:
1554 if (integer_minus_onep (gimple_call_arg (call, 2)))
1555 remove = true;
1556 else if (!targetm.goacc.fork_join
1557 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
1558 remove = true;
1559 break;
1560
1561 case IFN_UNIQUE_OACC_HEAD_MARK:
1562 case IFN_UNIQUE_OACC_TAIL_MARK:
1563 remove = true;
1564 break;
1565 }
1566 break;
1567 }
1568 }
1569
1570 if (gsi_end_p (gsi))
1571 /* We rewound past the beginning of the BB. */
1572 gsi = gsi_start_bb (bb);
1573 else
1574 /* Undo the rewind. */
1575 gsi_next (&gsi);
1576
1577 if (remove)
1578 {
1579 if (gimple_vdef (call))
1580 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
1581 if (gimple_call_lhs (call))
1582 {
1583 /* Propagate the data dependency var. */
1584 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
1585 gimple_call_arg (call, 1));
1586 gsi_replace (&gsi, ass, false);
1587 }
1588 else
1589 gsi_remove (&gsi, true);
1590 }
1591 else if (!rescan)
1592 /* If not rescanning, advance over the call. */
1593 gsi_next (&gsi);
1594 }
1595
1596 free_oacc_loop (loops);
1597
1598 return 0;
1599}
1600
1601/* Default launch dimension validator. Force everything to 1. A
1602 backend that wants to provide larger dimensions must override this
1603 hook. */
1604
1605bool
1606default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1607 int ARG_UNUSED (fn_level))
1608{
1609 bool changed = false;
1610
1611 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1612 {
1613 if (dims[ix] != 1)
1614 {
1615 dims[ix] = 1;
1616 changed = true;
1617 }
1618 }
1619
1620 return changed;
1621}
1622
1623/* Default dimension bound is unknown on accelerator and 1 on host. */
1624
1625int
1626default_goacc_dim_limit (int ARG_UNUSED (axis))
1627{
1628#ifdef ACCEL_COMPILER
1629 return 0;
1630#else
1631 return 1;
1632#endif
1633}
1634
1635namespace {
1636
1637const pass_data pass_data_oacc_device_lower =
1638{
1639 GIMPLE_PASS, /* type */
1640 "oaccdevlow", /* name */
1641 OPTGROUP_OMP, /* optinfo_flags */
1642 TV_NONE, /* tv_id */
1643 PROP_cfg, /* properties_required */
1644 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1645 0, /* properties_destroyed */
1646 0, /* todo_flags_start */
1647 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
1648};
1649
1650class pass_oacc_device_lower : public gimple_opt_pass
1651{
1652public:
1653 pass_oacc_device_lower (gcc::context *ctxt)
1654 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
1655 {}
1656
1657 /* opt_pass methods: */
1658 virtual bool gate (function *) { return flag_openacc; };
1659
1660 virtual unsigned int execute (function *)
1661 {
1662 return execute_oacc_device_lower ();
1663 }
1664
1665}; // class pass_oacc_device_lower
1666
1667} // anon namespace
1668
1669gimple_opt_pass *
1670make_pass_oacc_device_lower (gcc::context *ctxt)
1671{
1672 return new pass_oacc_device_lower (ctxt);
1673}
1674
1675\f
1676/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
1677 GOMP_SIMT_ENTER call identifying the privatized variables, which are
1678 turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
1679 Set *REGIMPLIFY to true, except if no privatized variables were seen. */
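/* Illustrative sketch of the rewrite, for two privatized variables x and y:

     simduid_1 = GOMP_SIMT_ENTER (simduid_0, &x, &y);
     simtrec_2 = GOMP_SIMT_ENTER_ALLOC (simduid_1);

   becomes

     simduid_1 = simduid_0;
     simtrec_2 = GOMP_SIMT_ENTER_ALLOC (record size, record alignment);

   with x and y turned into fields of the new record type, accessed through
   DECL_VALUE_EXPRs based on *simtrec_2, and a clobber of *simtrec_2 inserted
   before the matching GOMP_SIMT_EXIT.  */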
1680
1681static void
1682ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
1683{
1684 gimple *alloc_stmt = gsi_stmt (*gsi);
1685 tree simtrec = gimple_call_lhs (alloc_stmt);
1686 tree simduid = gimple_call_arg (alloc_stmt, 0);
1687 gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
1688 gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
1689 tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
1690 TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
1691 TREE_ADDRESSABLE (rectype) = 1;
1692 TREE_TYPE (simtrec) = build_pointer_type (rectype);
1693 for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
1694 {
1695 tree *argp = gimple_call_arg_ptr (enter_stmt, i);
1696 if (*argp == null_pointer_node)
1697 continue;
1698 gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
1699 && VAR_P (TREE_OPERAND (*argp, 0)));
1700 tree var = TREE_OPERAND (*argp, 0);
1701
1702 tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
1703 DECL_NAME (var), TREE_TYPE (var));
1704 SET_DECL_ALIGN (field, DECL_ALIGN (var));
1705 DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
1706 TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
1707
1708 insert_field_into_struct (rectype, field);
1709
1710 tree t = build_simple_mem_ref (simtrec);
1711 t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
1712 TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
1713 SET_DECL_VALUE_EXPR (var, t);
1714 DECL_HAS_VALUE_EXPR_P (var) = 1;
1715 *regimplify = true;
1716 }
1717 layout_type (rectype);
1718 tree size = TYPE_SIZE_UNIT (rectype);
1719 tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
1720
1721 alloc_stmt
1722 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
1723 gimple_call_set_lhs (alloc_stmt, simtrec);
1724 gsi_replace (gsi, alloc_stmt, false);
1725 gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
1726 enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
1727 gsi_replace (&enter_gsi, enter_stmt, false);
1728
1729 use_operand_p use;
1730 gimple *exit_stmt;
1731 if (single_imm_use (simtrec, &use, &exit_stmt))
1732 {
1733 gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
1734 gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
1735 tree clobber = build_constructor (rectype, NULL);
1736 TREE_THIS_VOLATILE (clobber) = 1;
1737 exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
1738 gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
1739 }
1740 else
1741 gcc_checking_assert (has_zero_uses (simtrec));
1742}
1743
1744/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
1745
1746static tree
1747find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
1748{
1749 tree t = *tp;
1750
1751 if (VAR_P (t)
1752 && DECL_HAS_VALUE_EXPR_P (t)
1753 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
1754 {
1755 *walk_subtrees = 0;
1756 return t;
1757 }
1758 return NULL_TREE;
1759}
1760
1761/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
1762 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
1763 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
1764 internal functions on non-SIMT targets, and likewise some SIMD internal
1765 functions on SIMT targets. */
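/* Illustrative sketch of the folding on a non-SIMT target (VF == 1):

     _1 = GOMP_USE_SIMT ()         -->  _1 = 0
     _2 = GOMP_SIMT_VF ()          -->  _2 = 1
     _3 = GOMP_SIMT_LANE ()        -->  _3 = 0
     _4 = GOMP_SIMT_VOTE_ANY (x_5) -->  _4 = x_5

   On a SIMT target, GOMP_SIMT_VF folds to the actual warp size, the
   lane-related GOMP_SIMT_* calls are kept for RTL expansion, and the
   GOMP_SIMD_* queries fold to single-lane values (VF 1, lane 0).  */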
1766
1767static unsigned int
1768execute_omp_device_lower ()
1769{
1770 int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
1771 bool regimplify = false;
1772 basic_block bb;
1773 gimple_stmt_iterator gsi;
1774 FOR_EACH_BB_FN (bb, cfun)
1775 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1776 {
1777 gimple *stmt = gsi_stmt (gsi);
1778 if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
1779 continue;
1780 tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
1781 tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
1782 switch (gimple_call_internal_fn (stmt))
1783 {
1784 case IFN_GOMP_USE_SIMT:
1785 rhs = vf == 1 ? integer_zero_node : integer_one_node;
1786 break;
1787 case IFN_GOMP_SIMT_ENTER:
1788 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
1789 goto simtreg_enter_exit;
1790 case IFN_GOMP_SIMT_ENTER_ALLOC:
1791 if (vf != 1)
1792 ompdevlow_adjust_simt_enter (&gsi, &regimplify);
1793 rhs = vf == 1 ? null_pointer_node : NULL_TREE;
1794 goto simtreg_enter_exit;
1795 case IFN_GOMP_SIMT_EXIT:
1796 simtreg_enter_exit:
1797 if (vf != 1)
1798 continue;
1799 unlink_stmt_vdef (stmt);
1800 break;
1801 case IFN_GOMP_SIMT_LANE:
1802 case IFN_GOMP_SIMT_LAST_LANE:
1803 rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
1804 break;
1805 case IFN_GOMP_SIMT_VF:
1806 rhs = build_int_cst (type, vf);
1807 break;
1808 case IFN_GOMP_SIMT_ORDERED_PRED:
1809 rhs = vf == 1 ? integer_zero_node : NULL_TREE;
1810 if (rhs || !lhs)
1811 unlink_stmt_vdef (stmt);
1812 break;
1813 case IFN_GOMP_SIMT_VOTE_ANY:
1814 case IFN_GOMP_SIMT_XCHG_BFLY:
1815 case IFN_GOMP_SIMT_XCHG_IDX:
1816 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
1817 break;
1818 case IFN_GOMP_SIMD_LANE:
1819 case IFN_GOMP_SIMD_LAST_LANE:
1820 rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
1821 break;
1822 case IFN_GOMP_SIMD_VF:
1823 rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
1824 break;
1825 default:
1826 continue;
1827 }
1828 if (lhs && !rhs)
1829 continue;
1830 stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
1831 gsi_replace (&gsi, stmt, false);
1832 }
1833 if (regimplify)
1834 FOR_EACH_BB_REVERSE_FN (bb, cfun)
1835 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1836 if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
1837 {
1838 if (gimple_clobber_p (gsi_stmt (gsi)))
1839 gsi_remove (&gsi, true);
1840 else
1841 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
1842 }
1843 if (vf != 1)
1844 cfun->has_force_vectorize_loops = false;
1845 return 0;
1846}
1847
1848namespace {
1849
1850const pass_data pass_data_omp_device_lower =
1851{
1852 GIMPLE_PASS, /* type */
1853 "ompdevlow", /* name */
1854 OPTGROUP_OMP, /* optinfo_flags */
1855 TV_NONE, /* tv_id */
1856 PROP_cfg, /* properties_required */
1857 PROP_gimple_lomp_dev, /* properties_provided */
1858 0, /* properties_destroyed */
1859 0, /* todo_flags_start */
1860 TODO_update_ssa, /* todo_flags_finish */
1861};
1862
1863class pass_omp_device_lower : public gimple_opt_pass
1864{
1865public:
1866 pass_omp_device_lower (gcc::context *ctxt)
1867 : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
1868 {}
1869
1870 /* opt_pass methods: */
1871 virtual bool gate (function *fun)
1872 {
1873 return !(fun->curr_properties & PROP_gimple_lomp_dev);
1874 }
1875 virtual unsigned int execute (function *)
1876 {
1877 return execute_omp_device_lower ();
1878 }
1879
1880}; // class pass_omp_device_lower
1881
1882} // anon namespace
1883
1884gimple_opt_pass *
1885make_pass_omp_device_lower (gcc::context *ctxt)
1886{
1887 return new pass_omp_device_lower (ctxt);
1888}
1889
1890/* "omp declare target link" handling pass. */
1891
1892namespace {
1893
1894const pass_data pass_data_omp_target_link =
1895{
1896 GIMPLE_PASS, /* type */
1897 "omptargetlink", /* name */
1898 OPTGROUP_OMP, /* optinfo_flags */
1899 TV_NONE, /* tv_id */
1900 PROP_ssa, /* properties_required */
1901 0, /* properties_provided */
1902 0, /* properties_destroyed */
1903 0, /* todo_flags_start */
1904 TODO_update_ssa, /* todo_flags_finish */
1905};
1906
1907class pass_omp_target_link : public gimple_opt_pass
1908{
1909public:
1910 pass_omp_target_link (gcc::context *ctxt)
1911 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
1912 {}
1913
1914 /* opt_pass methods: */
1915 virtual bool gate (function *fun)
1916 {
1917#ifdef ACCEL_COMPILER
1918 tree attrs = DECL_ATTRIBUTES (fun->decl);
1919 return lookup_attribute ("omp declare target", attrs)
1920 || lookup_attribute ("omp target entrypoint", attrs);
1921#else
1922 (void) fun;
1923 return false;
1924#endif
1925 }
1926
1927 virtual unsigned execute (function *);
1928};
1929
1930/* Callback for walk_gimple_stmt used to scan for link var operands. */
1931
1932static tree
1933find_link_var_op (tree *tp, int *walk_subtrees, void *)
1934{
1935 tree t = *tp;
1936
1937 if (VAR_P (t)
1938 && DECL_HAS_VALUE_EXPR_P (t)
1939 && is_global_var (t)
1940 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
1941 {
1942 *walk_subtrees = 0;
1943 return t;
1944 }
1945
1946 return NULL_TREE;
1947}
1948
1949unsigned
1950pass_omp_target_link::execute (function *fun)
1951{
1952 basic_block bb;
1953 FOR_EACH_BB_FN (bb, fun)
1954 {
1955 gimple_stmt_iterator gsi;
1956 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1957 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
1958 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
1959 }
1960
1961 return 0;
1962}
1963
1964} // anon namespace
1965
1966gimple_opt_pass *
1967make_pass_omp_target_link (gcc::context *ctxt)
1968{
1969 return new pass_omp_target_link (ctxt);
1970}