]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/omp-offload.c
Update copyright years.
[thirdparty/gcc.git] / gcc / omp-offload.c
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
3
4 Copyright (C) 2005-2018 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55
56 /* Describe the OpenACC looping structure of a function. The entire
57 function is held in a 'NULL' loop. */
58
struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  /* Head and tail marker calls, one slot per partitioning level, as
     recorded during loop discovery.  */
  gcall *heads[GOMP_DIM_MAX]; /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions.  */

  tree routine; /* Pseudo-loop enclosing a routine (its fndecl).  */

  unsigned mask; /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner; /* Partitioning of inner loops.  */
  unsigned flags; /* Partitioning flags.  */
  vec<gcall *> ifns; /* Contained loop abstraction functions
			(IFN_GOACC_LOOP / IFN_GOACC_TILE calls).  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};
84
/* Holds offload tables with decls: the outlined offload functions and
   the "omp declare target" variables.  omp_finish_file emits these
   into the offload tables.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;
87
88 /* Return level at which oacc routine may spawn a partitioned loop, or
89 -1 if it is not a routine (i.e. is an offload fn). */
90
91 static int
92 oacc_fn_attrib_level (tree attr)
93 {
94 tree pos = TREE_VALUE (attr);
95
96 if (!TREE_PURPOSE (pos))
97 return -1;
98
99 int ix = 0;
100 for (ix = 0; ix != GOMP_DIM_MAX;
101 ix++, pos = TREE_CHAIN (pos))
102 if (!integer_zerop (TREE_PURPOSE (pos)))
103 break;
104
105 return ix;
106 }
107
108 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
109 adds their addresses and sizes to constructor-vector V_CTOR. */
110
static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      /* On the accel compiler a link var must additionally have a
	 value expr (the pointer into the target table).  */
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* Functions contribute only an address; variables an
	 (address, size) pair.  */
      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
160
161 /* Create new symbols containing (address, size) pairs for global variables,
162 marked with "omp declare target" attribute, as well as addresses for the
163 functions, which are outlined offloading regions. */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  /* Nothing was marked for offloading: emit no tables.  */
  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      /* Each variable contributes an (address, size) pair, each
	 function a single address (see
	 add_decls_addresses_to_decl_constructor).  */
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    num_vars * 2);
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      /* No named-section support: let the target hook record each
	 offload symbol individually.  */
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  targetm.record_offload_symbol (it);
	}
    }
}
227
228 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
229 axis DIM. Return a tmp var holding the result. */
230
231 static tree
232 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
233 {
234 tree arg = build_int_cst (unsigned_type_node, dim);
235 tree size = create_tmp_var (integer_type_node);
236 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
237 gimple *call = gimple_build_call_internal (fn, 1, arg);
238
239 gimple_call_set_lhs (call, size);
240 gimple_seq_add_stmt (seq, call);
241
242 return size;
243 }
244
245 /* Find the number of threads (POS = false), or thread number (POS =
246 true) for an OpenACC region partitioned as MASK. Setup code
247 required for the calculation is added to SEQ. */
248
249 static tree
250 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
251 {
252 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
253 unsigned ix;
254
255 /* Start at gang level, and examine relevant dimension indices. */
256 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
257 if (GOMP_DIM_MASK (ix) & mask)
258 {
259 if (res)
260 {
261 /* We had an outer index, so scale that by the size of
262 this dimension. */
263 tree n = oacc_dim_call (false, ix, seq);
264 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
265 }
266 if (pos)
267 {
268 /* Determine index in this dimension. */
269 tree id = oacc_dim_call (true, ix, seq);
270 if (res)
271 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
272 else
273 res = id;
274 }
275 }
276
277 if (res == NULL_TREE)
278 res = integer_zero_node;
279
280 return res;
281 }
282
283 /* Transform IFN_GOACC_LOOP calls to actual code. See
284 expand_oacc_for for where these are generated. At the vector
285 level, we stride loops, such that each member of a warp will
286 operate on adjacent iterations. At the worker and gang level,
287 each gang/warp executes a set of contiguous iterations. Chunking
288 can override this such that each iteration engine executes a
289 contiguous chunk, and then moves on to stride to the next chunk. */
290
static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

#ifdef ACCEL_COMPILER
  /* Only the device compiler sees the real chunk size; the host
     compiler keeps the striding defaults set above.  */
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	       = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  struct loop *parent = bb->loop_father;
	  struct loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  /* Striding: the offset is simply this thread's number.  */
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  /* Chunked (or blocked) execution: compute the offset from
	     the outer thread's chunk start plus the inner thread's
	     position within it.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* chunk_size = ceil (range / (volume * step)), computed
		 via the usual (range - dir + volume*step) trick.  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      /* Advance by whole chunks: argument 6 is the chunk
		 number.  */
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  /* Mirror the OFFSET computation above to find the end of
	     this thread's span, clamped to the loop range.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  /* Argument 6 is the offset this bound pairs with.  */
	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  /* Clamp against RANGE, direction-aware.  */
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  /* Replace the IFN_GOACC_LOOP call with the computed sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
491
492 /* Transform a GOACC_TILE call. Determines the element loop span for
493 the specified loop of the nest. This is 1 if we're not tiling.
494
495 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
496
static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  /* Element loops may only be vector- and/or worker-partitioned.  */
  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      /* If only worker partitioning, treat it as the primary size.  */
      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  /* Innermost loop of the nest.  */
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).  We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  /* Replace the IFN_GOACC_TILE call with the span computation.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
570
571 /* Default partitioned and minimum partitioned dimensions. */
572
573 static int oacc_default_dims[GOMP_DIM_MAX];
574 static int oacc_min_dims[GOMP_DIM_MAX];
575
576 /* Parse the default dimension parameter. This is a set of
577 :-separated optional compute dimensions. Each specified dimension
578 is a positive integer. When device type support is added, it is
579 planned to be a comma separated list of such compute dimensions,
580 with all but the first prefixed by the colon-terminated device
581 type. */
582
static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  /* Start with "unset" defaults and unit minima.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  /* After the first dimension, expect a ':' separator.  */
	  if (ix)
	    {
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  /* An immediately-following ':' leaves this dimension at its
	     default; otherwise parse a positive integer.  */
	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      /* Reject parse errors, non-positive values, and values
		 that do not fit in an int.  */
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      /* Trailing junk (including too many dimensions) is an error.  */
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "-fopenacc-dim operand is malformed at '%s'", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
}
636
637 /* Validate and update the dimensions for offloaded FN. ATTRS is the
638 raw attribute. DIMS is an array of dimensions, which is filled in.
639 LEVEL is the partitioning level of a routine, or -1 for an offload
640 region itself. USED is the mask of partitioned execution in the
641 function. */
642
static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  /* Extract the current per-dimension settings from the attribute;
     remember each TREE_PURPOSE so the attribute can be rebuilt.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector- single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partioning is not 1,
	   the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  Build the list back
	 to front so it ends up in dimension order.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	pos = tree_cons (purpose[ix],
			 build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}
700
701 /* Create an empty OpenACC loop structure at LOC. */
702
703 static oacc_loop *
704 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
705 {
706 oacc_loop *loop = XCNEW (oacc_loop);
707
708 loop->parent = parent;
709
710 if (parent)
711 {
712 loop->sibling = parent->child;
713 parent->child = loop;
714 }
715
716 loop->loc = loc;
717 return loop;
718 }
719
720 /* Create an outermost, dummy OpenACC loop for offloaded function
721 DECL. */
722
723 static oacc_loop *
724 new_oacc_loop_outer (tree decl)
725 {
726 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
727 }
728
729 /* Start a new OpenACC loop structure beginning at head marker HEAD.
730 Link into PARENT loop. Return the new loop. */
731
732 static oacc_loop *
733 new_oacc_loop (oacc_loop *parent, gcall *marker)
734 {
735 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
736
737 loop->marker = marker;
738
739 /* TODO: This is where device_type flattening would occur for the loop
740 flags. */
741
742 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
743
744 tree chunk_size = integer_zero_node;
745 if (loop->flags & OLF_GANG_STATIC)
746 chunk_size = gimple_call_arg (marker, 4);
747 loop->chunk_size = chunk_size;
748
749 return loop;
750 }
751
/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */
754
755 static void
756 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
757 {
758 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
759 int level = oacc_fn_attrib_level (attrs);
760
761 gcc_assert (level >= 0);
762
763 loop->marker = call;
764 loop->routine = decl;
765 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
766 ^ (GOMP_DIM_MASK (level) - 1));
767 }
768
769 /* Finish off the current OpenACC loop ending at tail marker TAIL.
770 Return the parent loop. */
771
772 static oacc_loop *
773 finish_oacc_loop (oacc_loop *loop)
774 {
775 /* If the loop has been collapsed, don't partition it. */
776 if (loop->ifns.is_empty ())
777 loop->mask = loop->flags = 0;
778 return loop->parent;
779 }
780
781 /* Free all OpenACC loop structures within LOOP (inclusive). */
782
783 static void
784 free_oacc_loop (oacc_loop *loop)
785 {
786 if (loop->sibling)
787 free_oacc_loop (loop->sibling);
788 if (loop->child)
789 free_oacc_loop (loop->child);
790
791 loop->ifns.release ();
792 free (loop);
793 }
794
795 /* Dump out the OpenACC loop head or tail beginning at FROM. */
796
static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  /* Stop at the next marker of the same kind, which ends this
	     head/tail sequence.  */
	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      /* The sequence may span basic blocks; fall through to the
	 single successor when a block runs out.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
825
826 /* Dump OpenACC loops LOOP, its siblings and its children. */
827
828 static void
829 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
830 {
831 int ix;
832
833 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
834 loop->flags, loop->mask,
835 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
836
837 if (loop->marker)
838 print_gimple_stmt (file, loop->marker, depth * 2);
839
840 if (loop->routine)
841 fprintf (file, "%*sRoutine %s:%u:%s\n",
842 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
843 DECL_SOURCE_LINE (loop->routine),
844 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
845
846 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
847 if (loop->heads[ix])
848 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
849 for (ix = GOMP_DIM_MAX; ix--;)
850 if (loop->tails[ix])
851 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
852
853 if (loop->child)
854 dump_oacc_loop (file, loop->child, depth + 1);
855 if (loop->sibling)
856 dump_oacc_loop (file, loop->sibling, depth);
857 }
858
/* Forward declaration, presumably to provide a prototype for the
   debug entry point below — TODO confirm intent.  */
void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}
868
869 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
870 structures as we go. By construction these loops are properly
871 nested. */
872
static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  /* MARKER counts head/tail markers seen in the current sequence;
     REMAINING counts those still expected.  */
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      /* A marker with only two arguments terminates the
		 head/tail sequence.  */
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  /* Argument 2 is the number of markers remaining in
		     this sequence (including this one).  */
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      /* First marker: a head marker opens a new
			 loop.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      /* Heads fill in forward order, tails in
			 reverse.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  /* Mid-sequence at the end of the block: the sequence continues in
     the single fallthrough successor.  */
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
973
974 /* LOOP is the first sibling. Reverse the order in place and return
975 the new first sibling. Recurse to child loops. */
976
977 static oacc_loop *
978 oacc_loop_sibling_nreverse (oacc_loop *loop)
979 {
980 oacc_loop *last = NULL;
981 do
982 {
983 if (loop->child)
984 loop->child = oacc_loop_sibling_nreverse (loop->child);
985
986 oacc_loop *next = loop->sibling;
987 loop->sibling = last;
988 last = loop;
989 loop = next;
990 }
991 while (loop);
992
993 return last;
994 }
995
996 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
997 the current function. */
998
999 static oacc_loop *
1000 oacc_loop_discovery ()
1001 {
1002 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1003 in the following. */
1004 clear_bb_flags ();
1005
1006 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1007 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1008
1009 /* The siblings were constructed in reverse order, reverse them so
1010 that diagnostics come out in an unsurprising order. */
1011 top = oacc_loop_sibling_nreverse (top);
1012
1013 return top;
1014 }
1015
1016 /* Transform the abstract internal function markers starting at FROM
1017 to be for partitioning level LEVEL. Stop when we meet another HEAD
1018 or TAIL marker. */
1019
static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  /* Patch the level argument of fork/join calls in place;
	     stop at the next marker of the same kind as FROM.  */
	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	/* Reductions carry their level as argument 3.  */
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      /* The sequence may span basic blocks; fall through to the
	 single successor when a block runs out.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
1050
1051 /* Process the discovered OpenACC loops, setting the correct
1052 partitioning level etc. */
1053
static void
oacc_loop_process (oacc_loop *loop)
{
  /* Process children before this loop (post-order).  */
  if (loop->child)
    oacc_loop_process (loop->child);

  /* Routines (pseudo-loops) and unpartitioned loops need no
     rewriting.  */
  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      /* Fill the now-known partitioning into the recorded abstraction
	 functions.  */
      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      /* A placeholder of -1 in argument 5 marks an element
		 loop, which gets the element mask and no chunk.  */
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      /* Assign each head/tail pair (in order) its actual partitioned
	 dimension, walking the combined mask from the outermost set
	 bit inwards.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
1106
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      /* The explicitly requested axes live in the OLF_DIM_BASE bits
	 of the loop flags.  */
      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* 'seq', 'auto' and explicit axes are mutually exclusive;
	 diagnose any combination of two or more.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      /* 'seq' wins: drop the explicit axis request.  */
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  /* Mark for auto partitioning; the GOMP_DIM_MAX bit in the
	     returned mask tells the caller that auto loops exist.  */
	  loop->flags |= OLF_AUTO;
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  if (this_mask & outer_mask)
    {
      /* This loop requests an axis already in use further out; find
	 the offending container for the diagnostic.  */
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    /* No such containing loop: the clash comes from the
	       containing routine's declared level (OUTER_MASK).  */
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      /* Axes must nest outermost-first: reject an axis at or outside
	 one already taken by a containing loop.  */
      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  if (loop->child)
    {
      /* Children may not reuse any axis taken by this loop or by its
	 containers.  */
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
1256
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  /* Only partition loops marked both 'auto' and 'independent'.  */
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      /* Recurse with the axes taken so far added to the forbidden
	 set; record what the subtree used.  */
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one use by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
1377
1378 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1379 axes. Return mask of partitioning. */
1380
1381 static unsigned
1382 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1383 {
1384 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1385
1386 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1387 {
1388 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1389 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1390 }
1391 return mask_all;
1392 }
1393
1394 /* Default fork/join early expander. Delete the function calls if
1395 there is no RTL expander. */
1396
1397 bool
1398 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1399 const int *ARG_UNUSED (dims), bool is_fork)
1400 {
1401 if (is_fork)
1402 return targetm.have_oacc_fork ();
1403 else
1404 return targetm.have_oacc_join ();
1405 }
1406
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'   */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      /* SETUP copies the other way, and consumes the LHS so
		 the final copy below is skipped.  */
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the internal-fn call with the (possibly empty) copy
     sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
1453
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  The option string
     is replaced by a pointer to itself as an "already parsed"
     sentinel.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    {
      if (fn_level >= 0)
	fprintf (dump_file, "Function is OpenACC routine level %d\n",
		 fn_level);
      else if (is_oacc_kernels)
	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
		 (is_oacc_kernels_parallelized
		  ? "parallelized" : "unparallelized"));
      else
	fprintf (dump_file, "Function is OpenACC parallel offload\n");
    }

  /* A routine at level N may not use axes at or outside N.  */
  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
	 also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  The expanders may replace the call
	   with a sequence we need to walk over again.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_TILE:
	    oacc_xform_tile (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  break;

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  /* A level of -1 marks an unused axis; otherwise let
		     the target decide whether the marker survives to
		     RTL expansion.  */
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  /* Markers have served their purpose; drop them.  */
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass,  false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}
1652
1653 /* Default launch dimension validator. Force everything to 1. A
1654 backend that wants to provide larger dimensions must override this
1655 hook. */
1656
1657 bool
1658 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1659 int ARG_UNUSED (fn_level))
1660 {
1661 bool changed = false;
1662
1663 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1664 {
1665 if (dims[ix] != 1)
1666 {
1667 dims[ix] = 1;
1668 changed = true;
1669 }
1670 }
1671
1672 return changed;
1673 }
1674
/* Default dimension bound is unknown on accelerator and 1 on host.  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  /* Accelerator compiler: 0 signals "no compile-time bound".  */
  return 0;
#else
  /* Host fallback: each axis has a single element.  */
  return 1;
#endif
}
1686
namespace {

/* Pass descriptor for the OpenACC device lowering pass.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run for OpenACC compilations.  */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace
1720
/* Instantiate the OpenACC device lowering pass.  */

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
1726
1727 \f
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type to hold the privatized copies;
     SIMTREC becomes a pointer to it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Arguments 1..N of the ENTER call are '&var' for each privatized
     variable (or a null pointer placeholder).  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Create a field mirroring the variable's type and alignment.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect all uses of VAR through 'simtrec->field' via a
	 DECL_VALUE_EXPR; the caller must regimplify.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the placeholder alloc call with one carrying the now-known
     record size and alignment.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* The ENTER call is reduced to a copy of its first argument.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  /* Clobber the record before the matching SIMT_EXIT, so its lifetime
     ends there.  SIMTREC's only use (if any) is that EXIT call.  */
  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_constructor (rectype, NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
1795
1796 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
1797
1798 static tree
1799 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
1800 {
1801 tree t = *tp;
1802
1803 if (VAR_P (t)
1804 && DECL_HAS_VALUE_EXPR_P (t)
1805 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
1806 {
1807 *walk_subtrees = 0;
1808 return t;
1809 }
1810 return NULL_TREE;
1811 }
1812
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* VF == 1 means the target has no SIMT execution.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	/* RHS == NULL_TREE below means "leave the call alone".  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    /* On SIMT targets the placeholder alloc is rewritten to
	       allocate the privatization record.  */
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    if (vf != 1)
	      continue;
	    /* Non-SIMT: the call is removed, so drop its vdef.  */
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD placeholders fold the other way: only on SIMT
	       targets.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	/* Replace the call with an assignment of the folded value, or
	   a no-op if there is no LHS.  */
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  /* Statements mentioning SIMT-privatized variables now reference
     them through DECL_VALUE_EXPRs; regimplify those statements,
     dropping clobbers of the old variables.  */
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
1899
namespace {

/* Pass descriptor for the OpenMP device lowering pass.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Run until the function has been marked as device-lowered.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lomp_dev);
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace
1935
/* Instantiate the OpenMP device lowering pass.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
1941
/* "omp declare target link" handling pass.  */

namespace {

/* Pass descriptor for the "omp declare target link" pass.  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run on the accelerator compiler, for functions that may
     execute on the device.  */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      tree attrs = DECL_ATTRIBUTES (fun->decl);
      return lookup_attribute ("omp declare target", attrs)
	     || lookup_attribute ("omp target entrypoint", attrs);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};

/* Callback for walk_gimple_stmt used to scan for link var operands.  */

static tree
find_link_var_op (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Global variables with the "omp declare target link" attribute
     and a DECL_VALUE_EXPR are the ones that need regimplifying.  */
  if (VAR_P (t)
      && DECL_HAS_VALUE_EXPR_P (t)
      && is_global_var (t)
      && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
    {
      *walk_subtrees = 0;
      return t;
    }

  return NULL_TREE;
}

/* Regimplify every statement that mentions a link variable, so its
   DECL_VALUE_EXPR replacement takes effect.  */

unsigned
pass_omp_target_link::execute (function *fun)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    }

  return 0;
}

} // anon namespace
2017
/* Instantiate the "omp declare target link" pass.  */

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}