1 /* Bits of OpenMP and OpenACC handling that are specific to device offloading
2 and a lowering pass for OpenACC device directives.
3
4 Copyright (C) 2005-2019 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55
56 /* Describe the OpenACC looping structure of a function. The entire
57 function is held in a 'NULL' loop. */
58
59 struct oacc_loop
60 {
61 oacc_loop *parent; /* Containing loop. */
62
63 oacc_loop *child; /* First inner loop. */
64
65 oacc_loop *sibling; /* Next loop within same parent. */
66
67 location_t loc; /* Location of the loop start. */
68
69 gcall *marker; /* Initial head marker. */
70
71 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
72 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
73
74 tree routine; /* Pseudo-loop enclosing a routine. */
75
76 unsigned mask; /* Partitioning mask. */
77 unsigned e_mask; /* Partitioning of element loops (when tiling). */
78 unsigned inner; /* Partitioning of inner loops. */
79 unsigned flags; /* Partitioning flags. */
80 vec<gcall *> ifns; /* Contained loop abstraction functions. */
81 tree chunk_size; /* Chunk size. */
82 gcall *head_end; /* Final marker of head sequence. */
83 };
84
85 /* Holds offload tables with decls. */
86 vec<tree, va_gc> *offload_funcs, *offload_vars;
87
88 /* Return the level at which an oacc routine may spawn a partitioned loop,
89 or -1 if it is not a routine (i.e. it is an offload fn). */
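/* E.g. a routine declared "vector" carries zero TREE_PURPOSEs in the gang
and worker positions, so the walk below returns GOMP_DIM_VECTOR; a "seq"
routine has all three zero and returns GOMP_DIM_MAX. */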
90
91 int
92 oacc_fn_attrib_level (tree attr)
93 {
94 tree pos = TREE_VALUE (attr);
95
96 if (!TREE_PURPOSE (pos))
97 return -1;
98
99 int ix = 0;
100 for (ix = 0; ix != GOMP_DIM_MAX;
101 ix++, pos = TREE_CHAIN (pos))
102 if (!integer_zerop (TREE_PURPOSE (pos)))
103 break;
104
105 return ix;
106 }
107
108 /* Helper function for the omp_finish_file routine. Takes decls from V_DECLS and
109 adds their addresses and sizes to constructor-vector V_CTOR. */
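/* Each function contributes one constructor element (its address); each
variable contributes two (address, size). E.g. an 8-byte variable V
becomes the pair { &V, 8 }, except that for an "omp declare target link"
variable the size's most significant bit is additionally set as a marker. */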
110
111 static void
112 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
113 vec<constructor_elt, va_gc> *v_ctor)
114 {
115 unsigned len = vec_safe_length (v_decls);
116 for (unsigned i = 0; i < len; i++)
117 {
118 tree it = (*v_decls)[i];
119 bool is_var = VAR_P (it);
120 bool is_link_var
121 = is_var
122 #ifdef ACCEL_COMPILER
123 && DECL_HAS_VALUE_EXPR_P (it)
124 #endif
125 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
126
127 tree size = NULL_TREE;
128 if (is_var)
129 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
130
131 tree addr;
132 if (!is_link_var)
133 addr = build_fold_addr_expr (it);
134 else
135 {
136 #ifdef ACCEL_COMPILER
137 /* For "omp declare target link" vars add address of the pointer to
138 the target table, instead of address of the var. */
139 tree value_expr = DECL_VALUE_EXPR (it);
140 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
141 varpool_node::finalize_decl (link_ptr_decl);
142 addr = build_fold_addr_expr (link_ptr_decl);
143 #else
144 addr = build_fold_addr_expr (it);
145 #endif
146
147 /* Most significant bit of the size marks "omp declare target link"
148 vars in host and target tables. */
149 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
150 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
151 * BITS_PER_UNIT - 1);
152 size = wide_int_to_tree (const_ptr_type_node, isize);
153 }
154
155 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
156 if (is_var)
157 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
158 }
159 }
160
161 /* Create new symbols containing (address, size) pairs for global variables
162 marked with the "omp declare target" attribute, as well as addresses of
163 functions that are outlined offloading regions. */
164 void
165 omp_finish_file (void)
166 {
167 unsigned num_funcs = vec_safe_length (offload_funcs);
168 unsigned num_vars = vec_safe_length (offload_vars);
169
170 if (num_funcs == 0 && num_vars == 0)
171 return;
172
173 if (targetm_common.have_named_sections)
174 {
175 vec<constructor_elt, va_gc> *v_f, *v_v;
176 vec_alloc (v_f, num_funcs);
177 vec_alloc (v_v, num_vars * 2);
178
179 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
180 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
181
182 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
183 num_vars * 2);
184 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
185 num_funcs);
186 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
187 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
188 tree ctor_v = build_constructor (vars_decl_type, v_v);
189 tree ctor_f = build_constructor (funcs_decl_type, v_f);
190 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
191 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
192 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
193 get_identifier (".offload_func_table"),
194 funcs_decl_type);
195 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
196 get_identifier (".offload_var_table"),
197 vars_decl_type);
198 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
199 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
200 otherwise a joint table in a binary will contain padding between
201 tables from multiple object files. */
202 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
203 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
204 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
205 DECL_INITIAL (funcs_decl) = ctor_f;
206 DECL_INITIAL (vars_decl) = ctor_v;
207 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
208 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
209
210 varpool_node::finalize_decl (vars_decl);
211 varpool_node::finalize_decl (funcs_decl);
212 }
213 else
214 {
215 for (unsigned i = 0; i < num_funcs; i++)
216 {
217 tree it = (*offload_funcs)[i];
218 targetm.record_offload_symbol (it);
219 }
220 for (unsigned i = 0; i < num_vars; i++)
221 {
222 tree it = (*offload_vars)[i];
223 targetm.record_offload_symbol (it);
224 }
225 }
226 }
227
228 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
229 axis DIM. Return a tmp var holding the result. */
230
231 static tree
232 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
233 {
234 tree arg = build_int_cst (unsigned_type_node, dim);
235 tree size = create_tmp_var (integer_type_node);
236 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
237 gimple *call = gimple_build_call_internal (fn, 1, arg);
238
239 gimple_call_set_lhs (call, size);
240 gimple_seq_add_stmt (seq, call);
241
242 return size;
243 }
244
245 /* Find the number of threads (POS = false), or thread number (POS =
246 true) for an OpenACC region partitioned as MASK. Setup code
247 required for the calculation is added to SEQ. */
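/* E.g. for a MASK covering the gang and vector dimensions, POS=false
builds num_gangs * vector_length, while POS=true builds the linearized
position gang_pos * vector_length + vector_pos. */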
248
249 static tree
250 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
251 {
252 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
253 unsigned ix;
254
255 /* Start at gang level, and examine relevant dimension indices. */
256 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
257 if (GOMP_DIM_MASK (ix) & mask)
258 {
259 if (res)
260 {
261 /* We had an outer index, so scale that by the size of
262 this dimension. */
263 tree n = oacc_dim_call (false, ix, seq);
264 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
265 }
266 if (pos)
267 {
268 /* Determine index in this dimension. */
269 tree id = oacc_dim_call (true, ix, seq);
270 if (res)
271 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
272 else
273 res = id;
274 }
275 }
276
277 if (res == NULL_TREE)
278 res = integer_zero_node;
279
280 return res;
281 }
282
283 /* Transform IFN_GOACC_LOOP calls to actual code. See
284 expand_oacc_for for where these are generated. At the vector
285 level, we stride loops, such that each member of a warp will
286 operate on adjacent iterations. At the worker and gang level,
287 each gang/warp executes a set of contiguous iterations. Chunking
288 can override this such that each iteration engine executes a
289 contiguous chunk, and then moves on to stride to the next chunk. */
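/* E.g. with a vector length of 4 and striding, lane L executes iterations
L, L+4, L+8, ...; with a chunk size of 2 instead, lane L executes
iterations 2L and 2L+1 of each chunk, consecutive chunks lying 8
iterations apart. */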
290
291 static void
292 oacc_xform_loop (gcall *call)
293 {
294 gimple_stmt_iterator gsi = gsi_for_stmt (call);
295 enum ifn_goacc_loop_kind code
296 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
297 tree dir = gimple_call_arg (call, 1);
298 tree range = gimple_call_arg (call, 2);
299 tree step = gimple_call_arg (call, 3);
300 tree chunk_size = NULL_TREE;
301 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
302 tree lhs = gimple_call_lhs (call);
303 tree type = NULL_TREE;
304 tree diff_type = TREE_TYPE (range);
305 tree r = NULL_TREE;
306 gimple_seq seq = NULL;
307 bool chunking = false, striding = true;
308 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
309 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
310
311 /* Skip lowering if the return value of the IFN_GOACC_LOOP call is unused. */
312 if (!lhs)
313 {
314 gsi_replace_with_seq (&gsi, seq, true);
315 return;
316 }
317
318 type = TREE_TYPE (lhs);
319
320 #ifdef ACCEL_COMPILER
321 chunk_size = gimple_call_arg (call, 4);
322 if (integer_minus_onep (chunk_size) /* Force static allocation. */
323 || integer_zerop (chunk_size)) /* Default (also static). */
324 {
325 /* If we're at the gang level, we want each to execute a
326 contiguous run of iterations. Otherwise we want each element
327 to stride. */
328 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
329 chunking = false;
330 }
331 else
332 {
333 /* Chunk of size 1 is striding. */
334 striding = integer_onep (chunk_size);
335 chunking = !striding;
336 }
337 #endif
338
339 /* striding=true, chunking=true
340 -> invalid.
341 striding=true, chunking=false
342 -> chunks=1
343 striding=false, chunking=true
344 -> chunks=ceil (range / (chunk_size * threads * step))
345 striding=false, chunking=false
346 -> chunk_size=ceil (range / (threads * step)), chunks=1 */
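/* E.g. for IFN_GOACC_LOOP_CHUNKS with range=100, step=1, dir=1,
8 threads and chunk_size=4: per = 8 * 4 * 1 = 32 and
chunks = (100 - 1 + 32) / 32 = 4, i.e. ceil (100 / 32). */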
347 push_gimplify_context (true);
348
349 switch (code)
350 {
351 default: gcc_unreachable ();
352
353 case IFN_GOACC_LOOP_CHUNKS:
354 if (!chunking)
355 r = build_int_cst (type, 1);
356 else
357 {
358 /* chunk_max
359 = (range - dir) / (chunks * step * num_threads) + dir */
360 tree per = oacc_thread_numbers (false, mask, &seq);
361 per = fold_convert (type, per);
362 chunk_size = fold_convert (type, chunk_size);
363 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
364 per = fold_build2 (MULT_EXPR, type, per, step);
365 r = build2 (MINUS_EXPR, type, range, dir);
366 r = build2 (PLUS_EXPR, type, r, per);
367 r = build2 (TRUNC_DIV_EXPR, type, r, per);
368 }
369 break;
370
371 case IFN_GOACC_LOOP_STEP:
372 {
373 /* If striding, step by the entire compute volume, otherwise
374 step by the inner volume. */
375 unsigned volume = striding ? mask : inner_mask;
376
377 r = oacc_thread_numbers (false, volume, &seq);
378 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
379 }
380 break;
381
382 case IFN_GOACC_LOOP_OFFSET:
383 /* Enable vectorization on non-SIMT targets. */
384 if (!targetm.simt.vf
385 && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
386 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
387 the loop. */
388 && (flag_tree_loop_vectorize
389 || !global_options_set.x_flag_tree_loop_vectorize))
390 {
391 basic_block bb = gsi_bb (gsi);
392 struct loop *parent = bb->loop_father;
393 struct loop *body = parent->inner;
394
395 parent->force_vectorize = true;
396 parent->safelen = INT_MAX;
397
398 /* "Chunking loops" may have inner loops. */
399 if (parent->inner)
400 {
401 body->force_vectorize = true;
402 body->safelen = INT_MAX;
403 }
404
405 cfun->has_force_vectorize_loops = true;
406 }
407 if (striding)
408 {
409 r = oacc_thread_numbers (true, mask, &seq);
410 r = fold_convert (diff_type, r);
411 }
412 else
413 {
414 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
415 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
416 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
417 inner_size, outer_size);
418
419 volume = fold_convert (diff_type, volume);
420 if (chunking)
421 chunk_size = fold_convert (diff_type, chunk_size);
422 else
423 {
424 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
425
426 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
427 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
428 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
429 }
430
431 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
432 fold_convert (diff_type, inner_size));
433 r = oacc_thread_numbers (true, outer_mask, &seq);
434 r = fold_convert (diff_type, r);
435 r = build2 (MULT_EXPR, diff_type, r, span);
436
437 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
438 inner = fold_convert (diff_type, inner);
439 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
440
441 if (chunking)
442 {
443 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
444 tree per
445 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
446 per = build2 (MULT_EXPR, diff_type, per, chunk);
447
448 r = build2 (PLUS_EXPR, diff_type, r, per);
449 }
450 }
451 r = fold_build2 (MULT_EXPR, diff_type, r, step);
452 if (type != diff_type)
453 r = fold_convert (type, r);
454 break;
455
456 case IFN_GOACC_LOOP_BOUND:
457 if (striding)
458 r = range;
459 else
460 {
461 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
462 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
463 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
464 inner_size, outer_size);
465
466 volume = fold_convert (diff_type, volume);
467 if (chunking)
468 chunk_size = fold_convert (diff_type, chunk_size);
469 else
470 {
471 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
472
473 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
474 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
475 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
476 }
477
478 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
479 fold_convert (diff_type, inner_size));
480
481 r = fold_build2 (MULT_EXPR, diff_type, span, step);
482
483 tree offset = gimple_call_arg (call, 6);
484 r = build2 (PLUS_EXPR, diff_type, r,
485 fold_convert (diff_type, offset));
486 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
487 diff_type, r, range);
488 }
489 if (diff_type != type)
490 r = fold_convert (type, r);
491 break;
492 }
493
494 gimplify_assign (lhs, r, &seq);
495
496 pop_gimplify_context (NULL);
497
498 gsi_replace_with_seq (&gsi, seq, true);
499 }
500
501 /* Transform a GOACC_TILE call. Determines the element loop span for
502 the specified loop of the nest. This is 1 if we're not tiling.
503
504 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
505
506 static void
507 oacc_xform_tile (gcall *call)
508 {
509 gimple_stmt_iterator gsi = gsi_for_stmt (call);
510 unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
511 /* Inner loops have higher loop_nos. */
512 unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
513 tree tile_size = gimple_call_arg (call, 2);
514 unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
515 tree lhs = gimple_call_lhs (call);
516 tree type = TREE_TYPE (lhs);
517 gimple_seq seq = NULL;
518 tree span = build_int_cst (type, 1);
519
520 gcc_assert (!(e_mask
521 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
522 | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
523 push_gimplify_context (!seen_error ());
524
525 #ifndef ACCEL_COMPILER
526 /* Partitioning disabled on host compilers. */
527 e_mask = 0;
528 #endif
529 if (!e_mask)
530 /* Not partitioning. */
531 span = integer_one_node;
532 else if (!integer_zerop (tile_size))
533 /* User explicitly specified size. */
534 span = tile_size;
535 else
536 {
537 /* Pick a size based on the partitioning of the element loop and
538 the number of loop nests. */
539 tree first_size = NULL_TREE;
540 tree second_size = NULL_TREE;
541
542 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
543 first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
544 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
545 second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
546
547 if (!first_size)
548 {
549 first_size = second_size;
550 second_size = NULL_TREE;
551 }
552
553 if (loop_no + 1 == collapse)
554 {
555 span = first_size;
556 if (!loop_no && second_size)
557 span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
558 span, second_size);
559 }
560 else if (loop_no + 2 == collapse)
561 span = second_size;
562 else
563 span = NULL_TREE;
564
565 if (!span)
566 /* There's no obvious element size for this loop. Options
567 are 1, first_size or some non-unity constant (32 is my
568 favourite). We should gather some statistics. */
569 span = first_size;
570 }
571
572 span = fold_convert (type, span);
573 gimplify_assign (lhs, span, &seq);
574
575 pop_gimplify_context (NULL);
576
577 gsi_replace_with_seq (&gsi, seq, true);
578 }
579
580 /* Default partitioned and minimum partitioned dimensions. */
581
582 static int oacc_default_dims[GOMP_DIM_MAX];
583 static int oacc_min_dims[GOMP_DIM_MAX];
584
585 int
586 oacc_get_default_dim (int dim)
587 {
588 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
589 return oacc_default_dims[dim];
590 }
591
592 int
593 oacc_get_min_dim (int dim)
594 {
595 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
596 return oacc_min_dims[dim];
597 }
598
599 /* Parse the default dimension parameter. This is a set of
600 :-separated optional compute dimensions. Each specified dimension
601 is a positive integer. When device type support is added, it is
602 planned to be a comma-separated list of such compute dimensions,
603 with all but the first prefixed by the colon-terminated device
604 type. */
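/* E.g. -fopenacc-dim=32:4:8 requests 32 gangs, 4 workers and a vector
length of 8, while -fopenacc-dim=::8 sets only the vector length and
leaves the gang and worker counts to their defaults. */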
605
606 static void
607 oacc_parse_default_dims (const char *dims)
608 {
609 int ix;
610
611 for (ix = GOMP_DIM_MAX; ix--;)
612 {
613 oacc_default_dims[ix] = -1;
614 oacc_min_dims[ix] = 1;
615 }
616
617 #ifndef ACCEL_COMPILER
618 /* Cannot be overridden on the host. */
619 dims = NULL;
620 #endif
621 if (dims)
622 {
623 const char *pos = dims;
624
625 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
626 {
627 if (ix)
628 {
629 if (*pos != ':')
630 goto malformed;
631 pos++;
632 }
633
634 if (*pos != ':')
635 {
636 long val;
637 const char *eptr;
638
639 errno = 0;
640 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
641 if (errno || val <= 0 || (int) val != val)
642 goto malformed;
643 pos = eptr;
644 oacc_default_dims[ix] = (int) val;
645 }
646 }
647 if (*pos)
648 {
649 malformed:
650 error_at (UNKNOWN_LOCATION,
651 "%<-fopenacc-dim%> operand is malformed at %qs", pos);
652 }
653 }
654
655 /* Allow the backend to validate the dimensions. */
656 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
657 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
658 }
659
660 /* Validate and update the dimensions for offloaded FN. ATTRS is the
661 raw attribute. DIMS is an array of dimensions, which is filled in.
662 LEVEL is the partitioning level of a routine, or -1 for an offload
663 region itself. USED is the mask of partitioned execution in the
664 function. */
665
666 static void
667 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
668 {
669 tree purpose[GOMP_DIM_MAX];
670 unsigned ix;
671 tree pos = TREE_VALUE (attrs);
672
673 /* Make sure the attribute creator attached the dimension
674 information. */
675 gcc_assert (pos);
676
677 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
678 {
679 purpose[ix] = TREE_PURPOSE (pos);
680 tree val = TREE_VALUE (pos);
681 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
682 pos = TREE_CHAIN (pos);
683 }
684
685 bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
686
687 /* Default anything left to 1 or a partitioned default. */
688 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
689 if (dims[ix] < 0)
690 {
691 /* The OpenACC spec says 'If the [num_gangs] clause is not
692 specified, an implementation-defined default will be used;
693 the default may depend on the code within the construct.'
694 (2.5.6). Thus an implementation is free to choose
695 non-unity default for a parallel region that doesn't have
696 any gang-partitioned loops. However, it appears that there
697 is a sufficient body of user code that expects non-gang
698 partitioned regions to not execute in gang-redundant mode.
699 So we (a) don't warn about the non-portability and (b) pick
700 the minimum permissible dimension size when there is no
701 partitioned execution. Otherwise we pick the global
702 default for the dimension, which the user can control. The
703 same wording and logic applies to num_workers and
704 vector_length, however the worker- or vector- single
705 execution doesn't have the same impact as gang-redundant
706 execution. (If the minimum gang-level partitioning is not 1,
707 the target is probably too confusing.) */
708 dims[ix] = (used & GOMP_DIM_MASK (ix)
709 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
710 changed = true;
711 }
712
713 if (changed)
714 {
715 /* Replace the attribute with new values. */
716 pos = NULL_TREE;
717 for (ix = GOMP_DIM_MAX; ix--;)
718 pos = tree_cons (purpose[ix],
719 build_int_cst (integer_type_node, dims[ix]), pos);
720 oacc_replace_fn_attrib (fn, pos);
721 }
722 }
723
724 /* Create an empty OpenACC loop structure at LOC. */
725
726 static oacc_loop *
727 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
728 {
729 oacc_loop *loop = XCNEW (oacc_loop);
730
731 loop->parent = parent;
732
733 if (parent)
734 {
735 loop->sibling = parent->child;
736 parent->child = loop;
737 }
738
739 loop->loc = loc;
740 return loop;
741 }
742
743 /* Create an outermost, dummy OpenACC loop for offloaded function
744 DECL. */
745
746 static oacc_loop *
747 new_oacc_loop_outer (tree decl)
748 {
749 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
750 }
751
752 /* Start a new OpenACC loop structure beginning at head marker MARKER.
753 Link into PARENT loop. Return the new loop. */
754
755 static oacc_loop *
756 new_oacc_loop (oacc_loop *parent, gcall *marker)
757 {
758 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
759
760 loop->marker = marker;
761
762 /* TODO: This is where device_type flattening would occur for the loop
763 flags. */
764
765 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
766
767 tree chunk_size = integer_zero_node;
768 if (loop->flags & OLF_GANG_STATIC)
769 chunk_size = gimple_call_arg (marker, 4);
770 loop->chunk_size = chunk_size;
771
772 return loop;
773 }
774
775 /* Create a dummy loop encompassing a call to an OpenACC routine.
776 Extract the routine's partitioning requirements. */
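/* The mask computed below covers the routine's own level and everything
inner to it: e.g. for a "worker" routine it is
GOMP_DIM_MASK (GOMP_DIM_WORKER) | GOMP_DIM_MASK (GOMP_DIM_VECTOR). */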
777
778 static void
779 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
780 {
781 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
782 int level = oacc_fn_attrib_level (attrs);
783
784 gcc_assert (level >= 0);
785
786 loop->marker = call;
787 loop->routine = decl;
788 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
789 ^ (GOMP_DIM_MASK (level) - 1));
790 }
791
792 /* Finish off the current OpenACC loop LOOP, ending at a tail marker.
793 Return the parent loop. */
794
795 static oacc_loop *
796 finish_oacc_loop (oacc_loop *loop)
797 {
798 /* If the loop has been collapsed, don't partition it. */
799 if (loop->ifns.is_empty ())
800 loop->mask = loop->flags = 0;
801 return loop->parent;
802 }
803
804 /* Free all OpenACC loop structures within LOOP (inclusive). */
805
806 static void
807 free_oacc_loop (oacc_loop *loop)
808 {
809 if (loop->sibling)
810 free_oacc_loop (loop->sibling);
811 if (loop->child)
812 free_oacc_loop (loop->child);
813
814 loop->ifns.release ();
815 free (loop);
816 }
817
818 /* Dump out the OpenACC loop head or tail beginning at FROM. */
819
820 static void
821 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
822 const char *title, int level)
823 {
824 enum ifn_unique_kind kind
825 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
826
827 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
828 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
829 {
830 gimple *stmt = gsi_stmt (gsi);
831
832 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
833 {
834 enum ifn_unique_kind k
835 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
836 (gimple_call_arg (stmt, 0)));
837
838 if (k == kind && stmt != from)
839 break;
840 }
841 print_gimple_stmt (file, stmt, depth * 2 + 2);
842
843 gsi_next (&gsi);
844 while (gsi_end_p (gsi))
845 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
846 }
847 }
848
849 /* Dump OpenACC loop LOOP, its children, and its siblings. */
850
851 static void
852 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
853 {
854 int ix;
855
856 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
857 loop->flags, loop->mask,
858 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
859
860 if (loop->marker)
861 print_gimple_stmt (file, loop->marker, depth * 2);
862
863 if (loop->routine)
864 fprintf (file, "%*sRoutine %s:%u:%s\n",
865 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
866 DECL_SOURCE_LINE (loop->routine),
867 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
868
869 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
870 if (loop->heads[ix])
871 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
872 for (ix = GOMP_DIM_MAX; ix--;)
873 if (loop->tails[ix])
874 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
875
876 if (loop->child)
877 dump_oacc_loop (file, loop->child, depth + 1);
878 if (loop->sibling)
879 dump_oacc_loop (file, loop->sibling, depth);
880 }
881
882 void debug_oacc_loop (oacc_loop *);
883
884 /* Dump loops to stderr. */
885
886 DEBUG_FUNCTION void
887 debug_oacc_loop (oacc_loop *loop)
888 {
889 dump_oacc_loop (stderr, loop, 0);
890 }
891
892 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
893 siblings. */
894
895 static void
896 inform_oacc_loop (const oacc_loop *loop)
897 {
898 const char *gang
899 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
900 const char *worker
901 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
902 const char *vector
903 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
904 const char *seq = loop->mask == 0 ? " seq" : "";
905 const dump_user_location_t loc
906 = dump_user_location_t::from_location_t (loop->loc);
907 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
908 "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
909 vector, seq);
910
911 if (loop->child)
912 inform_oacc_loop (loop->child);
913 if (loop->sibling)
914 inform_oacc_loop (loop->sibling);
915 }
916
917 /* DFS walk of basic blocks starting at BB, creating OpenACC loop
918 structures as we go. By construction these loops are properly
919 nested. */
920
921 static void
922 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
923 {
924 int marker = 0;
925 int remaining = 0;
926
927 if (bb->flags & BB_VISITED)
928 return;
929
930 follow:
931 bb->flags |= BB_VISITED;
932
933 /* Scan for loop markers. */
934 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
935 gsi_next (&gsi))
936 {
937 gimple *stmt = gsi_stmt (gsi);
938
939 if (!is_gimple_call (stmt))
940 continue;
941
942 gcall *call = as_a <gcall *> (stmt);
943
944 /* If this is a routine, make a dummy loop for it. */
945 if (tree decl = gimple_call_fndecl (call))
946 if (tree attrs = oacc_get_fn_attrib (decl))
947 {
948 gcc_assert (!marker);
949 new_oacc_loop_routine (loop, call, decl, attrs);
950 }
951
952 if (!gimple_call_internal_p (call))
953 continue;
954
955 switch (gimple_call_internal_fn (call))
956 {
957 default:
958 break;
959
960 case IFN_GOACC_LOOP:
961 case IFN_GOACC_TILE:
962 /* Record the abstraction function, so we can manipulate it
963 later. */
964 loop->ifns.safe_push (call);
965 break;
966
967 case IFN_UNIQUE:
968 enum ifn_unique_kind kind
969 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
970 (gimple_call_arg (call, 0)));
971 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
972 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
973 {
974 if (gimple_call_num_args (call) == 2)
975 {
976 gcc_assert (marker && !remaining);
977 marker = 0;
978 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
979 loop = finish_oacc_loop (loop);
980 else
981 loop->head_end = call;
982 }
983 else
984 {
985 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
986
987 if (!marker)
988 {
989 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
990 loop = new_oacc_loop (loop, call);
991 remaining = count;
992 }
993 gcc_assert (count == remaining);
994 if (remaining)
995 {
996 remaining--;
997 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
998 loop->heads[marker] = call;
999 else
1000 loop->tails[remaining] = call;
1001 }
1002 marker++;
1003 }
1004 }
1005 }
1006 }
1007 if (remaining || marker)
1008 {
1009 bb = single_succ (bb);
1010 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
1011 goto follow;
1012 }
1013
1014 /* Walk successor blocks. */
1015 edge e;
1016 edge_iterator ei;
1017
1018 FOR_EACH_EDGE (e, ei, bb->succs)
1019 oacc_loop_discover_walk (loop, e->dest);
1020 }
1021
1022 /* LOOP is the first sibling. Reverse the order in place and return
1023 the new first sibling. Recurse to child loops. */
1024
1025 static oacc_loop *
1026 oacc_loop_sibling_nreverse (oacc_loop *loop)
1027 {
1028 oacc_loop *last = NULL;
1029 do
1030 {
1031 if (loop->child)
1032 loop->child = oacc_loop_sibling_nreverse (loop->child);
1033
1034 oacc_loop *next = loop->sibling;
1035 loop->sibling = last;
1036 last = loop;
1037 loop = next;
1038 }
1039 while (loop);
1040
1041 return last;
1042 }
1043
1044 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1045 the current function. */
1046
1047 static oacc_loop *
1048 oacc_loop_discovery ()
1049 {
1050 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1051 in the following. */
1052 clear_bb_flags ();
1053
1054 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1055 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1056
1057 /* The siblings were constructed in reverse order, reverse them so
1058 that diagnostics come out in an unsurprising order. */
1059 top = oacc_loop_sibling_nreverse (top);
1060
1061 return top;
1062 }
1063
1064 /* Transform the abstract internal function markers starting at FROM
1065 to be for partitioning level LEVEL. Stop when we meet another HEAD
1066 or TAIL marker. */
1067
1068 static void
1069 oacc_loop_xform_head_tail (gcall *from, int level)
1070 {
1071 enum ifn_unique_kind kind
1072 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1073 tree replacement = build_int_cst (unsigned_type_node, level);
1074
1075 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1076 {
1077 gimple *stmt = gsi_stmt (gsi);
1078
1079 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1080 {
1081 enum ifn_unique_kind k
1082 = ((enum ifn_unique_kind)
1083 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1084
1085 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
1086 *gimple_call_arg_ptr (stmt, 2) = replacement;
1087 else if (k == kind && stmt != from)
1088 break;
1089 }
1090 else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
1091 *gimple_call_arg_ptr (stmt, 3) = replacement;
1092
1093 gsi_next (&gsi);
1094 while (gsi_end_p (gsi))
1095 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1096 }
1097 }
1098
1099 /* Process the discovered OpenACC loops, setting the correct
1100 partitioning level etc. */
1101
1102 static void
1103 oacc_loop_process (oacc_loop *loop)
1104 {
1105 if (loop->child)
1106 oacc_loop_process (loop->child);
1107
1108 if (loop->mask && !loop->routine)
1109 {
1110 int ix;
1111 tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
1112 tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
1113 tree chunk_arg = loop->chunk_size;
1114 gcall *call;
1115
1116 for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
1117 switch (gimple_call_internal_fn (call))
1118 {
1119 case IFN_GOACC_LOOP:
1120 {
1121 bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
1122 gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
1123 if (!is_e)
1124 gimple_call_set_arg (call, 4, chunk_arg);
1125 }
1126 break;
1127
1128 case IFN_GOACC_TILE:
1129 gimple_call_set_arg (call, 3, mask_arg);
1130 gimple_call_set_arg (call, 4, e_mask_arg);
1131 break;
1132
1133 default:
1134 gcc_unreachable ();
1135 }
1136
1137 unsigned dim = GOMP_DIM_GANG;
1138 unsigned mask = loop->mask | loop->e_mask;
1139 for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1140 {
1141 while (!(GOMP_DIM_MASK (dim) & mask))
1142 dim++;
1143
1144 oacc_loop_xform_head_tail (loop->heads[ix], dim);
1145 oacc_loop_xform_head_tail (loop->tails[ix], dim);
1146
1147 mask ^= GOMP_DIM_MASK (dim);
1148 }
1149 }
1150
1151 if (loop->sibling)
1152 oacc_loop_process (loop->sibling);
1153 }
1154
1155 /* Walk the OpenACC loop hierarchy checking and assigning the
1156 programmer-specified partitionings. OUTER_MASK is the partitioning
1157 this loop is contained within. Return mask of partitioning
1158 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1159 bit. */
1160
1161 static unsigned
1162 oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1163 {
1164 unsigned this_mask = loop->mask;
1165 unsigned mask_all = 0;
1166 bool noisy = true;
1167
1168 #ifdef ACCEL_COMPILER
1169 /* When device_type is supported, we want the device compiler to be
1170 noisy if the loop parameters are device_type-specific. */
1171 noisy = false;
1172 #endif
1173
1174 if (!loop->routine)
1175 {
1176 bool auto_par = (loop->flags & OLF_AUTO) != 0;
1177 bool seq_par = (loop->flags & OLF_SEQ) != 0;
1178 bool tiling = (loop->flags & OLF_TILE) != 0;
1179
1180 this_mask = ((loop->flags >> OLF_DIM_BASE)
1181 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1182
1183 /* Apply auto partitioning if this is a non-partitioned regular
1184 loop, or a (no more than) single-axis tiled loop. */
1185 bool maybe_auto
1186 = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
1187
1188 if ((this_mask != 0) + auto_par + seq_par > 1)
1189 {
1190 if (noisy)
1191 error_at (loop->loc,
1192 seq_par
1193 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1194 : G_("%<auto%> conflicts with other OpenACC loop "
1195 "specifiers"));
1196 maybe_auto = false;
1197 loop->flags &= ~OLF_AUTO;
1198 if (seq_par)
1199 {
1200 loop->flags
1201 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
1202 this_mask = 0;
1203 }
1204 }
1205
1206 if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
1207 {
1208 loop->flags |= OLF_AUTO;
1209 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1210 }
1211 }
1212
1213 if (this_mask & outer_mask)
1214 {
1215 const oacc_loop *outer;
1216 for (outer = loop->parent; outer; outer = outer->parent)
1217 if ((outer->mask | outer->e_mask) & this_mask)
1218 break;
1219
1220 if (noisy)
1221 {
1222 if (outer)
1223 {
1224 error_at (loop->loc,
1225 loop->routine
1226 ? G_("routine call uses same OpenACC parallelism"
1227 " as containing loop")
1228 : G_("inner loop uses same OpenACC parallelism"
1229 " as containing loop"));
1230 inform (outer->loc, "containing loop here");
1231 }
1232 else
1233 error_at (loop->loc,
1234 loop->routine
1235 ? G_("routine call uses OpenACC parallelism disallowed"
1236 " by containing routine")
1237 : G_("loop uses OpenACC parallelism disallowed"
1238 " by containing routine"));
1239
1240 if (loop->routine)
1241 inform (DECL_SOURCE_LOCATION (loop->routine),
1242 "routine %qD declared here", loop->routine);
1243 }
1244 this_mask &= ~outer_mask;
1245 }
1246 else
1247 {
1248 unsigned outermost = least_bit_hwi (this_mask);
1249
1250 if (outermost && outermost <= outer_mask)
1251 {
1252 if (noisy)
1253 {
1254 error_at (loop->loc,
1255 "incorrectly nested OpenACC loop parallelism");
1256
1257 const oacc_loop *outer;
1258 for (outer = loop->parent;
1259 outer->flags && outer->flags < outermost;
1260 outer = outer->parent)
1261 continue;
1262 inform (outer->loc, "containing loop here");
1263 }
1264
1265 this_mask &= ~outermost;
1266 }
1267 }
1268
1269 mask_all |= this_mask;
1270
1271 if (loop->flags & OLF_TILE)
1272 {
1273 /* When tiling, vector goes to the element loop, and failing
1274 that we put worker there. The standard doesn't contemplate
1275 specifying all three. We choose to put worker and vector on
1276 the element loops in that case. */
1277 unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
1278 if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1279 this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
1280
1281 loop->e_mask = this_e_mask;
1282 this_mask ^= this_e_mask;
1283 }
1284
1285 loop->mask = this_mask;
1286
1287 if (dump_file)
1288 fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
1289 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1290 loop->mask, loop->e_mask);
1291
1292 if (loop->child)
1293 {
1294 unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
1295 loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
1296 mask_all |= loop->inner;
1297 }
1298
1299 if (loop->sibling)
1300 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1301
1302 return mask_all;
1303 }
1304
1305 /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
1306 OUTER_MASK is the partitioning this loop is contained within.
1307 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1308 Return the cumulative partitioning used by this loop, siblings and
1309 children. */
1310
1311 static unsigned
1312 oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1313 bool outer_assign)
1314 {
1315 bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1316 bool noisy = true;
1317 bool tiling = loop->flags & OLF_TILE;
1318
1319 #ifdef ACCEL_COMPILER
1320 /* When device_type is supported, we want the device compiler to be
1321 noisy if the loop parameters are device_type-specific. */
1322 noisy = false;
1323 #endif
1324
1325 if (assign && (!outer_assign || loop->inner))
1326 {
1327 /* Allocate outermost and non-innermost loops at the outermost
1328 non-innermost available level. */
1329 unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1330
1331 /* Find the first outermost available partition. */
1332 while (this_mask <= outer_mask)
1333 this_mask <<= 1;
1334
1335 /* Grab two axes if tiling, and we've not assigned anything. */
1336 if (tiling && !(loop->mask | loop->e_mask))
1337 this_mask |= this_mask << 1;
1338
1339 /* Prohibit the innermost partitioning at the moment. */
1340 this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
1341
1342 /* Don't use any dimension explicitly claimed by an inner loop. */
1343 this_mask &= ~loop->inner;
1344
1345 if (tiling && !loop->e_mask)
1346 {
1347 /* If we got two axes, allocate the inner one to the element
1348 loop. */
1349 loop->e_mask = this_mask & (this_mask << 1);
1350 this_mask ^= loop->e_mask;
1351 }
1352
1353 loop->mask |= this_mask;
1354 }
1355
1356 if (loop->child)
1357 {
1358 unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1359 loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1360 outer_assign | assign);
1361 }
1362
1363 if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
1364 {
1365 /* Allocate the loop at the innermost available level. Note
1366 that we do this even if we already assigned this loop the
1367 outermost available level above. That way we'll partition
1368 this along 2 axes, if they are available. */
1369 unsigned this_mask = 0;
1370
1371 /* Determine the outermost partitioning used within this loop. */
1372 this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1373 this_mask = least_bit_hwi (this_mask);
1374
1375 /* Pick the partitioning just inside that one. */
1376 this_mask >>= 1;
1377
1378 /* And avoid picking one used by an outer loop. */
1379 this_mask &= ~outer_mask;
1380
1381 /* If tiling and we failed completely above, grab the next one
1382 too, making sure it doesn't hit an outer loop. */
1383 if (tiling)
1384 {
1385 this_mask &= ~(loop->e_mask | loop->mask);
1386 unsigned tile_mask = ((this_mask >> 1)
1387 & ~(outer_mask | loop->e_mask | loop->mask));
1388
1389 if (tile_mask || loop->mask)
1390 {
1391 loop->e_mask |= this_mask;
1392 this_mask = tile_mask;
1393 }
1394 if (!loop->e_mask && noisy)
1395 warning_at (loop->loc, 0,
1396 "insufficient partitioning available"
1397 " to parallelize element loop");
1398 }
1399
1400 loop->mask |= this_mask;
1401 if (!loop->mask && noisy)
1402 warning_at (loop->loc, 0,
1403 tiling
1404 ? G_("insufficient partitioning available"
1405 " to parallelize tile loop")
1406 : G_("insufficient partitioning available"
1407 " to parallelize loop"));
1408 }
1409
1410 if (assign && dump_file)
1411 fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
1412 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1413 loop->mask, loop->e_mask);
1414
1415 unsigned inner_mask = 0;
1416
1417 if (loop->sibling)
1418 inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1419 outer_mask, outer_assign);
1420
1421 inner_mask |= loop->inner | loop->mask | loop->e_mask;
1422
1423 return inner_mask;
1424 }
1425
1426 /* Walk the OpenACC loop hierarchy to check and assign partitioning
1427 axes. Return mask of partitioning. */
1428
1429 static unsigned
1430 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1431 {
1432 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1433
1434 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1435 {
1436 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1437 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1438 }
1439 return mask_all;
1440 }
1441
1442 /* Default fork/join early expander. Delete the function calls if
1443 there is no RTL expander. */
1444
1445 bool
1446 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1447 const int *ARG_UNUSED (dims), bool is_fork)
1448 {
1449 if (is_fork)
1450 return targetm.have_oacc_fork ();
1451 else
1452 return targetm.have_oacc_join ();
1453 }
1454
1455 /* Default goacc.reduction early expander.
1456
1457 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1458 If RES_PTR is not integer-zerop:
1459 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1460 TEARDOWN - emit '*RES_PTR = VAR'
1461 If LHS is not NULL
1462 emit 'LHS = VAR' */
1463
1464 void
1465 default_goacc_reduction (gcall *call)
1466 {
1467 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1468 gimple_stmt_iterator gsi = gsi_for_stmt (call);
1469 tree lhs = gimple_call_lhs (call);
1470 tree var = gimple_call_arg (call, 2);
1471 gimple_seq seq = NULL;
1472
1473 if (code == IFN_GOACC_REDUCTION_SETUP
1474 || code == IFN_GOACC_REDUCTION_TEARDOWN)
1475 {
1476 /* Setup and Teardown need to copy from/to the receiver object,
1477 if there is one. */
1478 tree ref_to_res = gimple_call_arg (call, 1);
1479
1480 if (!integer_zerop (ref_to_res))
1481 {
1482 tree dst = build_simple_mem_ref (ref_to_res);
1483 tree src = var;
1484
1485 if (code == IFN_GOACC_REDUCTION_SETUP)
1486 {
1487 src = dst;
1488 dst = lhs;
1489 lhs = NULL;
1490 }
1491 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1492 }
1493 }
1494
1495 /* Copy VAR to LHS, if there is an LHS. */
1496 if (lhs)
1497 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1498
1499 gsi_replace_with_seq (&gsi, seq, true);
1500 }
1501
1502 /* Main entry point for oacc transformations which run on the device
1503 compiler after LTO, so we know what the target device is at this
1504 point (including the host fallback). */
1505
1506 static unsigned int
1507 execute_oacc_device_lower ()
1508 {
1509 tree attrs = oacc_get_fn_attrib (current_function_decl);
1510
1511 if (!attrs)
1512 /* Not an offloaded function. */
1513 return 0;
1514
1515 /* Parse the default dim argument exactly once. */
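/* After the first call flag_openacc_dims is repointed at its own
storage; that self-reference serves as the "already parsed" sentinel
tested here. */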
1516 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1517 {
1518 oacc_parse_default_dims (flag_openacc_dims);
1519 flag_openacc_dims = (char *)&flag_openacc_dims;
1520 }
1521
1522 bool is_oacc_kernels
1523 = (lookup_attribute ("oacc kernels",
1524 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1525 bool is_oacc_kernels_parallelized
1526 = (lookup_attribute ("oacc kernels parallelized",
1527 DECL_ATTRIBUTES (current_function_decl)) != NULL);
1528
1529 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1530 kernels, so remove the parallelism dimensions function attributes
1531 potentially set earlier on. */
1532 if (is_oacc_kernels && !is_oacc_kernels_parallelized)
1533 {
1534 oacc_set_fn_attrib (current_function_decl, NULL, NULL);
1535 attrs = oacc_get_fn_attrib (current_function_decl);
1536 }
1537
1538 /* Discover, partition and process the loops. */
1539 oacc_loop *loops = oacc_loop_discovery ();
1540 int fn_level = oacc_fn_attrib_level (attrs);
1541
1542 if (dump_file)
1543 {
1544 if (fn_level >= 0)
1545 fprintf (dump_file, "Function is OpenACC routine level %d\n",
1546 fn_level);
1547 else if (is_oacc_kernels)
1548 fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
1549 (is_oacc_kernels_parallelized
1550 ? "parallelized" : "unparallelized"));
1551 else
1552 fprintf (dump_file, "Function is OpenACC parallel offload\n");
1553 }
1554
1555 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
1556 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
1557 /* OpenACC kernels constructs are special: they currently don't use the
1558 generic oacc_loop infrastructure and attribute/dimension processing. */
1559 if (is_oacc_kernels && is_oacc_kernels_parallelized)
1560 {
1561 /* Parallelized OpenACC kernels constructs use gang parallelism. See
1562 also tree-parloops.c:create_parallel_loop. */
1563 used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
1564 }
1565
1566 int dims[GOMP_DIM_MAX];
1567 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
1568
1569 if (dump_file)
1570 {
1571 const char *comma = "Compute dimensions [";
1572 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
1573 fprintf (dump_file, "%s%d", comma, dims[ix]);
1574 fprintf (dump_file, "]\n");
1575 }
1576
1577 oacc_loop_process (loops);
1578 if (dump_file)
1579 {
1580 fprintf (dump_file, "OpenACC loops\n");
1581 dump_oacc_loop (dump_file, loops, 0);
1582 fprintf (dump_file, "\n");
1583 }
1584 if (dump_enabled_p ())
1585 {
1586 oacc_loop *l = loops;
1587 /* OpenACC kernels constructs are special: they currently don't use the
1588 generic oacc_loop infrastructure. */
1589 if (is_oacc_kernels)
1590 {
1591 /* Create a fake oacc_loop for diagnostic purposes. */
1592 l = new_oacc_loop_raw (NULL,
1593 DECL_SOURCE_LOCATION (current_function_decl));
1594 l->mask = used_mask;
1595 }
1596 else
1597 {
1598 /* Skip the outermost, dummy OpenACC loop. */
1599 l = l->child;
1600 }
1601 if (l)
1602 inform_oacc_loop (l);
1603 if (is_oacc_kernels)
1604 free_oacc_loop (l);
1605 }
1606
1607 /* Offloaded targets may introduce new basic blocks, which require
1608 dominance information to update SSA. */
1609 calculate_dominance_info (CDI_DOMINATORS);
1610
1611 /* Now lower internal loop functions to target-specific code
1612 sequences. */
1613 basic_block bb;
1614 FOR_ALL_BB_FN (bb, cfun)
1615 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1616 {
1617 gimple *stmt = gsi_stmt (gsi);
1618 if (!is_gimple_call (stmt))
1619 {
1620 gsi_next (&gsi);
1621 continue;
1622 }
1623
1624 gcall *call = as_a <gcall *> (stmt);
1625 if (!gimple_call_internal_p (call))
1626 {
1627 gsi_next (&gsi);
1628 continue;
1629 }
1630
1631 /* Rewind to allow rescan. */
1632 gsi_prev (&gsi);
1633 bool rescan = false, remove = false;
1634 enum internal_fn ifn_code = gimple_call_internal_fn (call);
1635
1636 switch (ifn_code)
1637 {
1638 default: break;
1639
1640 case IFN_GOACC_TILE:
1641 oacc_xform_tile (call);
1642 rescan = true;
1643 break;
1644
1645 case IFN_GOACC_LOOP:
1646 oacc_xform_loop (call);
1647 rescan = true;
1648 break;
1649
1650 case IFN_GOACC_REDUCTION:
1651 /* Mark the function for SSA renaming. */
1652 mark_virtual_operands_for_renaming (cfun);
1653
1654 /* If the level is -1, this ended up being an unused
1655 axis. Handle as a default. */
1656 if (integer_minus_onep (gimple_call_arg (call, 3)))
1657 default_goacc_reduction (call);
1658 else
1659 targetm.goacc.reduction (call);
1660 rescan = true;
1661 break;
1662
1663 case IFN_UNIQUE:
1664 {
1665 enum ifn_unique_kind kind
1666 = ((enum ifn_unique_kind)
1667 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1668
1669 switch (kind)
1670 {
1671 default:
1672 break;
1673
1674 case IFN_UNIQUE_OACC_FORK:
1675 case IFN_UNIQUE_OACC_JOIN:
1676 if (integer_minus_onep (gimple_call_arg (call, 2)))
1677 remove = true;
1678 else if (!targetm.goacc.fork_join
1679 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
1680 remove = true;
1681 break;
1682
1683 case IFN_UNIQUE_OACC_HEAD_MARK:
1684 case IFN_UNIQUE_OACC_TAIL_MARK:
1685 remove = true;
1686 break;
1687 }
1688 break;
1689 }
1690 }
1691
1692 if (gsi_end_p (gsi))
1693 /* We rewound past the beginning of the BB. */
1694 gsi = gsi_start_bb (bb);
1695 else
1696 /* Undo the rewind. */
1697 gsi_next (&gsi);
1698
1699 if (remove)
1700 {
1701 if (gimple_vdef (call))
1702 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
1703 if (gimple_call_lhs (call))
1704 {
1705 /* Propagate the data dependency var. */
1706 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
1707 gimple_call_arg (call, 1));
1708 gsi_replace (&gsi, ass, false);
1709 }
1710 else
1711 gsi_remove (&gsi, true);
1712 }
1713 else if (!rescan)
1714 /* If not rescanning, advance over the call. */
1715 gsi_next (&gsi);
1716 }
1717
1718 free_oacc_loop (loops);
1719
1720 return 0;
1721 }
1722
1723 /* Default launch dimension validator. Force everything to 1. A
1724 backend that wants to provide larger dimensions must override this
1725 hook. */
1726
1727 bool
1728 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1729 int ARG_UNUSED (fn_level),
1730 unsigned ARG_UNUSED (used))
1731 {
1732 bool changed = false;
1733
1734 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1735 {
1736 if (dims[ix] != 1)
1737 {
1738 dims[ix] = 1;
1739 changed = true;
1740 }
1741 }
1742
1743 return changed;
1744 }
1745
1746 /* Default dimension bound is unknown on accelerator and 1 on host. */
1747
1748 int
1749 default_goacc_dim_limit (int ARG_UNUSED (axis))
1750 {
1751 #ifdef ACCEL_COMPILER
1752 return 0;
1753 #else
1754 return 1;
1755 #endif
1756 }
1757
1758 namespace {
1759
1760 const pass_data pass_data_oacc_device_lower =
1761 {
1762 GIMPLE_PASS, /* type */
1763 "oaccdevlow", /* name */
1764 OPTGROUP_OMP, /* optinfo_flags */
1765 TV_NONE, /* tv_id */
1766 PROP_cfg, /* properties_required */
1767 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1768 0, /* properties_destroyed */
1769 0, /* todo_flags_start */
1770 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
1771 };
1772
1773 class pass_oacc_device_lower : public gimple_opt_pass
1774 {
1775 public:
1776 pass_oacc_device_lower (gcc::context *ctxt)
1777 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
1778 {}
1779
1780 /* opt_pass methods: */
1781 virtual bool gate (function *) { return flag_openacc; }
1782
1783 virtual unsigned int execute (function *)
1784 {
1785 return execute_oacc_device_lower ();
1786 }
1787
1788 }; // class pass_oacc_device_lower
1789
1790 } // anon namespace
1791
1792 gimple_opt_pass *
1793 make_pass_oacc_device_lower (gcc::context *ctxt)
1794 {
1795 return new pass_oacc_device_lower (ctxt);
1796 }
1797
1798 \f
1799 /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
1800 GOMP_SIMT_ENTER call identifying the privatized variables, which are
1801 turned into structure fields and receive a DECL_VALUE_EXPR accordingly.
1802 Set *REGIMPLIFY to true, except if no privatized variables were seen. */
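/* Roughly (with illustrative SSA names), for privatized variables A and B

simduid.0 = GOMP_SIMT_ENTER (D.1, &A, &B);
simtrec.1 = GOMP_SIMT_ENTER_ALLOC (simduid.0);

becomes

simduid.0 = D.1;
simtrec.1 = GOMP_SIMT_ENTER_ALLOC (size, align);

with A and B laid out as fields of a new record type and their uses
regimplified through DECL_VALUE_EXPRs of the form (*simtrec.1).A. */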
1803
1804 static void
1805 ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
1806 {
1807 gimple *alloc_stmt = gsi_stmt (*gsi);
1808 tree simtrec = gimple_call_lhs (alloc_stmt);
1809 tree simduid = gimple_call_arg (alloc_stmt, 0);
1810 gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
1811 gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
1812 tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
1813 TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
1814 TREE_ADDRESSABLE (rectype) = 1;
1815 TREE_TYPE (simtrec) = build_pointer_type (rectype);
1816 for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
1817 {
1818 tree *argp = gimple_call_arg_ptr (enter_stmt, i);
1819 if (*argp == null_pointer_node)
1820 continue;
1821 gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
1822 && VAR_P (TREE_OPERAND (*argp, 0)));
1823 tree var = TREE_OPERAND (*argp, 0);
1824
1825 tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
1826 DECL_NAME (var), TREE_TYPE (var));
1827 SET_DECL_ALIGN (field, DECL_ALIGN (var));
1828 DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
1829 TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
1830
1831 insert_field_into_struct (rectype, field);
1832
1833 tree t = build_simple_mem_ref (simtrec);
1834 t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
1835 TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
1836 SET_DECL_VALUE_EXPR (var, t);
1837 DECL_HAS_VALUE_EXPR_P (var) = 1;
1838 *regimplify = true;
1839 }
1840 layout_type (rectype);
1841 tree size = TYPE_SIZE_UNIT (rectype);
1842 tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
1843
1844 alloc_stmt
1845 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
1846 gimple_call_set_lhs (alloc_stmt, simtrec);
1847 gsi_replace (gsi, alloc_stmt, false);
1848 gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
1849 enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
1850 gsi_replace (&enter_gsi, enter_stmt, false);
1851
1852 use_operand_p use;
1853 gimple *exit_stmt;
1854 if (single_imm_use (simtrec, &use, &exit_stmt))
1855 {
1856 gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
1857 gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
1858 tree clobber = build_constructor (rectype, NULL);
1859 TREE_THIS_VOLATILE (clobber) = 1;
1860 exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
1861 gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
1862 }
1863 else
1864 gcc_checking_assert (has_zero_uses (simtrec));
1865 }
1866
1867 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
1868
1869 static tree
1870 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
1871 {
1872 tree t = *tp;
1873
1874 if (VAR_P (t)
1875 && DECL_HAS_VALUE_EXPR_P (t)
1876 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
1877 {
1878 *walk_subtrees = 0;
1879 return t;
1880 }
1881 return NULL_TREE;
1882 }
1883
1884 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
1885 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
1886 LANE is kept to be expanded to RTL later on. Also clean up all other SIMT
1887 internal functions on non-SIMT targets, and likewise some SIMD internal
1888 functions on SIMT targets. */
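/* E.g. on a non-SIMT target GOMP_USE_SIMT () folds to 0 and
GOMP_SIMT_VF () to 1, making the SIMT code paths dead; on a SIMT
target such as nvptx GOMP_SIMT_VF () folds to the warp size (32). */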
1889
1890 static unsigned int
1891 execute_omp_device_lower ()
1892 {
1893 int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
1894 bool regimplify = false;
1895 basic_block bb;
1896 gimple_stmt_iterator gsi;
1897 FOR_EACH_BB_FN (bb, cfun)
1898 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1899 {
1900 gimple *stmt = gsi_stmt (gsi);
1901 if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
1902 continue;
1903 tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
1904 tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
1905 switch (gimple_call_internal_fn (stmt))
1906 {
1907 case IFN_GOMP_USE_SIMT:
1908 rhs = vf == 1 ? integer_zero_node : integer_one_node;
1909 break;
1910 case IFN_GOMP_SIMT_ENTER:
1911 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
1912 goto simtreg_enter_exit;
1913 case IFN_GOMP_SIMT_ENTER_ALLOC:
1914 if (vf != 1)
1915 ompdevlow_adjust_simt_enter (&gsi, &regimplify);
1916 rhs = vf == 1 ? null_pointer_node : NULL_TREE;
1917 goto simtreg_enter_exit;
1918 case IFN_GOMP_SIMT_EXIT:
1919 simtreg_enter_exit:
1920 if (vf != 1)
1921 continue;
1922 unlink_stmt_vdef (stmt);
1923 break;
1924 case IFN_GOMP_SIMT_LANE:
1925 case IFN_GOMP_SIMT_LAST_LANE:
1926 rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
1927 break;
1928 case IFN_GOMP_SIMT_VF:
1929 rhs = build_int_cst (type, vf);
1930 break;
1931 case IFN_GOMP_SIMT_ORDERED_PRED:
1932 rhs = vf == 1 ? integer_zero_node : NULL_TREE;
1933 if (rhs || !lhs)
1934 unlink_stmt_vdef (stmt);
1935 break;
1936 case IFN_GOMP_SIMT_VOTE_ANY:
1937 case IFN_GOMP_SIMT_XCHG_BFLY:
1938 case IFN_GOMP_SIMT_XCHG_IDX:
1939 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
1940 break;
1941 case IFN_GOMP_SIMD_LANE:
1942 case IFN_GOMP_SIMD_LAST_LANE:
1943 rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
1944 break;
1945 case IFN_GOMP_SIMD_VF:
1946 rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
1947 break;
1948 default:
1949 continue;
1950 }
1951 if (lhs && !rhs)
1952 continue;
1953 stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
1954 gsi_replace (&gsi, stmt, false);
1955 }
1956 if (regimplify)
1957 FOR_EACH_BB_REVERSE_FN (bb, cfun)
1958 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1959 if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
1960 {
1961 if (gimple_clobber_p (gsi_stmt (gsi)))
1962 gsi_remove (&gsi, true);
1963 else
1964 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
1965 }
1966 if (vf != 1)
1967 cfun->has_force_vectorize_loops = false;
1968 return 0;
1969 }
1970
1971 namespace {
1972
1973 const pass_data pass_data_omp_device_lower =
1974 {
1975 GIMPLE_PASS, /* type */
1976 "ompdevlow", /* name */
1977 OPTGROUP_OMP, /* optinfo_flags */
1978 TV_NONE, /* tv_id */
1979 PROP_cfg, /* properties_required */
1980 PROP_gimple_lomp_dev, /* properties_provided */
1981 0, /* properties_destroyed */
1982 0, /* todo_flags_start */
1983 TODO_update_ssa, /* todo_flags_finish */
1984 };
1985
1986 class pass_omp_device_lower : public gimple_opt_pass
1987 {
1988 public:
1989 pass_omp_device_lower (gcc::context *ctxt)
1990 : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
1991 {}
1992
1993 /* opt_pass methods: */
1994 virtual bool gate (function *fun)
1995 {
1996 return !(fun->curr_properties & PROP_gimple_lomp_dev);
1997 }
1998 virtual unsigned int execute (function *)
1999 {
2000 return execute_omp_device_lower ();
2001 }
2002
2003 }; // class pass_omp_device_lower
2004
2005 } // anon namespace
2006
2007 gimple_opt_pass *
2008 make_pass_omp_device_lower (gcc::context *ctxt)
2009 {
2010 return new pass_omp_device_lower (ctxt);
2011 }
2012
2013 /* "omp declare target link" handling pass. */
2014
2015 namespace {
2016
2017 const pass_data pass_data_omp_target_link =
2018 {
2019 GIMPLE_PASS, /* type */
2020 "omptargetlink", /* name */
2021 OPTGROUP_OMP, /* optinfo_flags */
2022 TV_NONE, /* tv_id */
2023 PROP_ssa, /* properties_required */
2024 0, /* properties_provided */
2025 0, /* properties_destroyed */
2026 0, /* todo_flags_start */
2027 TODO_update_ssa, /* todo_flags_finish */
2028 };
2029
2030 class pass_omp_target_link : public gimple_opt_pass
2031 {
2032 public:
2033 pass_omp_target_link (gcc::context *ctxt)
2034 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
2035 {}
2036
2037 /* opt_pass methods: */
2038 virtual bool gate (function *fun)
2039 {
2040 #ifdef ACCEL_COMPILER
2041 return offloading_function_p (fun->decl);
2042 #else
2043 (void) fun;
2044 return false;
2045 #endif
2046 }
2047
2048 virtual unsigned execute (function *);
2049 };
2050
2051 /* Callback for walk_gimple_stmt used to scan for link var operands. */
2052
2053 static tree
2054 find_link_var_op (tree *tp, int *walk_subtrees, void *)
2055 {
2056 tree t = *tp;
2057
2058 if (VAR_P (t)
2059 && DECL_HAS_VALUE_EXPR_P (t)
2060 && is_global_var (t)
2061 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2062 {
2063 *walk_subtrees = 0;
2064 return t;
2065 }
2066
2067 return NULL_TREE;
2068 }
2069
2070 unsigned
2071 pass_omp_target_link::execute (function *fun)
2072 {
2073 basic_block bb;
2074 FOR_EACH_BB_FN (bb, fun)
2075 {
2076 gimple_stmt_iterator gsi;
2077 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2078 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2079 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2080 }
2081
2082 return 0;
2083 }
2084
2085 } // anon namespace
2086
2087 gimple_opt_pass *
2088 make_pass_omp_target_link (gcc::context *ctxt)
2089 {
2090 return new pass_omp_target_link (ctxt);
2091 }