1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "tree-pass.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
52 #include "stringpool.h"
/* One node of the loop tree built by oacc_loop_discovery.  Nodes are
   allocated by new_oacc_loop_raw and stand either for an OpenACC loop
   (new_oacc_loop), for a call to an OpenACC routine
   (new_oacc_loop_routine) or for the function as a whole
   (new_oacc_loop_outer).  */
57 /* Describe the OpenACC looping structure of a function. The entire
58 function is held in a 'NULL' loop. */
62 oacc_loop
*parent
; /* Containing loop; NULL for the loop made by new_oacc_loop_outer. */
64 oacc_loop
*child
; /* First inner loop. */
66 oacc_loop
*sibling
; /* Next loop within same parent. */
68 location_t loc
; /* Location of the loop start. */
70 gcall
*marker
; /* Initial head marker (set by new_oacc_loop). */
72 gcall
*heads
[GOMP_DIM_MAX
]; /* Head marker functions, recorded by oacc_loop_discover_walk. */
73 gcall
*tails
[GOMP_DIM_MAX
]; /* Tail marker functions, recorded by oacc_loop_discover_walk. */
75 tree routine
; /* Pseudo-loop enclosing a routine: the called decl, set by
     new_oacc_loop_routine. */
77 unsigned mask
; /* Partitioning mask. */
78 unsigned e_mask
; /* Partitioning of element loops (when tiling). */
79 unsigned inner
; /* Partitioning of inner loops. */
80 unsigned flags
; /* Partitioning flags; read from argument 3 of the head marker. */
81 vec
<gcall
*> ifns
; /* Contained loop abstraction functions (the IFN_GOACC_LOOP and
     IFN_GOACC_TILE calls pushed by oacc_loop_discover_walk). */
82 tree chunk_size
; /* Chunk size: argument 4 of the head marker when OLF_GANG_STATIC is
     set in FLAGS, integer_zero_node otherwise (see new_oacc_loop). */
83 gcall
*head_end
; /* Final marker of head sequence. */
86 /* Holds offload tables with decls.  omp_finish_file consumes both
   vectors (building address tables or passing each decl to
   targetm.record_offload_symbol); omp_discover_declare_target_var_r
   appends variables it newly marks offloadable to offload_vars. */
87 vec
<tree
, va_gc
> *offload_funcs
, *offload_vars
;
89 /* Return level at which oacc routine may spawn a partitioned loop, or
90 -1 if it is not a routine (i.e. is an offload fn).

   ATTR is the OpenACC function attribute; its TREE_VALUE is walked as a
   chain holding one entry per dimension (at most GOMP_DIM_MAX entries
   are examined). */
93 oacc_fn_attrib_level (tree attr
)
95 tree pos
= TREE_VALUE (attr
);
/* NOTE(review): the statement guarded by this test is not visible in
   this excerpt; per the header comment a missing TREE_PURPOSE on the
   first entry presumably means "not a routine" -- confirm. */
97 if (!TREE_PURPOSE (pos
))
/* Scan the dimensions in order, looking for the first entry whose
   TREE_PURPOSE is not integer zero.  NOTE(review): the loop exit and the
   final return are not visible in this excerpt. */
101 for (ix
= 0; ix
!= GOMP_DIM_MAX
;
102 ix
++, pos
= TREE_CHAIN (pos
))
103 if (!integer_zerop (TREE_PURPOSE (pos
)))
109 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
110 adds their addresses and sizes to constructor-vector V_CTOR.

   Elements are appended with a NULL_TREE index.  omp_finish_file reserves
   one slot per function and two per variable, so a size is presumably
   only emitted for variables.  NOTE(review): the conditionals selecting
   between the alternative assignments to ADDR and SIZE below are not
   visible in this excerpt -- confirm against the full source. */
113 add_decls_addresses_to_decl_constructor (vec
<tree
, va_gc
> *v_decls
,
114 vec
<constructor_elt
, va_gc
> *v_ctor
)
116 unsigned len
= vec_safe_length (v_decls
);
117 for (unsigned i
= 0; i
< len
; i
++)
119 tree it
= (*v_decls
)[i
];
120 bool is_var
= VAR_P (it
);
/* The following operands test for an "omp declare target link"
   variable; on the accelerator compiler the decl must also have a
   DECL_VALUE_EXPR. */
123 #ifdef ACCEL_COMPILER
124 && DECL_HAS_VALUE_EXPR_P (it
)
126 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it
));
128 tree size
= NULL_TREE
;
/* The size in bytes (DECL_SIZE_UNIT) is stored as a pointer-typed
   constant. */
130 size
= fold_convert (const_ptr_type_node
, DECL_SIZE_UNIT (it
));
134 addr
= build_fold_addr_expr (it
);
137 #ifdef ACCEL_COMPILER
138 /* For "omp declare target link" vars add address of the pointer to
139 the target table, instead of address of the var. */
140 tree value_expr
= DECL_VALUE_EXPR (it
);
141 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
142 varpool_node::finalize_decl (link_ptr_decl
);
143 addr
= build_fold_addr_expr (link_ptr_decl
);
145 addr
= build_fold_addr_expr (it
);
148 /* Most significant bit of the size marks "omp declare target link"
149 vars in host and target tables. */
150 unsigned HOST_WIDE_INT isize
= tree_to_uhwi (size
);
/* The shift count is the index of the top bit of a value as wide as
   const_ptr_type_node. */
151 isize
|= 1ULL << (int_size_in_bytes (const_ptr_type_node
)
152 * BITS_PER_UNIT
- 1);
153 size
= wide_int_to_tree (const_ptr_type_node
, isize
);
156 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, addr
);
158 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, size
);
162 /* Return true if DECL is a function for which its references should be analyzed. */
166 omp_declare_target_fn_p (tree decl
)
/* DECL qualifies when it is a FUNCTION_DECL that carries the
   "omp declare target" attribute and does not carry
   "omp declare target host".  NOTE(review): the left operand of the
   final `||' is not visible in this excerpt; the visible right operand
   accepts functions without an OpenACC function attribute. */
168 return (TREE_CODE (decl
) == FUNCTION_DECL
169 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
170 && !lookup_attribute ("omp declare target host",
171 DECL_ATTRIBUTES (decl
))
173 || oacc_get_fn_attrib (decl
) == NULL_TREE
));
176 /* Return true if DECL is a variable for which its initializer references
177 should be analyzed.

   The visible operands require the "omp declare target" attribute and
   reject "omp declare target link" variables.  NOTE(review): the first
   operand of the conjunction is not visible in this excerpt. */
180 omp_declare_target_var_p (tree decl
)
183 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
184 && !lookup_attribute ("omp declare target link",
185 DECL_ATTRIBUTES (decl
)));
188 /* Helper function for omp_discover_implicit_declare_target, called through
189 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
190 declare target to.

   *TP is the tree being visited.  DATA points to a vec<tree> worklist:
   a newly marked function that is not DECL_EXTERNAL and has a
   DECL_SAVED_TREE is pushed onto it so that its body is scanned
   later. */
193 omp_discover_declare_target_tgt_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
/* Only functions that are not already declare target, and that are not
   explicitly "omp declare target host", are marked. */
195 if (TREE_CODE (*tp
) == FUNCTION_DECL
196 && !omp_declare_target_fn_p (*tp
)
197 && !lookup_attribute ("omp declare target host", DECL_ATTRIBUTES (*tp
)))
199 tree id
= get_identifier ("omp declare target");
200 if (!DECL_EXTERNAL (*tp
) && DECL_SAVED_TREE (*tp
))
201 ((vec
<tree
> *) data
)->safe_push (*tp
);
/* Prepend the "omp declare target" attribute to the decl. */
202 DECL_ATTRIBUTES (*tp
) = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (*tp
));
203 symtab_node
*node
= symtab_node::get (*tp
);
/* NOTE(review): the guard on NODE is not visible in this excerpt. */
206 node
->offloadable
= 1;
207 if (ENABLE_OFFLOADING
)
208 g
->have_offload
= true;
/* NOTE(review): the statement executed for types is not visible in this
   excerpt. */
211 else if (TYPE_P (*tp
))
213 /* else if (TREE_CODE (*tp) == OMP_TARGET)
215 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
216 if (OMP_DEVICE_ANCESTOR (dev))
222 /* Similarly to omp_discover_declare_target_tgt_fn_r, but ignore references
   outside of OMP_TARGET regions: only the body of an OMP_TARGET is walked
   with that callback.  TP, WALK_SUBTREES and DATA are the walk_tree
   callback arguments. */
225 omp_discover_declare_target_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
227 if (TREE_CODE (*tp
) == OMP_TARGET
)
229 /* And not OMP_DEVICE_ANCESTOR. */
230 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp
),
231 omp_discover_declare_target_tgt_fn_r
,
/* NOTE(review): the statement executed for types is not visible in this
   excerpt. */
235 else if (TYPE_P (*tp
))
240 /* Helper function for omp_discover_implicit_declare_target, called through
241 walk_tree. Mark referenced FUNCTION_DECLs and global variables implicitly as
242 declare target to.

   FUNCTION_DECLs are delegated to omp_discover_declare_target_tgt_fn_r.
   DATA points to a vec<tree> worklist; a newly marked variable that is
   TREE_STATIC and has a DECL_INITIAL is pushed onto it so that its
   initializer is scanned as well. */
245 omp_discover_declare_target_var_r (tree
*tp
, int *walk_subtrees
, void *data
)
247 if (TREE_CODE (*tp
) == FUNCTION_DECL
)
248 return omp_discover_declare_target_tgt_fn_r (tp
, walk_subtrees
, data
);
/* NOTE(review): the first operand of this condition is not visible in
   this excerpt. */
250 && is_global_var (*tp
)
251 && !omp_declare_target_var_p (*tp
))
253 tree id
= get_identifier ("omp declare target");
/* A variable cannot be both `link' and implicitly `to': diagnose it and
   drop the link attribute. */
254 if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
)))
256 error_at (DECL_SOURCE_LOCATION (*tp
),
257 "%qD specified both in declare target %<link%> and "
258 "implicitly in %<to%> clauses", *tp
);
259 DECL_ATTRIBUTES (*tp
)
260 = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
));
262 if (TREE_STATIC (*tp
) && DECL_INITIAL (*tp
))
263 ((vec
<tree
> *) data
)->safe_push (*tp
);
264 DECL_ATTRIBUTES (*tp
) = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (*tp
));
265 symtab_node
*node
= symtab_node::get (*tp
);
/* Only symbols not yet flagged offloadable are flagged here. */
266 if (node
!= NULL
&& !node
->offloadable
)
268 node
->offloadable
= 1;
269 if (ENABLE_OFFLOADING
)
271 g
->have_offload
= true;
272 if (is_a
<varpool_node
*> (node
))
273 vec_safe_push (offload_vars
, node
->decl
);
/* NOTE(review): the statement executed for types is not visible in this
   excerpt. */
277 else if (TYPE_P (*tp
))
282 /* Perform the OpenMP implicit declare target to discovery.

   A worklist is seeded with the defined functions that are declare
   target or contain a target region, and with the static initializers
   of declare target variables.  Each popped decl is then walked; the
   walk callbacks push further decls onto the same worklist, so the loop
   runs until no new decl is discovered. */
285 omp_discover_implicit_declare_target (void)
289 auto_vec
<tree
> worklist
;
291 FOR_EACH_DEFINED_FUNCTION (node
)
292 if (DECL_SAVED_TREE (node
->decl
))
294 if (omp_declare_target_fn_p (node
->decl
))
295 worklist
.safe_push (node
->decl
);
/* Functions flagged has_omp_target are scanned too, restricted to their
   OMP_TARGET regions (see the last walk below). */
296 else if (DECL_STRUCT_FUNCTION (node
->decl
)
297 && DECL_STRUCT_FUNCTION (node
->decl
)->has_omp_target
)
298 worklist
.safe_push (node
->decl
);
300 FOR_EACH_STATIC_INITIALIZER (vnode
)
301 if (omp_declare_target_var_p (vnode
->decl
))
302 worklist
.safe_push (vnode
->decl
);
303 while (!worklist
.is_empty ())
305 tree decl
= worklist
.pop ();
/* Three cases follow: a variable initializer, the whole body of a
   declare target function, and only the target regions of any other
   function.  NOTE(review): the first condition and the trailing call
   arguments are not visible in this excerpt. */
307 walk_tree_without_duplicates (&DECL_INITIAL (decl
),
308 omp_discover_declare_target_var_r
,
310 else if (omp_declare_target_fn_p (decl
))
311 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
312 omp_discover_declare_target_tgt_fn_r
,
315 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
316 omp_discover_declare_target_fn_r
,
322 /* Create new symbols containing (address, size) pairs for global variables,
323 marked with "omp declare target" attribute, as well as addresses for the
324 functions, which are outlined offloading regions.

   When the target has named sections, two static arrays of
   pointer_sized_int_node, ".offload_func_table" and
   ".offload_var_table", are built from offload_funcs and offload_vars
   and placed in OFFLOAD_FUNC_TABLE_SECTION_NAME and
   OFFLOAD_VAR_TABLE_SECTION_NAME.  The loops at the end pass each decl
   to targetm.record_offload_symbol instead.  NOTE(review): the `else'
   joining the two paths is not visible in this excerpt. */
326 omp_finish_file (void)
328 unsigned num_funcs
= vec_safe_length (offload_funcs
);
329 unsigned num_vars
= vec_safe_length (offload_vars
);
/* Nothing is offloaded. */
331 if (num_funcs
== 0 && num_vars
== 0)
334 if (targetm_common
.have_named_sections
)
336 vec
<constructor_elt
, va_gc
> *v_f
, *v_v
;
/* One slot per function, two per variable. */
337 vec_alloc (v_f
, num_funcs
);
338 vec_alloc (v_v
, num_vars
* 2);
340 add_decls_addresses_to_decl_constructor (offload_funcs
, v_f
);
341 add_decls_addresses_to_decl_constructor (offload_vars
, v_v
);
343 tree vars_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
345 tree funcs_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
347 SET_TYPE_ALIGN (vars_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
348 SET_TYPE_ALIGN (funcs_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
349 tree ctor_v
= build_constructor (vars_decl_type
, v_v
);
350 tree ctor_f
= build_constructor (funcs_decl_type
, v_f
);
351 TREE_CONSTANT (ctor_v
) = TREE_CONSTANT (ctor_f
) = 1;
352 TREE_STATIC (ctor_v
) = TREE_STATIC (ctor_f
) = 1;
353 tree funcs_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
354 get_identifier (".offload_func_table"),
356 tree vars_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
357 get_identifier (".offload_var_table"),
359 TREE_STATIC (funcs_decl
) = TREE_STATIC (vars_decl
) = 1;
360 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
361 otherwise a joint table in a binary will contain padding between
362 tables from multiple object files. */
363 DECL_USER_ALIGN (funcs_decl
) = DECL_USER_ALIGN (vars_decl
) = 1;
364 SET_DECL_ALIGN (funcs_decl
, TYPE_ALIGN (funcs_decl_type
));
365 SET_DECL_ALIGN (vars_decl
, TYPE_ALIGN (vars_decl_type
));
366 DECL_INITIAL (funcs_decl
) = ctor_f
;
367 DECL_INITIAL (vars_decl
) = ctor_v
;
368 set_decl_section_name (funcs_decl
, OFFLOAD_FUNC_TABLE_SECTION_NAME
);
369 set_decl_section_name (vars_decl
, OFFLOAD_VAR_TABLE_SECTION_NAME
);
371 varpool_node::finalize_decl (vars_decl
);
372 varpool_node::finalize_decl (funcs_decl
);
/* Hand every offloaded function, then every offloaded variable, to the
   target hook. */
376 for (unsigned i
= 0; i
< num_funcs
; i
++)
378 tree it
= (*offload_funcs
)[i
];
379 targetm
.record_offload_symbol (it
);
381 for (unsigned i
= 0; i
< num_vars
; i
++)
383 tree it
= (*offload_vars
)[i
];
/* On the accelerator compiler an "omp declare target link" variable is
   represented by the pointer decl found in operand 0 of its
   DECL_VALUE_EXPR. */
384 #ifdef ACCEL_COMPILER
385 if (DECL_HAS_VALUE_EXPR_P (it
)
386 && lookup_attribute ("omp declare target link",
387 DECL_ATTRIBUTES (it
)))
389 tree value_expr
= DECL_VALUE_EXPR (it
);
390 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
391 targetm
.record_offload_symbol (link_ptr_decl
);
392 varpool_node::finalize_decl (link_ptr_decl
);
396 targetm
.record_offload_symbol (it
);
401 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
402 axis DIM. Return a tmp var holding the result.

   The internal call (IFN_GOACC_DIM_POS or IFN_GOACC_DIM_SIZE, with DIM
   as its single unsigned argument) is appended to SEQ; its result is
   stored in a new temporary of integer_type_node. */
405 oacc_dim_call (bool pos
, int dim
, gimple_seq
*seq
)
407 tree arg
= build_int_cst (unsigned_type_node
, dim
);
408 tree size
= create_tmp_var (integer_type_node
);
409 enum internal_fn fn
= pos
? IFN_GOACC_DIM_POS
: IFN_GOACC_DIM_SIZE
;
410 gimple
*call
= gimple_build_call_internal (fn
, 1, arg
);
412 gimple_call_set_lhs (call
, size
);
413 gimple_seq_add_stmt (seq
, call
);
418 /* Find the number of threads (POS = false), or thread number (POS =
419 true) for an OpenACC region partitioned as MASK. Setup code
420 required for the calculation is added to SEQ.

   For every dimension selected by MASK the running result is combined
   with that dimension's size (multiplication) and, for positions, with
   the index in that dimension (addition). */
423 oacc_thread_numbers (bool pos
, int mask
, gimple_seq
*seq
)
/* A count starts from the constant 1; a position starts empty
   (NULL_TREE). */
425 tree res
= pos
? NULL_TREE
: build_int_cst (unsigned_type_node
, 1);
428 /* Start at gang level, and examine relevant dimension indices. */
429 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
430 if (GOMP_DIM_MASK (ix
) & mask
)
434 /* We had an outer index, so scale that by the size of
436 tree n
= oacc_dim_call (false, ix
, seq
);
437 res
= fold_build2 (MULT_EXPR
, integer_type_node
, res
, n
);
441 /* Determine index in this dimension. */
442 tree id
= oacc_dim_call (true, ix
, seq
);
444 res
= fold_build2 (PLUS_EXPR
, integer_type_node
, res
, id
);
/* RES can only still be NULL_TREE for a position query in which no
   dimension contributed an index; the position is then zero. */
450 if (res
== NULL_TREE
)
451 res
= integer_zero_node
;
456 /* Transform IFN_GOACC_LOOP calls to actual code. See
457 expand_oacc_for for where these are generated. At the vector
458 level, we stride loops, such that each member of a warp will
459 operate on adjacent iterations. At the worker and gang level,
460 each gang/warp executes a set of contiguous iterations. Chunking
461 can override this such that each iteration engine executes a
462 contiguous chunk, and then moves on to stride to the next chunk.

   Arguments of CALL as read below: 0 is the ifn_goacc_loop_kind code,
   1 the direction, 2 the range, 3 the step, 4 the chunk size (only read
   on the accelerator compiler), 5 the partitioning mask, and 6 the chunk
   number (OFFSET) or the offset (BOUND).  The call is replaced by a
   sequence that assigns the computed value to its LHS. */
465 oacc_xform_loop (gcall
*call
)
467 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
468 enum ifn_goacc_loop_kind code
469 = (enum ifn_goacc_loop_kind
) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
470 tree dir
= gimple_call_arg (call
, 1);
471 tree range
= gimple_call_arg (call
, 2);
472 tree step
= gimple_call_arg (call
, 3);
473 tree chunk_size
= NULL_TREE
;
474 unsigned mask
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 5));
475 tree lhs
= gimple_call_lhs (call
);
476 tree type
= NULL_TREE
;
477 tree diff_type
= TREE_TYPE (range
);
479 gimple_seq seq
= NULL
;
480 bool chunking
= false, striding
= true;
/* MASK & (~MASK + 1) keeps only the lowest set bit of MASK. */
481 unsigned outer_mask
= mask
& (~mask
+ 1); // Outermost partitioning
482 unsigned inner_mask
= mask
& ~outer_mask
; // Inner partitioning (if any)
484 /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
487 gsi_replace_with_seq (&gsi
, seq
, true);
491 type
= TREE_TYPE (lhs
);
493 #ifdef ACCEL_COMPILER
494 chunk_size
= gimple_call_arg (call
, 4);
495 if (integer_minus_onep (chunk_size
) /* Force static allocation. */
496 || integer_zerop (chunk_size
)) /* Default (also static). */
498 /* If we're at the gang level, we want each to execute a
499 contiguous run of iterations. Otherwise we want each element
501 striding
= !(outer_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
));
506 /* Chunk of size 1 is striding. */
507 striding
= integer_onep (chunk_size
);
508 chunking
= !striding
;
512 /* striding=true, chunking=true
514 striding=true, chunking=false
516 striding=false,chunking=true
517 -> chunks=ceil (range/(chunksize*threads*step))
518 striding=false,chunking=false
519 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
520 push_gimplify_context (true);
524 default: gcc_unreachable ();
/* Number of chunks.  NOTE(review): the test choosing between the
   constant 1 and the computed quotient is not visible in this
   excerpt. */
526 case IFN_GOACC_LOOP_CHUNKS
:
528 r
= build_int_cst (type
, 1);
532 = (range - dir) / (chunks * step * num_threads) + dir */
533 tree per
= oacc_thread_numbers (false, mask
, &seq
);
534 per
= fold_convert (type
, per
);
535 chunk_size
= fold_convert (type
, chunk_size
);
536 per
= fold_build2 (MULT_EXPR
, type
, per
, chunk_size
);
537 per
= fold_build2 (MULT_EXPR
, type
, per
, step
);
538 r
= build2 (MINUS_EXPR
, type
, range
, dir
);
539 r
= build2 (PLUS_EXPR
, type
, r
, per
);
540 r
= build2 (TRUNC_DIV_EXPR
, type
, r
, per
);
544 case IFN_GOACC_LOOP_STEP
:
546 /* If striding, step by the entire compute volume, otherwise
547 step by the inner volume. */
548 unsigned volume
= striding
? mask
: inner_mask
;
550 r
= oacc_thread_numbers (false, volume
, &seq
);
551 r
= build2 (MULT_EXPR
, type
, fold_convert (type
, r
), step
);
555 case IFN_GOACC_LOOP_OFFSET
:
556 /* Enable vectorization on non-SIMT targets. */
558 && outer_mask
== GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
559 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
561 && (flag_tree_loop_vectorize
562 || !global_options_set
.x_flag_tree_loop_vectorize
))
564 basic_block bb
= gsi_bb (gsi
);
565 class loop
*parent
= bb
->loop_father
;
566 class loop
*body
= parent
->inner
;
568 parent
->force_vectorize
= true;
569 parent
->safelen
= INT_MAX
;
571 /* "Chunking loops" may have inner loops. */
574 body
->force_vectorize
= true;
575 body
->safelen
= INT_MAX
;
578 cfun
->has_force_vectorize_loops
= true;
582 r
= oacc_thread_numbers (true, mask
, &seq
);
583 r
= fold_convert (diff_type
, r
);
587 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
588 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
589 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
590 inner_size
, outer_size
);
592 volume
= fold_convert (diff_type
, volume
);
594 chunk_size
= fold_convert (diff_type
, chunk_size
);
597 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
599 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
600 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
601 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
604 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
605 fold_convert (diff_type
, inner_size
));
606 r
= oacc_thread_numbers (true, outer_mask
, &seq
);
607 r
= fold_convert (diff_type
, r
);
608 r
= build2 (MULT_EXPR
, diff_type
, r
, span
);
610 tree inner
= oacc_thread_numbers (true, inner_mask
, &seq
);
611 inner
= fold_convert (diff_type
, inner
);
612 r
= fold_build2 (PLUS_EXPR
, diff_type
, r
, inner
);
616 tree chunk
= fold_convert (diff_type
, gimple_call_arg (call
, 6));
618 = fold_build2 (MULT_EXPR
, diff_type
, volume
, chunk_size
);
619 per
= build2 (MULT_EXPR
, diff_type
, per
, chunk
);
621 r
= build2 (PLUS_EXPR
, diff_type
, r
, per
);
624 r
= fold_build2 (MULT_EXPR
, diff_type
, r
, step
);
625 if (type
!= diff_type
)
626 r
= fold_convert (type
, r
);
/* Bound of the iterations owned by this thread: the span (scaled by
   STEP) is added to the offset passed as argument 6, and the result is
   clamped against RANGE with MIN_EXPR when DIR is one and MAX_EXPR
   otherwise.  NOTE(review): the branch taken when striding is not
   visible in this excerpt. */
629 case IFN_GOACC_LOOP_BOUND
:
634 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
635 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
636 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
637 inner_size
, outer_size
);
639 volume
= fold_convert (diff_type
, volume
);
641 chunk_size
= fold_convert (diff_type
, chunk_size
);
644 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
646 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
647 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
648 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
651 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
652 fold_convert (diff_type
, inner_size
));
654 r
= fold_build2 (MULT_EXPR
, diff_type
, span
, step
);
656 tree offset
= gimple_call_arg (call
, 6);
657 r
= build2 (PLUS_EXPR
, diff_type
, r
,
658 fold_convert (diff_type
, offset
));
659 r
= build2 (integer_onep (dir
) ? MIN_EXPR
: MAX_EXPR
,
660 diff_type
, r
, range
);
662 if (diff_type
!= type
)
663 r
= fold_convert (type
, r
);
/* Store the computed value in the call's LHS and substitute the
   generated sequence for the call. */
667 gimplify_assign (lhs
, r
, &seq
);
669 pop_gimplify_context (NULL
);
671 gsi_replace_with_seq (&gsi
, seq
, true);
674 /* Transform a GOACC_TILE call. Determines the element loop span for
675 the specified loop of the nest. This is 1 if we're not tiling.
677 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);

   Arguments 0, 1, 2 and 4 of CALL are read below.  The call is replaced
   by a sequence that assigns the chosen span, converted to the type of
   the LHS, to the call's LHS. */
680 oacc_xform_tile (gcall
*call
)
682 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
683 unsigned collapse
= tree_to_uhwi (gimple_call_arg (call
, 0));
684 /* Inner loops have higher loop_nos. */
685 unsigned loop_no
= tree_to_uhwi (gimple_call_arg (call
, 1));
686 tree tile_size
= gimple_call_arg (call
, 2);
687 unsigned e_mask
= tree_to_uhwi (gimple_call_arg (call
, 4));
688 tree lhs
= gimple_call_lhs (call
);
689 tree type
= TREE_TYPE (lhs
);
690 gimple_seq seq
= NULL
;
691 tree span
= build_int_cst (type
, 1);
/* NOTE(review): the start of this expression is not visible in this
   excerpt; the visible part masks out the vector and worker bits. */
694 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
695 | GOMP_DIM_MASK (GOMP_DIM_WORKER
))));
696 push_gimplify_context (!seen_error ());
698 #ifndef ACCEL_COMPILER
699 /* Partitioning disabled on host compilers. */
703 /* Not partitioning. */
704 span
= integer_one_node
;
705 else if (!integer_zerop (tile_size
))
706 /* User explicitly specified size. */
710 /* Pick a size based on the partitioning of the element loop and
711 the number of loop nests. */
712 tree first_size
= NULL_TREE
;
713 tree second_size
= NULL_TREE
;
/* FIRST_SIZE is the vector dimension size and SECOND_SIZE the worker
   dimension size, each only when E_MASK selects that dimension. */
715 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
))
716 first_size
= oacc_dim_call (false, GOMP_DIM_VECTOR
, &seq
);
717 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
))
718 second_size
= oacc_dim_call (false, GOMP_DIM_WORKER
, &seq
);
/* NOTE(review): the condition guarding this promotion of SECOND_SIZE to
   FIRST_SIZE is not visible in this excerpt. */
722 first_size
= second_size
;
723 second_size
= NULL_TREE
;
/* LOOP_NO + 1 == COLLAPSE identifies the innermost loop of the nest,
   LOOP_NO + 2 == COLLAPSE the one enclosing it. */
726 if (loop_no
+ 1 == collapse
)
729 if (!loop_no
&& second_size
)
730 span
= fold_build2 (MULT_EXPR
, TREE_TYPE (span
),
733 else if (loop_no
+ 2 == collapse
)
739 /* There's no obvious element size for this loop. Options
740 are 1, first_size or some non-unity constant (32 is my
741 favourite). We should gather some statistics. */
745 span
= fold_convert (type
, span
);
746 gimplify_assign (lhs
, span
, &seq
);
748 pop_gimplify_context (NULL
);
750 gsi_replace_with_seq (&gsi
, seq
, true);
753 /* Default partitioned and minimum partitioned dimensions.  Both arrays
   are indexed by dimension and are (re)initialised by
   oacc_parse_default_dims: defaults to -1 and minimums to 1, before
   any parsing and backend validation. */
755 static int oacc_default_dims
[GOMP_DIM_MAX
];
756 static int oacc_min_dims
[GOMP_DIM_MAX
];
/* Return the default size recorded for dimension DIM, which must lie in
   [0, GOMP_DIM_MAX). */
759 oacc_get_default_dim (int dim
)
761 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
762 return oacc_default_dims
[dim
];
/* Return the minimum size recorded for dimension DIM, which must lie in
   [0, GOMP_DIM_MAX). */
766 oacc_get_min_dim (int dim
)
768 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
769 return oacc_min_dims
[dim
];
772 /* Parse the default dimension parameter. This is a set of
773 :-separated optional compute dimensions. Each specified dimension
774 is a positive integer. When device type support is added, it is
775 planned to be a comma separated list of such compute dimensions,
776 with all but the first prefixed by the colon-terminated device
type name. */
780 oacc_parse_default_dims (const char *dims
)
/* Start from a clean state: every default is -1 and every minimum is
   1. */
784 for (ix
= GOMP_DIM_MAX
; ix
--;)
786 oacc_default_dims
[ix
] = -1;
787 oacc_min_dims
[ix
] = 1;
790 #ifndef ACCEL_COMPILER
791 /* Cannot be overridden on the host. */
796 const char *pos
= dims
;
/* Consume at most GOMP_DIM_MAX fields, stopping early at the end of the
   string. */
798 for (ix
= 0; *pos
&& ix
!= GOMP_DIM_MAX
; ix
++)
813 val
= strtol (pos
, CONST_CAST (char **, &eptr
), 10);
/* A value is rejected when errno is set, when it is not positive, or
   when it does not fit in an int.  NOTE(review): the statement executed
   on rejection, and any reset of errno before strtol, are not visible
   in this excerpt. */
814 if (errno
|| val
<= 0 || (int) val
!= val
)
817 oacc_default_dims
[ix
] = (int) val
;
823 error_at (UNKNOWN_LOCATION
,
824 "%<-fopenacc-dim%> operand is malformed at %qs", pos
);
828 /* Allow the backend to validate the dimensions. */
829 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_default_dims
, -1, 0);
830 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_min_dims
, -2, 0);
833 /* Validate and update the dimensions for offloaded FN. ATTRS is the
834 raw attribute. DIMS is an array of dimensions, which is filled in.
835 LEVEL is the partitioning level of a routine, or -1 for an offload
836 region itself. USED is the mask of partitioned execution in the
function. */
840 oacc_validate_dims (tree fn
, tree attrs
, int *dims
, int level
, unsigned used
)
/* The TREE_PURPOSE of each dimension entry is saved in PURPOSE so the
   attribute can be rebuilt with the final sizes; an entry without a
   TREE_VALUE is read as -1.  The target hook then validates DIMS. */
842 tree purpose
[GOMP_DIM_MAX
];
844 tree pos
= TREE_VALUE (attrs
);
846 /* Make sure the attribute creator attached the dimension
850 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
852 purpose
[ix
] = TREE_PURPOSE (pos
);
853 tree val
= TREE_VALUE (pos
);
854 dims
[ix
] = val
? TREE_INT_CST_LOW (val
) : -1;
855 pos
= TREE_CHAIN (pos
);
858 bool changed
= targetm
.goacc
.validate_dims (fn
, dims
, level
, used
);
860 /* Default anything left to 1 or a partitioned default. */
861 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
864 /* The OpenACC spec says 'If the [num_gangs] clause is not
865 specified, an implementation-defined default will be used;
866 the default may depend on the code within the construct.'
867 (2.5.6). Thus an implementation is free to choose
868 non-unity default for a parallel region that doesn't have
869 any gang-partitioned loops. However, it appears that there
870 is a sufficient body of user code that expects non-gang
871 partitioned regions to not execute in gang-redundant mode.
872 So we (a) don't warn about the non-portability and (b) pick
873 the minimum permissible dimension size when there is no
874 partitioned execution. Otherwise we pick the global
875 default for the dimension, which the user can control. The
876 same wording and logic applies to num_workers and
877 vector_length, however the worker- or vector- single
878 execution doesn't have the same impact as gang-redundant
879 execution. (If the minimum gang-level partitioning is not 1,
880 the target is probably too confusing.) */
881 dims
[ix
] = (used
& GOMP_DIM_MASK (ix
)
882 ? oacc_default_dims
[ix
] : oacc_min_dims
[ix
]);
888 /* Replace the attribute with new values. */
/* The list is rebuilt back to front so that dimension 0 ends up first. */
890 for (ix
= GOMP_DIM_MAX
; ix
--;)
891 pos
= tree_cons (purpose
[ix
],
892 build_int_cst (integer_type_node
, dims
[ix
]), pos
);
893 oacc_replace_fn_attrib (fn
, pos
);
897 /* Create an empty OpenACC loop structure at LOC.  The node is allocated
   with XCNEW and linked to PARENT; the visible code pushes it at the
   front of PARENT's child list, so siblings end up in reverse order of
   creation.  NOTE(review): the guard for a null PARENT and the
   remaining field initialisation are not visible in this excerpt. */
900 new_oacc_loop_raw (oacc_loop
*parent
, location_t loc
)
902 oacc_loop
*loop
= XCNEW (oacc_loop
);
904 loop
->parent
= parent
;
908 loop
->sibling
= parent
->child
;
909 parent
->child
= loop
;
916 /* Create an outermost, dummy OpenACC loop for offloaded function DECL. */
920 new_oacc_loop_outer (tree decl
)
/* The outermost loop has no parent and is located at DECL's source
   location. */
922 return new_oacc_loop_raw (NULL
, DECL_SOURCE_LOCATION (decl
));
925 /* Start a new OpenACC loop structure beginning at head marker MARKER.
926 Link into PARENT loop. Return the new loop.

   The loop is located at MARKER's location.  Its flags are read from
   argument 3 of MARKER; its chunk size is argument 4 of MARKER when
   OLF_GANG_STATIC is set in the flags and integer_zero_node
   otherwise. */
929 new_oacc_loop (oacc_loop
*parent
, gcall
*marker
)
931 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (marker
));
933 loop
->marker
= marker
;
935 /* TODO: This is where device_type flattening would occur for the loop
938 loop
->flags
= TREE_INT_CST_LOW (gimple_call_arg (marker
, 3));
940 tree chunk_size
= integer_zero_node
;
941 if (loop
->flags
& OLF_GANG_STATIC
)
942 chunk_size
= gimple_call_arg (marker
, 4);
943 loop
->chunk_size
= chunk_size
;
948 /* Create a dummy loop encompassing a call to an OpenACC routine.
949 Extract the routine's partitioning requirements.

   CALL is the call statement, DECL the called function and ATTRS its
   OpenACC function attribute.  The loop is linked into PARENT and
   located at CALL's location. */
952 new_oacc_loop_routine (oacc_loop
*parent
, gcall
*call
, tree decl
, tree attrs
)
954 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (call
));
955 int level
= oacc_fn_attrib_level (attrs
);
/* A negative level would mean DECL is not a routine. */
957 gcc_assert (level
>= 0);
960 loop
->routine
= decl
;
/* All dimension bits from LEVEL up to (not including) GOMP_DIM_MAX:
   the full mask with the bits below LEVEL cleared. */
961 loop
->mask
= ((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1)
962 ^ (GOMP_DIM_MASK (level
) - 1));
965 /* Finish off the current OpenACC loop LOOP; oacc_loop_discover_walk
   calls this when it meets the loop's tail marker.
966 Return the parent loop. */
969 finish_oacc_loop (oacc_loop
*loop
)
971 /* If the loop has been collapsed, don't partition it. */
/* "Collapsed" is detected as: no loop abstraction call was recorded in
   LOOP->ifns. */
972 if (loop
->ifns
.is_empty ())
973 loop
->mask
= loop
->flags
= 0;
977 /* Free all OpenACC loop structures within LOOP (inclusive).  Siblings and
   children are freed recursively and the ifns vector is released.
   NOTE(review): the null checks guarding the recursive calls and the
   final deallocation of LOOP itself are not visible in this excerpt. */
980 free_oacc_loop (oacc_loop
*loop
)
983 free_oacc_loop (loop
->sibling
);
985 free_oacc_loop (loop
->child
);
987 loop
->ifns
.release ();
991 /* Dump out the OpenACC loop head or tail beginning at FROM.

   Output goes to FILE.  A "TITLE-LEVEL:" line indented by DEPTH * 2
   columns is printed first, followed by statements starting at FROM,
   each printed with an indent of DEPTH * 2 + 2. */
994 dump_oacc_loop_part (FILE *file
, gcall
*from
, int depth
,
995 const char *title
, int level
)
/* KIND is the IFN_UNIQUE kind of FROM, taken from its first argument. */
997 enum ifn_unique_kind kind
998 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1000 fprintf (file
, "%*s%s-%d:\n", depth
* 2, "", title
, level
);
1001 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1003 gimple
*stmt
= gsi_stmt (gsi
);
1005 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1007 enum ifn_unique_kind k
1008 = ((enum ifn_unique_kind
) TREE_INT_CST_LOW
1009 (gimple_call_arg (stmt
, 0)));
/* Another IFN_UNIQUE call of the same kind as FROM delimits the part.
   NOTE(review): the statement executed here is not visible in this
   excerpt. */
1011 if (k
== kind
&& stmt
!= from
)
1014 print_gimple_stmt (file
, stmt
, depth
* 2 + 2);
/* At the end of a block, continue in its single successor. */
1017 while (gsi_end_p (gsi
))
1018 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1022 /* Dump OpenACC loop LOOP, its children, and its siblings.

   Output goes to FILE, indented by DEPTH * 2 columns; children are
   dumped one level deeper, siblings at the same depth.  NOTE(review):
   the conditions guarding the marker, routine, child and sibling output
   are not visible in this excerpt. */
1025 dump_oacc_loop (FILE *file
, oacc_loop
*loop
, int depth
)
/* Header line: flags and mask in hexadecimal, then file:line of the
   loop. */
1029 fprintf (file
, "%*sLoop %x(%x) %s:%u\n", depth
* 2, "",
1030 loop
->flags
, loop
->mask
,
1031 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
));
1034 print_gimple_stmt (file
, loop
->marker
, depth
* 2);
1037 fprintf (file
, "%*sRoutine %s:%u:%s\n",
1038 depth
* 2, "", DECL_SOURCE_FILE (loop
->routine
),
1039 DECL_SOURCE_LINE (loop
->routine
),
1040 IDENTIFIER_POINTER (DECL_NAME (loop
->routine
)));
/* Heads are dumped from the gang slot upwards, tails in the opposite
   order. */
1042 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
1043 if (loop
->heads
[ix
])
1044 dump_oacc_loop_part (file
, loop
->heads
[ix
], depth
, "Head", ix
);
1045 for (ix
= GOMP_DIM_MAX
; ix
--;)
1046 if (loop
->tails
[ix
])
1047 dump_oacc_loop_part (file
, loop
->tails
[ix
], depth
, "Tail", ix
);
1050 dump_oacc_loop (file
, loop
->child
, depth
+ 1);
1052 dump_oacc_loop (file
, loop
->sibling
, depth
);
/* Prototype for the debugging entry point defined just below. */
1055 void debug_oacc_loop (oacc_loop
*);
1057 /* Dump loops to stderr: LOOP, its children and its siblings, starting
   at depth 0. */
1060 debug_oacc_loop (oacc_loop
*loop
)
1062 dump_oacc_loop (stderr
, loop
, 0);
1065 /* Provide diagnostics on OpenACC loop LOOP, its children, and its siblings. */
1069 inform_oacc_loop (const oacc_loop
*loop
)
/* Build one word per partitioning level set in LOOP->mask (" gang",
   " worker", " vector"), or " seq" when the mask is empty, and report
   them at the loop's location as an optimization note.  NOTE(review):
   the declarations receiving the first three strings, the end of the
   dump_printf_loc argument list and the guards on the recursive calls
   are not visible in this excerpt. */
1072 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
) ? " gang" : "";
1074 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
) ? " worker" : "";
1076 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
) ? " vector" : "";
1077 const char *seq
= loop
->mask
== 0 ? " seq" : "";
1078 const dump_user_location_t loc
1079 = dump_user_location_t::from_location_t (loop
->loc
);
1080 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, loc
,
1081 "assigned OpenACC%s%s%s%s loop parallelism\n", gang
, worker
,
1085 inform_oacc_loop (loop
->child
);
1087 inform_oacc_loop (loop
->sibling
);
1090 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
1091 structures as we go. By construction these loops are properly
nested. */
1095 oacc_loop_discover_walk (oacc_loop
*loop
, basic_block bb
)
/* LOOP is the innermost loop open when BB is entered.  Each block is
   processed once, tracked with the BB_VISITED flag.  NOTE(review): the
   declarations of MARKER and REMAINING and several control-flow
   statements of this function are not visible in this excerpt. */
1100 if (bb
->flags
& BB_VISITED
)
1104 bb
->flags
|= BB_VISITED
;
1106 /* Scan for loop markers. */
1107 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
1110 gimple
*stmt
= gsi_stmt (gsi
);
/* Only call statements are of interest. */
1112 if (!is_gimple_call (stmt
))
1115 gcall
*call
= as_a
<gcall
*> (stmt
);
1117 /* If this is a routine, make a dummy loop for it. */
1118 if (tree decl
= gimple_call_fndecl (call
))
1119 if (tree attrs
= oacc_get_fn_attrib (decl
))
1121 gcc_assert (!marker
);
1122 new_oacc_loop_routine (loop
, call
, decl
, attrs
);
1125 if (!gimple_call_internal_p (call
))
1128 switch (gimple_call_internal_fn (call
))
1133 case IFN_GOACC_LOOP
:
1134 case IFN_GOACC_TILE
:
1135 /* Record the abstraction function, so we can manipulate it
1137 loop
->ifns
.safe_push (call
);
1141 enum ifn_unique_kind kind
1142 = (enum ifn_unique_kind
) (TREE_INT_CST_LOW
1143 (gimple_call_arg (call
, 0)));
1144 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
1145 || kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
1147 if (gimple_call_num_args (call
) == 2)
1149 gcc_assert (marker
&& !remaining
);
1151 if (kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
1152 loop
= finish_oacc_loop (loop
);
1154 loop
->head_end
= call
;
1158 int count
= TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
1162 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1163 loop
= new_oacc_loop (loop
, call
);
1166 gcc_assert (count
== remaining
);
1170 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1171 loop
->heads
[marker
] = call
;
1173 loop
->tails
[remaining
] = call
;
1180 if (remaining
|| marker
)
1182 bb
= single_succ (bb
);
1183 gcc_assert (single_pred_p (bb
) && !(bb
->flags
& BB_VISITED
));
1187 /* Walk successor blocks. */
/* Every successor is visited with the loop that is current at the end
   of this block. */
1191 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1192 oacc_loop_discover_walk (loop
, e
->dest
);
1195 /* LOOP is the first sibling. Reverse the order in place and return
1196 the new first sibling. Recurse to child loops.

   NOTE(review): the enclosing iteration, the advance to NEXT and the
   return statement are not visible in this excerpt. */
1199 oacc_loop_sibling_nreverse (oacc_loop
*loop
)
/* LAST is the head of the already-reversed part of the list. */
1201 oacc_loop
*last
= NULL
;
1205 loop
->child
= oacc_loop_sibling_nreverse (loop
->child
);
/* Save the old successor before redirecting LOOP's sibling link to the
   reversed part. */
1207 oacc_loop
*next
= loop
->sibling
;
1208 loop
->sibling
= last
;
1217 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1218 the current function.

   A dummy outermost loop is created for current_function_decl, the
   walk starts at the function's entry block, and the sibling lists are
   reversed afterwards.  NOTE(review): the statement clearing the block
   flags and the return statement are not visible in this excerpt. */
1221 oacc_loop_discovery ()
1223 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1224 in the following. */
1227 oacc_loop
*top
= new_oacc_loop_outer (current_function_decl
);
1228 oacc_loop_discover_walk (top
, ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1230 /* The siblings were constructed in reverse order, reverse them so
1231 that diagnostics come out in an unsurprising order. */
1232 top
= oacc_loop_sibling_nreverse (top
);
1237 /* Transform the abstract internal function markers starting at FROM
1238 to be for partitioning level LEVEL. Stop when we meet another HEAD
1242 oacc_loop_xform_head_tail (gcall
*from
, int level
)
1244 enum ifn_unique_kind kind
1245 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1246 tree replacement
= build_int_cst (unsigned_type_node
, level
);
1248 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1250 gimple
*stmt
= gsi_stmt (gsi
);
1252 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1254 enum ifn_unique_kind k
1255 = ((enum ifn_unique_kind
)
1256 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
1258 if (k
== IFN_UNIQUE_OACC_FORK
|| k
== IFN_UNIQUE_OACC_JOIN
)
1259 *gimple_call_arg_ptr (stmt
, 2) = replacement
;
1260 else if (k
== kind
&& stmt
!= from
)
1263 else if (gimple_call_internal_p (stmt
, IFN_GOACC_REDUCTION
))
1264 *gimple_call_arg_ptr (stmt
, 3) = replacement
;
1267 while (gsi_end_p (gsi
))
1268 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1272 /* Process the discovered OpenACC loops, setting the correct
1273 partitioning level etc. */
1276 oacc_loop_process (oacc_loop
*loop
)
1279 oacc_loop_process (loop
->child
);
1281 if (loop
->mask
&& !loop
->routine
)
1284 tree mask_arg
= build_int_cst (unsigned_type_node
, loop
->mask
);
1285 tree e_mask_arg
= build_int_cst (unsigned_type_node
, loop
->e_mask
);
1286 tree chunk_arg
= loop
->chunk_size
;
1289 for (ix
= 0; loop
->ifns
.iterate (ix
, &call
); ix
++)
1290 switch (gimple_call_internal_fn (call
))
1292 case IFN_GOACC_LOOP
:
1294 bool is_e
= gimple_call_arg (call
, 5) == integer_minus_one_node
;
1295 gimple_call_set_arg (call
, 5, is_e
? e_mask_arg
: mask_arg
);
1297 gimple_call_set_arg (call
, 4, chunk_arg
);
1301 case IFN_GOACC_TILE
:
1302 gimple_call_set_arg (call
, 3, mask_arg
);
1303 gimple_call_set_arg (call
, 4, e_mask_arg
);
1310 unsigned dim
= GOMP_DIM_GANG
;
1311 unsigned mask
= loop
->mask
| loop
->e_mask
;
1312 for (ix
= 0; ix
!= GOMP_DIM_MAX
&& mask
; ix
++)
1314 while (!(GOMP_DIM_MASK (dim
) & mask
))
1317 oacc_loop_xform_head_tail (loop
->heads
[ix
], dim
);
1318 oacc_loop_xform_head_tail (loop
->tails
[ix
], dim
);
1320 mask
^= GOMP_DIM_MASK (dim
);
1325 oacc_loop_process (loop
->sibling
);
1328 /* Walk the OpenACC loop heirarchy checking and assigning the
1329 programmer-specified partitionings. OUTER_MASK is the partitioning
1330 this loop is contained within. Return mask of partitioning
1331 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1335 oacc_loop_fixed_partitions (oacc_loop
*loop
, unsigned outer_mask
)
1337 unsigned this_mask
= loop
->mask
;
1338 unsigned mask_all
= 0;
1341 #ifdef ACCEL_COMPILER
1342 /* When device_type is supported, we want the device compiler to be
1343 noisy, if the loop parameters are device_type-specific. */
1349 bool auto_par
= (loop
->flags
& OLF_AUTO
) != 0;
1350 bool seq_par
= (loop
->flags
& OLF_SEQ
) != 0;
1351 bool tiling
= (loop
->flags
& OLF_TILE
) != 0;
1353 this_mask
= ((loop
->flags
>> OLF_DIM_BASE
)
1354 & (GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1));
1356 /* Apply auto partitioning if this is a non-partitioned regular
1357 loop, or (no more than) single axis tiled loop. */
1359 = !seq_par
&& this_mask
== (tiling
? this_mask
& -this_mask
: 0);
1361 if ((this_mask
!= 0) + auto_par
+ seq_par
> 1)
1364 error_at (loop
->loc
,
1366 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1367 : G_("%<auto%> conflicts with other OpenACC loop "
1370 loop
->flags
&= ~OLF_AUTO
;
1374 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1) << OLF_DIM_BASE
);
1379 if (maybe_auto
&& (loop
->flags
& OLF_INDEPENDENT
))
1381 loop
->flags
|= OLF_AUTO
;
1382 mask_all
|= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1386 if (this_mask
& outer_mask
)
1388 const oacc_loop
*outer
;
1389 for (outer
= loop
->parent
; outer
; outer
= outer
->parent
)
1390 if ((outer
->mask
| outer
->e_mask
) & this_mask
)
1397 error_at (loop
->loc
,
1399 ? G_("routine call uses same OpenACC parallelism"
1400 " as containing loop")
1401 : G_("inner loop uses same OpenACC parallelism"
1402 " as containing loop"));
1403 inform (outer
->loc
, "containing loop here");
1406 error_at (loop
->loc
,
1408 ? G_("routine call uses OpenACC parallelism disallowed"
1409 " by containing routine")
1410 : G_("loop uses OpenACC parallelism disallowed"
1411 " by containing routine"));
1414 inform (DECL_SOURCE_LOCATION (loop
->routine
),
1415 "routine %qD declared here", loop
->routine
);
1417 this_mask
&= ~outer_mask
;
1421 unsigned outermost
= least_bit_hwi (this_mask
);
1423 if (outermost
&& outermost
<= outer_mask
)
1427 error_at (loop
->loc
,
1428 "incorrectly nested OpenACC loop parallelism");
1430 const oacc_loop
*outer
;
1431 for (outer
= loop
->parent
;
1432 outer
->flags
&& outer
->flags
< outermost
;
1433 outer
= outer
->parent
)
1435 inform (outer
->loc
, "containing loop here");
1438 this_mask
&= ~outermost
;
1442 mask_all
|= this_mask
;
1444 if (loop
->flags
& OLF_TILE
)
1446 /* When tiling, vector goes to the element loop, and failing
1447 that we put worker there. The std doesn't contemplate
1448 specifying all three. We choose to put worker and vector on
1449 the element loops in that case. */
1450 unsigned this_e_mask
= this_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
);
1451 if (!this_e_mask
|| this_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
))
1452 this_e_mask
|= this_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
);
1454 loop
->e_mask
= this_e_mask
;
1455 this_mask
^= this_e_mask
;
1458 loop
->mask
= this_mask
;
1461 fprintf (dump_file
, "Loop %s:%d user specified %d & %d\n",
1462 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1463 loop
->mask
, loop
->e_mask
);
1467 unsigned tmp_mask
= outer_mask
| this_mask
| loop
->e_mask
;
1468 loop
->inner
= oacc_loop_fixed_partitions (loop
->child
, tmp_mask
);
1469 mask_all
|= loop
->inner
;
1473 mask_all
|= oacc_loop_fixed_partitions (loop
->sibling
, outer_mask
);
1478 /* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
1479 OUTER_MASK is the partitioning this loop is contained within.
1480 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1481 Return the cumulative partitioning used by this loop, siblings and
1485 oacc_loop_auto_partitions (oacc_loop
*loop
, unsigned outer_mask
,
1488 bool assign
= (loop
->flags
& OLF_AUTO
) && (loop
->flags
& OLF_INDEPENDENT
);
1490 bool tiling
= loop
->flags
& OLF_TILE
;
1492 #ifdef ACCEL_COMPILER
1493 /* When device_type is supported, we want the device compiler to be
1494 noisy, if the loop parameters are device_type-specific. */
1498 if (assign
&& (!outer_assign
|| loop
->inner
))
1500 /* Allocate outermost and non-innermost loops at the outermost
1501 non-innermost available level. */
1502 unsigned this_mask
= GOMP_DIM_MASK (GOMP_DIM_GANG
);
1504 /* Find the first outermost available partition. */
1505 while (this_mask
<= outer_mask
)
1508 /* Grab two axes if tiling, and we've not assigned anything */
1509 if (tiling
&& !(loop
->mask
| loop
->e_mask
))
1510 this_mask
|= this_mask
<< 1;
1512 /* Prohibit the innermost partitioning at the moment. */
1513 this_mask
&= GOMP_DIM_MASK (GOMP_DIM_MAX
- 1) - 1;
1515 /* Don't use any dimension explicitly claimed by an inner loop. */
1516 this_mask
&= ~loop
->inner
;
1518 if (tiling
&& !loop
->e_mask
)
1520 /* If we got two axes, allocate the inner one to the element
1522 loop
->e_mask
= this_mask
& (this_mask
<< 1);
1523 this_mask
^= loop
->e_mask
;
1526 loop
->mask
|= this_mask
;
1531 unsigned tmp_mask
= outer_mask
| loop
->mask
| loop
->e_mask
;
1532 loop
->inner
= oacc_loop_auto_partitions (loop
->child
, tmp_mask
,
1533 outer_assign
| assign
);
1536 if (assign
&& (!loop
->mask
|| (tiling
&& !loop
->e_mask
) || !outer_assign
))
1538 /* Allocate the loop at the innermost available level. Note
1539 that we do this even if we already assigned this loop the
1540 outermost available level above. That way we'll partition
1541 this along 2 axes, if they are available. */
1542 unsigned this_mask
= 0;
1544 /* Determine the outermost partitioning used within this loop. */
1545 this_mask
= loop
->inner
| GOMP_DIM_MASK (GOMP_DIM_MAX
);
1546 this_mask
= least_bit_hwi (this_mask
);
1548 /* Pick the partitioning just inside that one. */
1551 /* And avoid picking one use by an outer loop. */
1552 this_mask
&= ~outer_mask
;
1554 /* If tiling and we failed completely above, grab the next one
1555 too. Making sure it doesn't hit an outer loop. */
1558 this_mask
&= ~(loop
->e_mask
| loop
->mask
);
1559 unsigned tile_mask
= ((this_mask
>> 1)
1560 & ~(outer_mask
| loop
->e_mask
| loop
->mask
));
1562 if (tile_mask
|| loop
->mask
)
1564 loop
->e_mask
|= this_mask
;
1565 this_mask
= tile_mask
;
1567 if (!loop
->e_mask
&& noisy
)
1568 warning_at (loop
->loc
, 0,
1569 "insufficient partitioning available"
1570 " to parallelize element loop");
1573 loop
->mask
|= this_mask
;
1574 if (!loop
->mask
&& noisy
)
1575 warning_at (loop
->loc
, 0,
1577 ? G_("insufficient partitioning available"
1578 " to parallelize tile loop")
1579 : G_("insufficient partitioning available"
1580 " to parallelize loop"));
1583 if (assign
&& dump_file
)
1584 fprintf (dump_file
, "Auto loop %s:%d assigned %d & %d\n",
1585 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1586 loop
->mask
, loop
->e_mask
);
1588 unsigned inner_mask
= 0;
1591 inner_mask
|= oacc_loop_auto_partitions (loop
->sibling
,
1592 outer_mask
, outer_assign
);
1594 inner_mask
|= loop
->inner
| loop
->mask
| loop
->e_mask
;
1599 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1600 axes. Return mask of partitioning. */
1603 oacc_loop_partition (oacc_loop
*loop
, unsigned outer_mask
)
1605 unsigned mask_all
= oacc_loop_fixed_partitions (loop
, outer_mask
);
1607 if (mask_all
& GOMP_DIM_MASK (GOMP_DIM_MAX
))
1609 mask_all
^= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1610 mask_all
|= oacc_loop_auto_partitions (loop
, outer_mask
, false);
1615 /* Default fork/join early expander. Delete the function calls if
1616 there is no RTL expander. */
1619 default_goacc_fork_join (gcall
*ARG_UNUSED (call
),
1620 const int *ARG_UNUSED (dims
), bool is_fork
)
1623 return targetm
.have_oacc_fork ();
1625 return targetm
.have_oacc_join ();
1628 /* Default goacc.reduction early expander.
1630 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1631 If RES_PTR is not integer-zerop:
1632 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1633 TEARDOWN - emit '*RES_PTR = VAR'
1638 default_goacc_reduction (gcall
*call
)
1640 unsigned code
= (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
1641 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
1642 tree lhs
= gimple_call_lhs (call
);
1643 tree var
= gimple_call_arg (call
, 2);
1644 gimple_seq seq
= NULL
;
1646 if (code
== IFN_GOACC_REDUCTION_SETUP
1647 || code
== IFN_GOACC_REDUCTION_TEARDOWN
)
1649 /* Setup and Teardown need to copy from/to the receiver object,
1651 tree ref_to_res
= gimple_call_arg (call
, 1);
1653 if (!integer_zerop (ref_to_res
))
1655 tree dst
= build_simple_mem_ref (ref_to_res
);
1658 if (code
== IFN_GOACC_REDUCTION_SETUP
)
1664 gimple_seq_add_stmt (&seq
, gimple_build_assign (dst
, src
));
1668 /* Copy VAR to LHS, if there is an LHS. */
1670 gimple_seq_add_stmt (&seq
, gimple_build_assign (lhs
, var
));
1672 gsi_replace_with_seq (&gsi
, seq
, true);
1675 /* Main entry point for oacc transformations which run on the device
1676 compiler after LTO, so we know what the target device is at this
1677 point (including the host fallback). */
1680 execute_oacc_device_lower ()
1682 tree attrs
= oacc_get_fn_attrib (current_function_decl
);
1685 /* Not an offloaded function. */
1688 /* Parse the default dim argument exactly once. */
1689 if ((const void *)flag_openacc_dims
!= &flag_openacc_dims
)
1691 oacc_parse_default_dims (flag_openacc_dims
);
1692 flag_openacc_dims
= (char *)&flag_openacc_dims
;
1695 bool is_oacc_kernels
1696 = (lookup_attribute ("oacc kernels",
1697 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1698 bool is_oacc_kernels_parallelized
1699 = (lookup_attribute ("oacc kernels parallelized",
1700 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1702 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1703 kernels, so remove the parallelism dimensions function attributes
1704 potentially set earlier on. */
1705 if (is_oacc_kernels
&& !is_oacc_kernels_parallelized
)
1707 oacc_set_fn_attrib (current_function_decl
, NULL
, NULL
);
1708 attrs
= oacc_get_fn_attrib (current_function_decl
);
1711 /* Discover, partition and process the loops. */
1712 oacc_loop
*loops
= oacc_loop_discovery ();
1713 int fn_level
= oacc_fn_attrib_level (attrs
);
1718 fprintf (dump_file
, "Function is OpenACC routine level %d\n",
1720 else if (is_oacc_kernels
)
1721 fprintf (dump_file
, "Function is %s OpenACC kernels offload\n",
1722 (is_oacc_kernels_parallelized
1723 ? "parallelized" : "unparallelized"));
1725 fprintf (dump_file
, "Function is OpenACC parallel offload\n");
1728 unsigned outer_mask
= fn_level
>= 0 ? GOMP_DIM_MASK (fn_level
) - 1 : 0;
1729 unsigned used_mask
= oacc_loop_partition (loops
, outer_mask
);
1730 /* OpenACC kernels constructs are special: they currently don't use the
1731 generic oacc_loop infrastructure and attribute/dimension processing. */
1732 if (is_oacc_kernels
&& is_oacc_kernels_parallelized
)
1734 /* Parallelized OpenACC kernels constructs use gang parallelism. See
1735 also tree-parloops.c:create_parallel_loop. */
1736 used_mask
|= GOMP_DIM_MASK (GOMP_DIM_GANG
);
1739 int dims
[GOMP_DIM_MAX
];
1740 oacc_validate_dims (current_function_decl
, attrs
, dims
, fn_level
, used_mask
);
1744 const char *comma
= "Compute dimensions [";
1745 for (int ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++, comma
= ", ")
1746 fprintf (dump_file
, "%s%d", comma
, dims
[ix
]);
1747 fprintf (dump_file
, "]\n");
1750 oacc_loop_process (loops
);
1753 fprintf (dump_file
, "OpenACC loops\n");
1754 dump_oacc_loop (dump_file
, loops
, 0);
1755 fprintf (dump_file
, "\n");
1757 if (dump_enabled_p ())
1759 oacc_loop
*l
= loops
;
1760 /* OpenACC kernels constructs are special: they currently don't use the
1761 generic oacc_loop infrastructure. */
1762 if (is_oacc_kernels
)
1764 /* Create a fake oacc_loop for diagnostic purposes. */
1765 l
= new_oacc_loop_raw (NULL
,
1766 DECL_SOURCE_LOCATION (current_function_decl
));
1767 l
->mask
= used_mask
;
1771 /* Skip the outermost, dummy OpenACC loop */
1775 inform_oacc_loop (l
);
1776 if (is_oacc_kernels
)
1780 /* Offloaded targets may introduce new basic blocks, which require
1781 dominance information to update SSA. */
1782 calculate_dominance_info (CDI_DOMINATORS
);
1784 /* Now lower internal loop functions to target-specific code
1787 FOR_ALL_BB_FN (bb
, cfun
)
1788 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1790 gimple
*stmt
= gsi_stmt (gsi
);
1791 if (!is_gimple_call (stmt
))
1797 gcall
*call
= as_a
<gcall
*> (stmt
);
1798 if (!gimple_call_internal_p (call
))
1804 /* Rewind to allow rescan. */
1806 bool rescan
= false, remove
= false;
1807 enum internal_fn ifn_code
= gimple_call_internal_fn (call
);
1813 case IFN_GOACC_TILE
:
1814 oacc_xform_tile (call
);
1818 case IFN_GOACC_LOOP
:
1819 oacc_xform_loop (call
);
1823 case IFN_GOACC_REDUCTION
:
1824 /* Mark the function for SSA renaming. */
1825 mark_virtual_operands_for_renaming (cfun
);
1827 /* If the level is -1, this ended up being an unused
1828 axis. Handle as a default. */
1829 if (integer_minus_onep (gimple_call_arg (call
, 3)))
1830 default_goacc_reduction (call
);
1832 targetm
.goacc
.reduction (call
);
1838 enum ifn_unique_kind kind
1839 = ((enum ifn_unique_kind
)
1840 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
1847 case IFN_UNIQUE_OACC_FORK
:
1848 case IFN_UNIQUE_OACC_JOIN
:
1849 if (integer_minus_onep (gimple_call_arg (call
, 2)))
1851 else if (!targetm
.goacc
.fork_join
1852 (call
, dims
, kind
== IFN_UNIQUE_OACC_FORK
))
1856 case IFN_UNIQUE_OACC_HEAD_MARK
:
1857 case IFN_UNIQUE_OACC_TAIL_MARK
:
1865 if (gsi_end_p (gsi
))
1866 /* We rewound past the beginning of the BB. */
1867 gsi
= gsi_start_bb (bb
);
1869 /* Undo the rewind. */
1874 if (gimple_vdef (call
))
1875 replace_uses_by (gimple_vdef (call
), gimple_vuse (call
));
1876 if (gimple_call_lhs (call
))
1878 /* Propagate the data dependency var. */
1879 gimple
*ass
= gimple_build_assign (gimple_call_lhs (call
),
1880 gimple_call_arg (call
, 1));
1881 gsi_replace (&gsi
, ass
, false);
1884 gsi_remove (&gsi
, true);
1887 /* If not rescanning, advance over the call. */
1891 free_oacc_loop (loops
);
1896 /* Default launch dimension validator. Force everything to 1. A
1897 backend that wants to provide larger dimensions must override this
1901 default_goacc_validate_dims (tree
ARG_UNUSED (decl
), int *dims
,
1902 int ARG_UNUSED (fn_level
),
1903 unsigned ARG_UNUSED (used
))
1905 bool changed
= false;
1907 for (unsigned ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
1919 /* Default dimension bound is unknown on accelerator and 1 on host. */
1922 default_goacc_dim_limit (int ARG_UNUSED (axis
))
1924 #ifdef ACCEL_COMPILER
1933 const pass_data pass_data_oacc_device_lower
=
1935 GIMPLE_PASS
, /* type */
1936 "oaccdevlow", /* name */
1937 OPTGROUP_OMP
, /* optinfo_flags */
1938 TV_NONE
, /* tv_id */
1939 PROP_cfg
, /* properties_required */
1940 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1941 0, /* properties_destroyed */
1942 0, /* todo_flags_start */
1943 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
1946 class pass_oacc_device_lower
: public gimple_opt_pass
1949 pass_oacc_device_lower (gcc::context
*ctxt
)
1950 : gimple_opt_pass (pass_data_oacc_device_lower
, ctxt
)
1953 /* opt_pass methods: */
1954 virtual bool gate (function
*) { return flag_openacc
; };
1956 virtual unsigned int execute (function
*)
1958 return execute_oacc_device_lower ();
1961 }; // class pass_oacc_device_lower
1966 make_pass_oacc_device_lower (gcc::context
*ctxt
)
1968 return new pass_oacc_device_lower (ctxt
);
1972 /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
1973 GOMP_SIMT_ENTER call identifying the privatized variables, which are
1974 turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
1975 Set *REGIMPLIFY to true, except if no privatized variables were seen. */
1978 ompdevlow_adjust_simt_enter (gimple_stmt_iterator
*gsi
, bool *regimplify
)
1980 gimple
*alloc_stmt
= gsi_stmt (*gsi
);
1981 tree simtrec
= gimple_call_lhs (alloc_stmt
);
1982 tree simduid
= gimple_call_arg (alloc_stmt
, 0);
1983 gimple
*enter_stmt
= SSA_NAME_DEF_STMT (simduid
);
1984 gcc_assert (gimple_call_internal_p (enter_stmt
, IFN_GOMP_SIMT_ENTER
));
1985 tree rectype
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1986 TYPE_ARTIFICIAL (rectype
) = TYPE_NAMELESS (rectype
) = 1;
1987 TREE_ADDRESSABLE (rectype
) = 1;
1988 TREE_TYPE (simtrec
) = build_pointer_type (rectype
);
1989 for (unsigned i
= 1; i
< gimple_call_num_args (enter_stmt
); i
++)
1991 tree
*argp
= gimple_call_arg_ptr (enter_stmt
, i
);
1992 if (*argp
== null_pointer_node
)
1994 gcc_assert (TREE_CODE (*argp
) == ADDR_EXPR
1995 && VAR_P (TREE_OPERAND (*argp
, 0)));
1996 tree var
= TREE_OPERAND (*argp
, 0);
1998 tree field
= build_decl (DECL_SOURCE_LOCATION (var
), FIELD_DECL
,
1999 DECL_NAME (var
), TREE_TYPE (var
));
2000 SET_DECL_ALIGN (field
, DECL_ALIGN (var
));
2001 DECL_USER_ALIGN (field
) = DECL_USER_ALIGN (var
);
2002 TREE_THIS_VOLATILE (field
) = TREE_THIS_VOLATILE (var
);
2004 insert_field_into_struct (rectype
, field
);
2006 tree t
= build_simple_mem_ref (simtrec
);
2007 t
= build3 (COMPONENT_REF
, TREE_TYPE (var
), t
, field
, NULL
);
2008 TREE_THIS_VOLATILE (t
) = TREE_THIS_VOLATILE (var
);
2009 SET_DECL_VALUE_EXPR (var
, t
);
2010 DECL_HAS_VALUE_EXPR_P (var
) = 1;
2013 layout_type (rectype
);
2014 tree size
= TYPE_SIZE_UNIT (rectype
);
2015 tree align
= build_int_cst (TREE_TYPE (size
), TYPE_ALIGN_UNIT (rectype
));
2018 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC
, 2, size
, align
);
2019 gimple_call_set_lhs (alloc_stmt
, simtrec
);
2020 gsi_replace (gsi
, alloc_stmt
, false);
2021 gimple_stmt_iterator enter_gsi
= gsi_for_stmt (enter_stmt
);
2022 enter_stmt
= gimple_build_assign (simduid
, gimple_call_arg (enter_stmt
, 0));
2023 gsi_replace (&enter_gsi
, enter_stmt
, false);
2027 if (single_imm_use (simtrec
, &use
, &exit_stmt
))
2029 gcc_assert (gimple_call_internal_p (exit_stmt
, IFN_GOMP_SIMT_EXIT
));
2030 gimple_stmt_iterator exit_gsi
= gsi_for_stmt (exit_stmt
);
2031 tree clobber
= build_clobber (rectype
);
2032 exit_stmt
= gimple_build_assign (build_simple_mem_ref (simtrec
), clobber
);
2033 gsi_insert_before (&exit_gsi
, exit_stmt
, GSI_SAME_STMT
);
2036 gcc_checking_assert (has_zero_uses (simtrec
));
2039 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2042 find_simtpriv_var_op (tree
*tp
, int *walk_subtrees
, void *)
2047 && DECL_HAS_VALUE_EXPR_P (t
)
2048 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t
)))
2056 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2057 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2058 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
2059 internal functions on non-SIMT targets, and likewise some SIMD internal
2060 functions on SIMT targets. */
2063 execute_omp_device_lower ()
2065 int vf
= targetm
.simt
.vf
? targetm
.simt
.vf () : 1;
2066 bool regimplify
= false;
2068 gimple_stmt_iterator gsi
;
2069 bool calls_declare_variant_alt
2070 = cgraph_node::get (cfun
->decl
)->calls_declare_variant_alt
;
2071 FOR_EACH_BB_FN (bb
, cfun
)
2072 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2074 gimple
*stmt
= gsi_stmt (gsi
);
2075 if (!is_gimple_call (stmt
))
2077 if (!gimple_call_internal_p (stmt
))
2079 if (calls_declare_variant_alt
)
2080 if (tree fndecl
= gimple_call_fndecl (stmt
))
2082 tree new_fndecl
= omp_resolve_declare_variant (fndecl
);
2083 if (new_fndecl
!= fndecl
)
2085 gimple_call_set_fndecl (stmt
, new_fndecl
);
2091 tree lhs
= gimple_call_lhs (stmt
), rhs
= NULL_TREE
;
2092 tree type
= lhs
? TREE_TYPE (lhs
) : integer_type_node
;
2093 switch (gimple_call_internal_fn (stmt
))
2095 case IFN_GOMP_USE_SIMT
:
2096 rhs
= vf
== 1 ? integer_zero_node
: integer_one_node
;
2098 case IFN_GOMP_SIMT_ENTER
:
2099 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
2100 goto simtreg_enter_exit
;
2101 case IFN_GOMP_SIMT_ENTER_ALLOC
:
2103 ompdevlow_adjust_simt_enter (&gsi
, ®implify
);
2104 rhs
= vf
== 1 ? null_pointer_node
: NULL_TREE
;
2105 goto simtreg_enter_exit
;
2106 case IFN_GOMP_SIMT_EXIT
:
2110 unlink_stmt_vdef (stmt
);
2112 case IFN_GOMP_SIMT_LANE
:
2113 case IFN_GOMP_SIMT_LAST_LANE
:
2114 rhs
= vf
== 1 ? build_zero_cst (type
) : NULL_TREE
;
2116 case IFN_GOMP_SIMT_VF
:
2117 rhs
= build_int_cst (type
, vf
);
2119 case IFN_GOMP_SIMT_ORDERED_PRED
:
2120 rhs
= vf
== 1 ? integer_zero_node
: NULL_TREE
;
2122 unlink_stmt_vdef (stmt
);
2124 case IFN_GOMP_SIMT_VOTE_ANY
:
2125 case IFN_GOMP_SIMT_XCHG_BFLY
:
2126 case IFN_GOMP_SIMT_XCHG_IDX
:
2127 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
2129 case IFN_GOMP_SIMD_LANE
:
2130 case IFN_GOMP_SIMD_LAST_LANE
:
2131 rhs
= vf
!= 1 ? build_zero_cst (type
) : NULL_TREE
;
2133 case IFN_GOMP_SIMD_VF
:
2134 rhs
= vf
!= 1 ? build_one_cst (type
) : NULL_TREE
;
2141 stmt
= lhs
? gimple_build_assign (lhs
, rhs
) : gimple_build_nop ();
2142 gsi_replace (&gsi
, stmt
, false);
2145 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
2146 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2147 if (walk_gimple_stmt (&gsi
, NULL
, find_simtpriv_var_op
, NULL
))
2149 if (gimple_clobber_p (gsi_stmt (gsi
)))
2150 gsi_remove (&gsi
, true);
2152 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
2155 cfun
->has_force_vectorize_loops
= false;
2161 const pass_data pass_data_omp_device_lower
=
2163 GIMPLE_PASS
, /* type */
2164 "ompdevlow", /* name */
2165 OPTGROUP_OMP
, /* optinfo_flags */
2166 TV_NONE
, /* tv_id */
2167 PROP_cfg
, /* properties_required */
2168 PROP_gimple_lomp_dev
, /* properties_provided */
2169 0, /* properties_destroyed */
2170 0, /* todo_flags_start */
2171 TODO_update_ssa
, /* todo_flags_finish */
2174 class pass_omp_device_lower
: public gimple_opt_pass
2177 pass_omp_device_lower (gcc::context
*ctxt
)
2178 : gimple_opt_pass (pass_data_omp_device_lower
, ctxt
)
2181 /* opt_pass methods: */
2182 virtual bool gate (function
*fun
)
2184 return (!(fun
->curr_properties
& PROP_gimple_lomp_dev
)
2186 && cgraph_node::get (fun
->decl
)->calls_declare_variant_alt
));
2188 virtual unsigned int execute (function
*)
2190 return execute_omp_device_lower ();
2193 }; // class pass_expand_omp_ssa
2198 make_pass_omp_device_lower (gcc::context
*ctxt
)
2200 return new pass_omp_device_lower (ctxt
);
2203 /* "omp declare target link" handling pass. */
2207 const pass_data pass_data_omp_target_link
=
2209 GIMPLE_PASS
, /* type */
2210 "omptargetlink", /* name */
2211 OPTGROUP_OMP
, /* optinfo_flags */
2212 TV_NONE
, /* tv_id */
2213 PROP_ssa
, /* properties_required */
2214 0, /* properties_provided */
2215 0, /* properties_destroyed */
2216 0, /* todo_flags_start */
2217 TODO_update_ssa
, /* todo_flags_finish */
2220 class pass_omp_target_link
: public gimple_opt_pass
2223 pass_omp_target_link (gcc::context
*ctxt
)
2224 : gimple_opt_pass (pass_data_omp_target_link
, ctxt
)
2227 /* opt_pass methods: */
2228 virtual bool gate (function
*fun
)
2230 #ifdef ACCEL_COMPILER
2231 return offloading_function_p (fun
->decl
);
2238 virtual unsigned execute (function
*);
2241 /* Callback for walk_gimple_stmt used to scan for link var operands. */
2244 find_link_var_op (tree
*tp
, int *walk_subtrees
, void *)
2249 && DECL_HAS_VALUE_EXPR_P (t
)
2250 && is_global_var (t
)
2251 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t
)))
2261 pass_omp_target_link::execute (function
*fun
)
2264 FOR_EACH_BB_FN (bb
, fun
)
2266 gimple_stmt_iterator gsi
;
2267 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2268 if (walk_gimple_stmt (&gsi
, NULL
, find_link_var_op
, NULL
))
2269 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
2278 make_pass_omp_target_link (gcc::context
*ctxt
)
2280 return new pass_omp_target_link (ctxt
);