/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "tree-pass.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-into-ssa.h"
#include "splay-tree.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     directive.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule)
    return chunk_size;

  int vf = omp_max_vf ();
  if (vf == 1)
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

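/* For illustration: with a hypothetical vectorization factor vf = 4 and a
   chunk size of 10, the folds above compute (10 + 3) & -4 = 12, i.e. the
   chunk size rounded up to the next multiple of vf.  A chunk that is
   already a multiple, say 8, is left unchanged: (8 + 3) & -4 = 8.  */
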
/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

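/* As a sketch, for a combined

     #pragma omp parallel for schedule (dynamic, 16)
     for (i = 0; i < n; i++) ...

   the vector built above would hold roughly { (long) 0, (long) n,
   (long) 1, (long) 16 }, matching the extra start/end/step/chunk
   arguments of the combined GOMP_parallel_loop_* entry points; for
   parallel sections it holds just the section count.  */
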
/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

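/* For example, a "#pragma omp parallel" immediately enclosing a
   "#pragma omp for schedule (dynamic)" with nothing else in between is
   marked combined here, while a schedule (static) or an ordered loop is
   left as a plain parallel region, per the comment above.  */
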
/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	     region->cont->index);

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

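/* The dump produced above looks roughly like this for a parallel+for
   nest (the block numbers are hypothetical):

     bb 2: GIMPLE_OMP_PARALLEL
	 bb 3: GIMPLE_OMP_FOR
	 bb 5: GIMPLE_OMP_CONTINUE
	 bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */
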
/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if (region->inner->sched_modifiers
		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

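/* Schematically, for "#pragma omp parallel for schedule (dynamic, 16)
   num_threads (8)" the call emitted above boils down to

     GOMP_parallel_loop_dynamic (foo._omp_fn.0, &.omp_data_o, 8,
				 n1, n2, step, 16, 0);

   where foo._omp_fn.0 and the n1/n2/step bounds are placeholders for
   the outlined child function and the ws_args collected earlier.  */
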
/* Insert a function call whose name is FUNC_NAME with the information from
   ENTRY_STMT into the basic_block BB.  */

static void
expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2;
  gimple_stmt_iterator gsi;
  vec<tree, va_gc> *args;

  gcc_assert (vec_safe_length (ws_args) == 2);
  tree func_name = (*ws_args)[0];
  tree grain = (*ws_args)[1];

  tree clauses = gimple_omp_parallel_clauses (entry_stmt);
  tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
  gcc_assert (count != NULL_TREE);
  count = OMP_CLAUSE_OPERAND (count, 0);

  gsi = gsi_last_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));

  vec_alloc (args, 4);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (count);
  args->quick_push (grain);
  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
			    GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

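/* The non-taskloop branch above thus emits, roughly,

     GOMP_task (foo._omp_fn.1, &.omp_data_o, cpyfn, arg_size, arg_align,
		cond, flags, depend, priority);

   with null_pointer_node standing in for a missing data block or copy
   function (the names here are illustrative), and GOMP_taskloop{,_ull}
   additionally receiving num_tasks and the start/end/step triple.  */
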
/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

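/* For instance, in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   the worksharing loop ends immediately before the parallel's own
   closing barrier, so its GIMPLE_OMP_RETURN can usually be marked
   nowait -- unless addressable locals that queued tasks might still
   reference are in scope, as checked above.  */
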
/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  bool is_cilk_for
    = (flag_cilkplus
       && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
       && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
			   OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);

  if (is_cilk_for)
    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
       and the inner statement contains the name of the built-in function
       and grain.  */
    ws_args = region->inner->ws_args;
  else if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0)
			 == gimple_omp_taskreg_data_arg (entry_stmt))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to it's default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_neeeded (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  /* Emit a library call to launch the children threads.  */
  if (is_cilk_for)
    expand_cilk_for_call (new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type)
{
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  for (ix = 0; ix != fd->collapse; ix++)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

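/* Worked example of the count computation above: for a loop
   "for (i = 0; i < 10; i += 3)" we get range = 10, dir = +1 and
   iters = (10 - 1 + 3) / 3 = 4, matching the iterations 0, 3, 6, 9;
   TOTAL is then the product of all the per-loop counts.  */
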
/* Emit initializers for collapsed loop members.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_inits.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
			  fold_convert (ivar_type, collapse->iters));
      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type, collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (loop->v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);

      if (ix)
	{
	  expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
			      fold_convert (ivar_type, collapse->iters));
	  ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}
    }
}

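/* For instance, with two collapsed loops of 4 and 5 iterations the
   outer variable IVAR runs over [0, 20), and an element such as
   IVAR = 13 decomposes as v2 = base2 + (13 % 5) * step2 and
   v1 = base1 + (13 / 5) * step1, i.e. inner index 3, outer index 2.  */
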
/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

     bool zero3, zero2, zero1, zero;

     zero3 = N32 c3 N31;
     count3 = (N32 - N31) /[cl] STEP3;
     zero2 = N22 c2 N21;
     count2 = (N22 - N21) /[cl] STEP2;
     zero1 = N12 c1 N11;
     count1 = (N12 - N11) /[cl] STEP1;
     zero = zero3 || zero2 || zero1;
     count = count1 * count2 * count3;
     if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.  */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = REG_BR_PROB_BASE - ne->probability;
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

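/* Worked example of the trip count folded above: for
   "for (i = 0; i < 10; i += 3)" with cond_code LT_EXPR the adjustment
   is STEP - 1, so count = (3 - 1 + 10 - 0) / 3 = 4, in line with the
   comment before this function.  */
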
/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0)
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      t = fold_convert (itype, t);
      t = fold_build2 (MULT_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].step));
      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].n1, t);
      else
	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
      t = force_gimple_operand_gsi (gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
      if (i != 0)
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}

/* Helper function for expand_omp_for_*.  Generate code like:
	L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
	L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
	L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */

static basic_block
extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
			     basic_block body_bb)
{
  basic_block last_bb, bb, collapse_bb = NULL;
  int i;
  gimple_stmt_iterator gsi;
  edge e;
  tree t;
  gimple *stmt;

  last_bb = cont_bb;
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v);

      bb = create_empty_bb (last_bb);
      add_bb_to_loop (bb, last_bb->loop_father);
      gsi = gsi_start_bb (bb);

      if (i < fd->collapse - 1)
	{
	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
	  e->probability = REG_BR_PROB_BASE / 8;

	  t = fd->loops[i + 1].n1;
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (fd->loops[i + 1].v)
					&& TREE_ADDRESSABLE (fd->loops[i
								       + 1].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
      else
	collapse_bb = bb;

      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);

      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
      else
	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      if (i > 0)
	{
	  t = fd->loops[i].n2;
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
	  e->probability = REG_BR_PROB_BASE * 7 / 8;
	}
      else
	make_edge (bb, body_bb, EDGE_FALLTHRU);
      last_bb = bb;
    }

  return collapse_bb;
}

/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}

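/* In effect, "#pragma omp ordered depend(source)" inside a doacross
   loop becomes a call along the lines of

     GOMP_doacross_post (&.counts);

   where the address passed is the base of the iteration-count array
   built for the ordered(n) loop nest; the matching depend(sink:...)
   waits are expanded by the function below.  */
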

/* Expand a single depend from #pragma omp ordered depend(sink:...).  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable, we don't
		 know at compile time even the direction, so can't warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
			"lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
			  "in the iteration space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = REG_BR_PROB_BASE / 8;
  e1->probability = REG_BR_PROB_BASE - e3->probability;
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}
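
/* In source form, a clause such as depend(sink: v1 - 1, v2) handled above
   becomes roughly (a hedged sketch; the offsets are folded into absolute
   iteration numbers divided by the step, and the whole call is guarded by
   the in-iteration-space condition accumulated in COND):

     if (cond)
       GOMP_doacross_wait (v1 - n11 - 1, v2 - n21);  */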

/* Expand all #pragma omp ordered depend(source) and
   #pragma omp ordered depend(sink:...) constructs in the current
   #pragma omp for ordered(n) region.  */

static void
expand_omp_ordered_source_sink (struct omp_region *region,
				struct omp_for_data *fd, tree *counts,
				basic_block cont_bb)
{
  struct omp_region *inner;
  int i;

  for (i = fd->collapse - 1; i < fd->ordered; i++)
    if (i == fd->collapse - 1 && fd->collapse > 1)
      counts[i] = NULL_TREE;
    else if (i >= fd->collapse && !cont_bb)
      counts[i] = build_zero_cst (fd->iter_type);
    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
	     && integer_onep (fd->loops[i].step))
      counts[i] = NULL_TREE;
    else
      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
  tree atype
    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;

  for (inner = region->inner; inner; inner = inner->next)
    if (inner->type == GIMPLE_OMP_ORDERED)
      {
	gomp_ordered *ord_stmt = inner->ord_stmt;
	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
	location_t loc = gimple_location (ord_stmt);
	tree c;
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
	    break;
	if (c)
	  expand_omp_ordered_source (&gsi, fd, counts, loc);
	for (c = gimple_omp_ordered_clauses (ord_stmt);
	     c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
	gsi_remove (&gsi, true);
      }
}

/* Wrap the body into fd->ordered - fd->collapse loops that aren't
   collapsed.  */

static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      bool ordered_lastprivate)
{
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v,
					 fold_convert (sizetype,
						       fd->loops[i].step));
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = REG_BR_PROB_BASE / 8;
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = REG_BR_PROB_BASE - e3->probability;

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      if (e2)
	{
	  struct loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, body_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with any schedule.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	iend = iend0;
    L1:
	BODY;
	V += STEP;
	if (V cond iend) goto L1; else goto L2;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

    If this is a combined omp parallel loop, instead of the call to
    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
    If this is gimple_omp_for_combined_p loop, then instead of assigning
    V and iend in L0 we assign the first two _looptemp_ clause decls of the
    inner GIMPLE_OMP_FOR and V += STEP; and
    if (V cond iend) goto L1; else goto L2; are removed.

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	goto Z1;
    Z0:
	count = 0;
    Z1:
	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
	if (more) goto L0; else goto L3;
    L0:
	V = istart0;
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
	iend = iend0;
    L1:
	BODY;
	V += 1;
	if (V < iend) goto L10; else goto L2;
    L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto L1; else goto L11;
    L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto L1; else goto L12;
    L12:
	V2 = N21;
	V1 += STEP1;
	goto L1;
    L2:
	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
    L3:

      */
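
/* For orientation, the runtime protocol behind the pseudocode above,
   written against libgomp's schedule(dynamic) entry points (a hedged,
   simplified sketch: one collapse level, long iterators, "<" condition):

     long istart, iend;
     if (GOMP_loop_dynamic_start (n1, n2, step, chunk, &istart, &iend))
       do
	 for (long v = istart; v < iend; v += step)
	   BODY (v);
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();  */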

static void
expand_omp_for_generic (struct omp_region *region,
			struct omp_for_data *fd,
			enum built_in_function start_fn,
			enum built_in_function next_fn,
			gimple *inner_stmt)
{
  tree type, istart0, iend0, iend;
  tree t, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
  basic_block l2_bb = NULL, l3_bb = NULL;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  bool in_combined_parallel = is_combined_parallel (region);
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  bool ordered_lastprivate = false;

  gcc_assert (!broken_loop || !in_combined_parallel);
  gcc_assert (fd->iter_type == long_integer_type_node
	      || !in_combined_parallel);

  entry_bb = region->entry;
  cont_bb = region->cont;
  collapse_bb = NULL;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  l1_bb = single_succ (l0_bb);
  if (!broken_loop)
    {
      l2_bb = create_empty_bb (cont_bb);
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
		      == l1_bb));
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  else
    l2_bb = NULL;
  l3_bb = BRANCH_EDGE (entry_bb)->dest;
  exit_bb = region->exit;

  gsi = gsi_last_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  if (fd->ordered
      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
			  OMP_CLAUSE_LASTPRIVATE))
    ordered_lastprivate = false;
  if (fd->collapse > 1 || fd->ordered)
    {
      int first_zero_iter1 = -1, first_zero_iter2 = -1;
      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter1_bb, first_zero_iter1,
				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);

      if (zero_iter1_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter1;
	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter1_bb));
	}
      if (zero_iter2_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter2; i < fd->ordered; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  if (zero_iter1_bb)
	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	  else
	    {
	      gsi_prev (&gsi);
	      e = split_block (entry_bb, gsi_stmt (gsi));
	      entry_bb = e->dest;
	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
	      gsi = gsi_last_bb (entry_bb);
	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				       get_immediate_dominator
					 (CDI_DOMINATORS, zero_iter2_bb));
	    }
	}
      if (fd->collapse == 1)
	{
	  counts[0] = fd->loop.n2;
	  fd->loop = fd->loops[0];
	}
    }

  type = TREE_TYPE (fd->loop.v);
  istart0 = create_tmp_var (fd->iter_type, ".istart0");
  iend0 = create_tmp_var (fd->iter_type, ".iend0");
  TREE_ADDRESSABLE (istart0) = 1;
  TREE_ADDRESSABLE (iend0) = 1;

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type)
      && fd->ordered == 0)
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  gimple_stmt_iterator gsif = gsi;
  gsi_prev (&gsif);

  tree arr = NULL_TREE;
  if (in_combined_parallel)
    {
      gcc_assert (fd->ordered == 0);
      /* In a combined parallel loop, emit a call to
	 GOMP_loop_foo_next.  */
      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
    }
  else
    {
      tree t0, t1, t2, t3, t4;
      /* If this is not a combined parallel loop, emit a call to
	 GOMP_loop_foo_start in ENTRY_BB.  */
      t4 = build_fold_addr_expr (iend0);
      t3 = build_fold_addr_expr (istart0);
      if (fd->ordered)
	{
	  t0 = build_int_cst (unsigned_type_node,
			      fd->ordered - fd->collapse + 1);
	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
							fd->ordered
							- fd->collapse + 1),
				".omp_counts");
	  DECL_NAMELESS (arr) = 1;
	  TREE_ADDRESSABLE (arr) = 1;
	  TREE_STATIC (arr) = 1;
	  vec<constructor_elt, va_gc> *v;
	  vec_alloc (v, fd->ordered - fd->collapse + 1);
	  int idx;

	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
	    {
	      tree c;
	      if (idx == 0 && fd->collapse > 1)
		c = fd->loop.n2;
	      else
		c = counts[idx + fd->collapse - 1];
	      tree purpose = size_int (idx);
	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
	      if (TREE_CODE (c) != INTEGER_CST)
		TREE_STATIC (arr) = 0;
	    }

	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
	  if (!TREE_STATIC (arr))
	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
						    void_type_node, arr),
				      true, NULL_TREE, true, GSI_SAME_STMT);
	  t1 = build_fold_addr_expr (arr);
	}
      else
	{
	  t2 = fold_convert (fd->iter_type, fd->loop.step);
	  t1 = fd->loop.n2;
	  t0 = fd->loop.n1;
	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
	    {
	      tree innerc
		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t0 = OMP_CLAUSE_DECL (innerc);
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	      t1 = OMP_CLAUSE_DECL (innerc);
	    }
	  if (POINTER_TYPE_P (TREE_TYPE (t0))
	      && TYPE_PRECISION (TREE_TYPE (t0))
		 != TYPE_PRECISION (fd->iter_type))
	    {
	      /* Avoid casting pointers to integer of a different size.  */
	      tree itype = signed_type_for (type);
	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
	    }
	  else
	    {
	      t1 = fold_convert (fd->iter_type, t1);
	      t0 = fold_convert (fd->iter_type, t0);
	    }
	  if (bias)
	    {
	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
	    }
	}
      if (fd->iter_type == long_integer_type_node || fd->ordered)
	{
	  if (fd->chunk_size)
	    {
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      if (fd->ordered)
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     5, t0, t1, t, t3, t4);
	      else
		t = build_call_expr (builtin_decl_explicit (start_fn),
				     6, t0, t1, t2, t, t3, t4);
	    }
	  else if (fd->ordered)
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 4, t0, t1, t3, t4);
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 5, t0, t1, t2, t3, t4);
	}
      else
	{
	  tree t5;
	  tree c_bool_type;
	  tree bfn_decl;

	  /* The GOMP_loop_ull_*start functions have additional boolean
	     argument, true for < loops and false for > loops.
	     In Fortran, the C bool type can be different from
	     boolean_type_node.  */
	  bfn_decl = builtin_decl_explicit (start_fn);
	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
	  t5 = build_int_cst (c_bool_type,
			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
	  if (fd->chunk_size)
	    {
	      tree bfn_decl = builtin_decl_explicit (start_fn);
	      t = fold_convert (fd->iter_type, fd->chunk_size);
	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
	    }
	  else
	    t = build_call_expr (builtin_decl_explicit (start_fn),
				 6, t5, t0, t1, t2, t3, t4);
	}
    }
  if (TREE_TYPE (t) != boolean_type_node)
    t = fold_build2 (NE_EXPR, boolean_type_node,
		     t, build_int_cst (TREE_TYPE (t), 0));
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
  if (arr && !TREE_STATIC (arr))
    {
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
			 GSI_SAME_STMT);
    }
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (gsi_end_p (gsif))
    gsif = gsi_after_labels (gsi_bb (gsif));
  else
    gsi_next (&gsif);

  /* Iteration setup for sequential loop goes in L0_BB.  */
  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (inner_stmt)
		     == GF_OMP_FOR_KIND_SIMD);
      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }

  gsi = gsi_start_bb (l0_bb);
  t = istart0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = iend0;
  if (fd->ordered && fd->collapse == 1)
    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
		     fold_convert (fd->iter_type, fd->loop.step));
  else if (bias)
    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
  if (fd->ordered && fd->collapse == 1)
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
			 fd->loop.n1, fold_convert (sizetype, t));
      else
	{
	  t = fold_convert (TREE_TYPE (startvar), t);
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
			   fd->loop.n1, t);
	}
    }
  else
    {
      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
      t = fold_convert (TREE_TYPE (startvar), t);
    }
  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
	assign_stmt = gimple_build_assign (fd->loop.v, iend);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      itercnt = startvar;
	      tree n1 = fd->loop.n1;
	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
		{
		  itercnt
		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
				    itercnt);
		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
		}
	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
				     itercnt, n1);
	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
				     itercnt, fd->loop.step);
	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						  NULL_TREE, false,
						  GSI_CONTINUE_LINKING);
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (fd->ordered)
    {
      /* Until now, counts array contained number of iterations or
	 variable containing it for ith loop.  From now on, we need
	 those counts only for collapsed loops, and only for the 2nd
	 till the last collapsed one.  Move those one element earlier,
	 we'll use counts[fd->collapse - 1] for the first source/sink
	 iteration counter and so on and counts[fd->ordered]
	 as the array holding the current counter values for
	 depend(source).  */
      if (fd->collapse > 1)
	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
      if (broken_loop)
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    {
	      tree type = TREE_TYPE (fd->loops[i].v);
	      tree this_cond
		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
			       fold_convert (type, fd->loops[i].n1),
			       fold_convert (type, fd->loops[i].n2));
	      if (!integer_onep (this_cond))
		break;
	    }
	  if (i < fd->ordered)
	    {
	      cont_bb
		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
	      make_edge (cont_bb, l1_bb, 0);
	      l2_bb = create_empty_bb (cont_bb);
	      broken_loop = false;
	    }
	}
      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
					      ordered_lastprivate);
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  gsi = gsi_last_bb (l0_bb);
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
				   istart0, true);
	  gsi = gsi_last_bb (cont_bb);
	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
			   build_int_cst (fd->iter_type, 1));
	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_zero_node, NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}
      gsi = gsi_last_bb (l0_bb);
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_zero_node, NULL_TREE, NULL_TREE);
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      expand_omp_build_assign (&gsi, aref, t, true);
    }

  if (!broken_loop)
    {
      /* Code to control the increment and predicate for the sequential
	 loop goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, fd->loop.step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
	    {
	      tree tem;
	      if (fd->collapse > 1)
		tem = fd->loop.v;
	      else
		{
		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
				     fd->loops[0].v, fd->loops[0].n1);
		  tem = fold_convert (fd->iter_type, tem);
		}
	      tree aref = build4 (ARRAY_REF, fd->iter_type,
				  counts[fd->ordered], size_zero_node,
				  NULL_TREE, NULL_TREE);
	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
					      true, GSI_SAME_STMT);
	      expand_omp_build_assign (&gsi, aref, tem);
	    }

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
		      iend);
	  gcond *cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);

      /* Emit code to get the next parallel iteration in L2_BB.  */
      gsi = gsi_start_bb (l2_bb);

      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
			   build_fold_addr_expr (istart0),
			   build_fold_addr_expr (iend0));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      if (TREE_TYPE (t) != boolean_type_node)
	t = fold_build2 (NE_EXPR, boolean_type_node,
			 t, build_int_cst (TREE_TYPE (t), 0));
      gcond *cond_stmt = gimple_build_cond_empty (t);
      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
    }

  /* Add the loop cleanup function.  */
  gsi = gsi_last_bb (exit_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
  gcall *call_stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
  if (fd->ordered)
    {
      tree arr = counts[fd->ordered];
      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
			GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      gimple_seq phis;

      e = find_edge (cont_bb, l3_bb);
      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);

      phis = phi_nodes (l3_bb);
      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *phi = gsi_stmt (gsi);
	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
		   PHI_ARG_DEF_FROM_EDGE (phi, e));
	}
      remove_edge (e);

      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
      e = find_edge (cont_bb, l1_bb);
      if (e == NULL)
	{
	  e = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (e->dest) == l1_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (e);
	  e = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (e);
	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	e->flags = EDGE_TRUE_VALUE;
      if (e)
	{
	  e->probability = REG_BR_PROB_BASE * 7 / 8;
	  find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
	}
      else
	{
	  e = find_edge (cont_bb, l2_bb);
	  e->flags = EDGE_FALLTHRU;
	}
      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);

      if (gimple_in_ssa_p (cfun))
	{
	  /* Add phis to the outer loop that connect to the phis in the inner,
	     original loop, and move the loop entry value of the inner phi to
	     the loop entry value of the outer phi.  */
	  gphi_iterator psi;
	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
	    {
	      source_location locus;
	      gphi *nphi;
	      gphi *exit_phi = psi.phi ();

	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);

	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
	      edge latch_to_l1 = find_edge (latch, l1_bb);
	      gphi *inner_phi
		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);

	      tree t = gimple_phi_result (exit_phi);
	      tree new_res = copy_ssa_name (t, NULL);
	      nphi = create_phi_node (new_res, l0_bb);

	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
	      add_phi_arg (nphi, t, entry_to_l0, locus);

	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);

	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
	    }
	}

      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
			       recompute_dominator (CDI_DOMINATORS, l2_bb));
      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
			       recompute_dominator (CDI_DOMINATORS, l3_bb));
      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
			       recompute_dominator (CDI_DOMINATORS, l0_bb));
      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));

      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      struct loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      struct loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  struct loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/
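
/* A compilable standalone sketch of the partitioning arithmetic in the
   pseudocode above (hypothetical names, not used by the pass): thread
   THREADID of NTHREADS gets the half-open range [*S0, *E0) of N logical
   iterations; the first n % nthreads threads receive one extra.  */

static inline void
omp_example_static_nochunk_bounds (long n, long nthreads, long threadid,
				   long *s0, long *e0)
{
  long q = n / nthreads;	/* Base share per thread.  */
  long tt = n % nthreads;	/* Threads that get one more iteration.  */
  if (threadid < tt)
    {
      tt = 0;
      q++;
    }
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
}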

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = REG_BR_PROB_BASE / 2000 - 1;
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = REG_BR_PROB_BASE / 4 * 3;
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = REG_BR_PROB_BASE / 4;
  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));

  struct loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/
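
/* Likewise for the chunked variant: a compilable standalone sketch
   (hypothetical names) of the logical range handed out on trip TRIP of
   the round-robin distribution in the pseudocode above; the caller still
   has to test s0 < n before entering the body.  */

static inline void
omp_example_static_chunk_bounds (long n, long nthreads, long threadid,
				 long chunk, long trip,
				 long *s0, long *e0)
{
  *s0 = (trip * nthreads + threadid) * chunk;
  *e0 = *s0 + chunk < n ? *s0 + chunk : n;	/* e0 = min (s0 + chunk, n) */
}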
static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = REG_BR_PROB_BASE / 2000 - 1;
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   se, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
				GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
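  /* N now holds the logical iteration count, a ceiling division by STEP;
     e.g. (illustrative values) N1 = 0, N2 = 10, STEP = 3 with cond '<'
     gives n = (STEP - 1 + N2 - N1) / STEP = 4.  */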
  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }

  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercntbias
		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
				   fold_convert (itype, fd->loop.n1));
		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
					     itercntbias, step);
		  itercntbias
		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
	 replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
	    t = build2 (EQ_EXPR, boolean_type_node,
			build_int_cst (itype, 0),
			build_int_cst (itype, 1));
	  else
	    t = build2 (fd->loop.cond_code, boolean_type_node,
			DECL_P (vback) && TREE_ADDRESSABLE (vback)
			? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB.  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
	{
	  se = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (se->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (se);
	  se = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (se);
	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
				iter_part_bb);
    }
  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
	 remove arguments of the phi nodes in fin_bb.  We need to create
	 appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
	   gsi_next (&psi), ++i)
	{
	  gphi *nphi;
	  source_location locus;

	  phi = psi.phi ();
	  t = gimple_phi_result (phi);
	  gcc_assert (t == redirect_edge_var_map_result (vm));

	  if (!single_pred_p (fin_bb))
	    t = copy_ssa_name (t, phi);

	  nphi = create_phi_node (t, iter_part_bb);

	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
	  locus = gimple_phi_arg_location_from_edge (phi, se);

	  /* A special case -- fd->loop.v is not yet computed in
	     iter_part_bb, we need to use vextra instead.  */
	  if (t == fd->loop.v)
	    t = vextra;
	  add_phi_arg (nphi, t, ene, locus);
	  locus = redirect_edge_var_map_location (vm);
	  tree back_arg = redirect_edge_var_map_def (vm);
	  add_phi_arg (nphi, back_arg, re, locus);
	  edge ce = find_edge (cont_bb, body_bb);
	  if (ce == NULL)
	    {
	      ce = BRANCH_EDGE (cont_bb);
	      gcc_assert (single_succ (ce->dest) == body_bb);
	      ce = single_succ_edge (ce->dest);
	    }
	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
	  gcc_assert (inner_loop_phi != NULL);
	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
		       find_edge (seq_start_bb, body_bb), locus);

	  if (!single_pred_p (fin_bb))
	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
	}
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      if (single_pred_p (fin_bb))
	while (1)
	  {
	    psi = gsi_start_phis (fin_bb);
	    if (gsi_end_p (psi))
	      break;
	    remove_phi_node (&psi, false);
	  }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
		   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
		   UNKNOWN_LOCATION);
    }

  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    {
      struct loop *loop = body_bb->loop_father;
      struct loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
	{
	  gcc_assert (loop->header == body_bb);
	  gcc_assert (loop->latch == region->cont
		      || single_pred (loop->latch) == region->cont);
	  trip_loop->inner = loop;
	  return;
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  loop = alloc_loop ();
	  loop->header = body_bb;
	  if (collapse_bb == NULL)
	    loop->latch = cont_bb;
	  add_loop (loop, trip_loop);
	}
    }
}
/* A subroutine of expand_omp_for.  Generate code for a _Cilk_for loop.
   Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">" or "!=", we generate pseudocode

	for (ind_var = low; ind_var < high; ind_var++)
	  {
	    V = n1 + (ind_var * STEP);

	    <BODY>
	  }

   In the above pseudocode, low and high are function parameters of the
   child function.  In the function below, we insert a temporary variable
   and calls to two OMP functions that will not be found in the body of
   the _Cilk_for (since OMP_FOR cannot be mixed with _Cilk_for).  These
   calls are replaced with low and high by the function that handles
   taskreg.  */
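/* For example (an illustrative sketch, not from the original sources), a
   source-level loop

	_Cilk_for (int i = 0; i < n; i++) BODY;

   becomes a child function whose __low and __high arguments delimit the
   subrange of [0, n) that the Cilk runtime assigns to each worker
   invocation.  */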
static void
expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
{
  bool broken_loop = region->cont == NULL;
  basic_block entry_bb = region->entry;
  basic_block cont_bb = region->cont;

  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  basic_block l1_bb, l2_bb;

  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  basic_block exit_bb = region->exit;
  basic_block l2_dom_bb = NULL;

  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);

  /* Below statements until the "tree high_val = ..." are pseudo statements
     used to pass information to be used by expand_omp_taskreg.
     low_val and high_val will be replaced by the __low and __high
     parameter from the child function.

     The call_exprs part is a place-holder, it is mainly used
     to distinctly identify to the top-level part that this is
     where we should put low and high (reasoning given in header
     comment).  */

  tree child_fndecl
    = gimple_omp_parallel_child_fn (
	as_a <gomp_parallel *> (last_stmt (region->outer->entry)));
  tree t, low_val = NULL_TREE, high_val = NULL_TREE;
  for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
    {
      if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
	high_val = t;
      else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
	low_val = t;
    }
  gcc_assert (low_val && high_val);

  tree type = TREE_TYPE (low_val);
  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  tree n1 = low_val;
  tree n2 = high_val;

  gimple *stmt = gimple_build_assign (ind_var, n1);

  /* Replace the GIMPLE_OMP_FOR statement.  */
  gsi_replace (&gsi, stmt, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
      stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
				  build_one_cst (type));

      /* Replace GIMPLE_OMP_CONTINUE.  */
      gsi_replace (&gsi, stmt, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_after_labels (l1_bb);
  t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
		   fold_convert (TREE_TYPE (fd->loop.step), ind_var),
		   fd->loop.step);
  if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
    t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
		     fd->loop.n1, fold_convert (sizetype, t));
  else
    t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
		     fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
  t = fold_convert (TREE_TYPE (fd->loop.v), t);
  expand_omp_build_assign (&gsi, fd->loop.v, t);

  /* The condition is always '<' since the runtime will fill in the low
     and high bounds.  */
  stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  edge e, ne;
  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = REG_BR_PROB_BASE * 7 / 8;
  ne->probability = REG_BR_PROB_BASE / 8;

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (!broken_loop)
    {
      struct loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = INT_MAX;
    }

  /* Pick the correct library function based on the precision of the
     induction variable type.  */
  tree lib_fun = NULL_TREE;
  if (TYPE_PRECISION (type) == 32)
    lib_fun = cilk_for_32_fndecl;
  else if (TYPE_PRECISION (type) == 64)
    lib_fun = cilk_for_64_fndecl;
  else
    gcc_unreachable ();

  gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);

  /* WS_ARGS contains the library function flavor to call:
     __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
     user-defined grain value.  If the user does not define one, then zero
     is passed in by the parser.  */
  vec_alloc (region->ws_args, 2);
  region->ws_args->quick_push (lib_fun);
  region->ws_args->quick_push (fd->chunk_size);
}
/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
   loop.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	V = 0;
	V1 = N11;
	V2 = N21;
	V3 = N31;
	goto L1;
    L0:
	BODY;
	V += 1;
	V3 += STEP3;
	V2 += (V3 cond3 N32) ? 0 : STEP2;
	V3 = (V3 cond3 N32) ? V3 : N31;
	V1 += (V2 cond2 N22) ? 0 : STEP1;
	V2 = (V2 cond2 N22) ? V2 : N21;
    L1:
	if (V < count) goto L0; else goto L2;
    L2:
*/
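/* As a concrete illustration (values assumed for the example): for
   collapse(2) with N11 = 0, N12 = 3, STEP1 = 1, N21 = 0, N22 = 2 and
   STEP2 = 1, count1 = 3, count2 = 2 and count = 6; the single logical
   induction variable V runs from 0 to 5 while V1 and V2 are stepped and
   reset as shown above.  */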
static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  int safelen_int = INT_MAX;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree n1, n2;

  if (safelen)
    {
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (TREE_CODE (safelen) != INTEGER_CST)
	safelen_int = 0;
      else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
	safelen_int = tree_to_uhwi (safelen);
      if (safelen_int == 1)
	safelen_int = 0;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;

  bool is_simt = (safelen_int > 1
		  && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				      OMP_CLAUSE__SIMT_));
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }
  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  if (fd->collapse > 1)
    {
      if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  gsi_prev (&gsi);
	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
	  gsi_next (&gsi);
	}
      else
	for (i = 0; i < fd->collapse; i++)
	  {
	    tree itype = TREE_TYPE (fd->loops[i].v);
	    if (POINTER_TYPE_P (itype))
	      itype = signed_type_for (itype);
	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  }
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);

      if (fd->collapse > 1)
	{
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      t = fold_convert (sizetype, fd->loops[i].step);
	      t = fold_build_pointer_plus (fd->loops[i].v, t);
	    }
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  for (i = fd->collapse - 1; i > 0; i--)
	    {
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
	      if (POINTER_TYPE_P (itype2))
		itype2 = signed_type_for (itype2);
	      t = build3 (COND_EXPR, itype2,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  build_int_cst (itype2, 0),
			  fold_convert (itype2, fd->loops[i - 1].step));
	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
	      else
		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);

	      t = build3 (COND_EXPR, itype,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  fd->loops[i].v,
			  fold_convert (itype, fd->loops[i].n1));
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	    }
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }
  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  t = fold_convert (type, n2);
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  tree v = fd->loop.v;
  if (DECL_P (v) && TREE_ADDRESSABLE (v))
    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = REG_BR_PROB_BASE * 7 / 8;
  ne->probability = REG_BR_PROB_BASE / 8;

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
      BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop)
    {
      struct loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || (!global_options_set.x_flag_tree_loop_vectorize
	       && !global_options_set.x_flag_tree_vectorize))
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  cfun->has_force_vectorize_loops = true;
	}
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */
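/* Schematically (an illustrative sketch of that nesting, not from the
   original sources):

	GIMPLE_OMP_FOR			<-- outer, expanded here
	  GIMPLE_OMP_TASK
	    GIMPLE_OMP_FOR		<-- inner, expanded further below
	      BODY  */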
static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }
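  /* Adding the bias maps the signed iteration space monotonically onto the
     unsigned iterator type: e.g. (illustrative values) a signed long long
     bound of -5 biased by LLONG_MIN compares below a biased bound of +5 in
     unsigned arithmetic, as required.  */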
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS,
						region->entry));
}
/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.  */
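/* In other words, by the time the inner construct is expanded, the
   GOMP_taskloop{,_ull} call has already stored each task's start and end
   iteration in the first two _LOOPTEMP_ clauses, so the code below simply
   loops from one to the other.  */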
static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like:

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop

   Note: CHUNKING & GWV mask are specified explicitly here.  This is a
   transition, and will be specified by a more general mechanism shortly.
 */
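/* The IFN_GOACC_LOOP_* internal calls emitted below act as placeholders:
   oacc_device_lower later expands them, using the GWV mask, into
   target-specific arithmetic over the active gang/worker/vector ids, so
   this routine only lays out the chunk/offset/bound control flow.  */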
static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
		       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two successors; the branch edge is to the exit
     block, the fallthrough edge to the body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or a block whose only successor is body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1)
    {
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
					      TREE_TYPE (fd->loop.n2));

      if (SSA_VAR_P (fd->loop.n2))
	{
	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
					    true, GSI_SAME_STMT);
	  ass = gimple_build_assign (fd->loop.n2, total);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	}
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
  if (negating)
    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
  s = fold_convert (diff_type, s);
  if (negating)
    s = fold_build1 (NEGATE_EXPR, diff_type, s);
  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);

  if (!chunking)
    chunk_size = integer_zero_node;
  expr = fold_convert (diff_type, chunk_size);
  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);
  /* Determine the range, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (iter_type);
  expr = fold_build2 (MINUS_EXPR, plus_type,
		      fold_convert (plus_type, negating ? b : e),
		      fold_convert (plus_type, negating ? e : b));
  expr = fold_convert (diff_type, expr);
  if (negating)
    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
  tree range = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  chunk_no = build_int_cst (diff_type, 0);
  if (chunking)
    {
      gcc_assert (!gimple_in_ssa_p (cfun));

      expr = chunk_no;
      chunk_max = create_tmp_var (diff_type, ".chunk_max");
      chunk_no = create_tmp_var (diff_type, ".chunk_no");

      ass = gimple_build_assign (chunk_no, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
					 build_int_cst (integer_type_node,
							IFN_GOACC_LOOP_CHUNKS),
					 dir, range, s, chunk_size, gwv);
      gimple_call_set_lhs (call, chunk_max);
      gimple_set_location (call, loc);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    }
  else
    chunk_size = chunk_no;

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_STEP),
				     dir, range, s, chunk_size, gwv);
  gimple_call_set_lhs (call, step);
  gimple_set_location (call, loc);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);
  /* Fixup edges from head_bb.  */
  be = BRANCH_EDGE (head_bb);
  fte = FALLTHRU_EDGE (head_bb);
  be->flags |= EDGE_FALSE_VALUE;
  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

  basic_block body_bb = fte->dest;

  if (gimple_in_ssa_p (cfun))
    {
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));

      offset = gimple_omp_continue_control_use (cont_stmt);
      offset_incr = gimple_omp_continue_control_def (cont_stmt);
    }
  else
    {
      offset = create_tmp_var (diff_type, ".offset");
      offset_init = offset_incr = offset;
    }
  bound = create_tmp_var (TREE_TYPE (offset), ".bound");

  /* Loop offset & bound go into head_bb.  */
  gsi = gsi_start_bb (head_bb);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_OFFSET),
				     dir, range, s,
				     chunk_size, gwv, chunk_no);
  gimple_call_set_lhs (call, offset_init);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_BOUND),
				     dir, range, s,
				     chunk_size, gwv, offset_init);
  gimple_call_set_lhs (call, bound);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
		    GSI_CONTINUE_LINKING);

  /* V assignment goes into body_bb.  */
  if (!gimple_in_ssa_p (cfun))
    {
      gsi = gsi_start_bb (body_bb);

      expr = build2 (plus_code, iter_type, b,
		     fold_convert (plus_type, offset));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      if (fd->collapse > 1)
	expand_oacc_collapse_vars (fd, &gsi, counts, v);
    }

  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it was, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
  if (cont_bb)
    {
      gsi = gsi_last_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      loc = gimple_location (cont_stmt);

      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
	expr = build2 (plus_code, iter_type, offset,
		       fold_convert (plus_type, step));
      else
	expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5533 /* Split the beginning of exit_bb to make bottom_bb. We
5534 need to insert a nop at the start, because splitting is
5535 after a stmt, not before. */
5536 gsi
= gsi_start_bb (exit_bb
);
5537 stmt
= gimple_build_nop ();
5538 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5539 split
= split_block (exit_bb
, stmt
);
5540 bottom_bb
= split
->src
;
5541 exit_bb
= split
->dest
;
5542 gsi
= gsi_last_bb (bottom_bb
);
5544 /* Chunk increment and test goes into bottom_bb. */
5545 expr
= build2 (PLUS_EXPR
, diff_type
, chunk_no
,
5546 build_int_cst (diff_type
, 1));
5547 ass
= gimple_build_assign (chunk_no
, expr
);
5548 gsi_insert_after (&gsi
, ass
, GSI_CONTINUE_LINKING
);
5550 /* Chunk test at end of bottom_bb. */
5551 expr
= build2 (LT_EXPR
, boolean_type_node
, chunk_no
, chunk_max
);
5552 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
5553 GSI_CONTINUE_LINKING
);
5555 /* Fixup edges from bottom_bb. */
5556 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
5557 make_edge (bottom_bb
, head_bb
, EDGE_TRUE_VALUE
);
5561 gsi
= gsi_last_bb (exit_bb
);
5562 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
5563 loc
= gimple_location (gsi_stmt (gsi
));
5565 if (!gimple_in_ssa_p (cfun
))
5567 /* Insert the final value of V, in case it is live. This is the
5568 value for the only thread that survives past the join. */
5569 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
5570 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
5571 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
5572 expr
= fold_build2 (MULT_EXPR
, diff_type
, expr
, s
);
5573 expr
= build2 (plus_code
, iter_type
, b
, fold_convert (plus_type
, expr
));
5574 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
5575 true, GSI_SAME_STMT
);
5576 ass
= gimple_build_assign (v
, expr
);
5577 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
5580 /* Remove the OMP_RETURN. */
5581 gsi_remove (&gsi
, true);
5585 /* We now have one or two nested loops. Update the loop
5587 struct loop
*parent
= entry_bb
->loop_father
;
5588 struct loop
*body
= body_bb
->loop_father
;
5592 struct loop
*chunk_loop
= alloc_loop ();
5593 chunk_loop
->header
= head_bb
;
5594 chunk_loop
->latch
= bottom_bb
;
5595 add_loop (chunk_loop
, parent
);
5596 parent
= chunk_loop
;
5598 else if (parent
!= body
)
5600 gcc_assert (body
->header
== body_bb
);
5601 gcc_assert (body
->latch
== cont_bb
5602 || single_pred (body
->latch
) == cont_bb
);
5608 struct loop
*body_loop
= alloc_loop ();
5609 body_loop
->header
= body_bb
;
5610 body_loop
->latch
= cont_bb
;
5611 add_loop (body_loop
, parent
);
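
/* Editor's illustration (not part of the original sources): the blocks
   wired up above for a chunked OpenACC loop execute roughly as

     head_bb:   offset = .GOACC_LOOP (OFFSET, dir, range, s,
                                      chunk_size, gwv, chunk_no);
                bound  = .GOACC_LOOP (BOUND, dir, range, s,
                                      chunk_size, gwv, offset);
                if (!(offset COND bound)) goto bottom_bb;
     body_bb:   v = b + offset;  ... loop body ...
     cont_bb:   offset += step;
                if (offset COND bound) goto body_bb;
     bottom_bb: chunk_no++;
                if (chunk_no < chunk_max) goto head_bb;

   where COND stands for cond_code, the loop's direction-dependent
   comparison.  */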
/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
              * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
                        &fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
    expand_cilk_for (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
        expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
        expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
           && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
        expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
        expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
                  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
          && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
        fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
        {
        case OMP_CLAUSE_SCHEDULE_RUNTIME:
          fn_index = 3;
          break;
        case OMP_CLAUSE_SCHEDULE_DYNAMIC:
        case OMP_CLAUSE_SCHEDULE_GUIDED:
          if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
              && !fd.ordered
              && !fd.have_ordered)
            {
              fn_index = 3 + fd.sched_kind;
              break;
            }
          /* FALLTHRU */
        default:
          fn_index = fd.sched_kind;
          break;
        }
      if (!fd.ordered)
        fn_index += fd.have_ordered * 6;
      if (fd.ordered)
        start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
        start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
        {
          start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
                       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
          next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
                      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
        }
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
                              (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
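
/* Editor's worked example (hedged; exact builtin ordering lives in the
   builtins definitions): for schedule(dynamic,4) without ordered, the
   fn_index arithmetic above selects the dynamic entry points, so the loop
   is expanded through GOMP_loop_dynamic_start/GOMP_loop_dynamic_next
   (the *_ull_* variants when fd.iter_type is unsigned long long).  The
   'have_ordered * 6' adjustment shifts the index onto the corresponding
   *_ordered_* entry points instead.  */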
/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

        v = GOMP_sections_start (n);
    L0:
        switch (v)
          {
          case 0:
            goto L2;
          case 1:
            section 1;
            goto L1;
          case 2:
            ...
          case n:
            ...
          default:
            abort ();
          }
    L1:
        v = GOMP_sections_next ();
        goto L0;
    L2:
        reduction;

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with a call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
          || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
        l2 = gimple_block_label (e->dest);
      else
        FOR_EACH_EDGE (e, ei, l0_bb->succs)
          {
            si = gsi_last_bb (e->dest);
            if (gsi_end_p (si)
                || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
              {
                l2 = gimple_block_label (e->dest);
                break;
              }
          }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to signal that no more work
     remains, and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
         call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si))
              == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
        {
          --i;
          --casei;
          continue;
        }

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
        continue;

      si = gsi_last_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
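
/* Editor's illustration (not part of the original sources): a two-section
   construct

     #pragma omp sections
     {
       #pragma omp section
         a ();
       #pragma omp section
         b ();
     }

   becomes, per the pseudo code above, approximately

     for (v = GOMP_sections_start (2); v; v = GOMP_sections_next ())
       switch (v)
         {
         case 1: a (); break;
         case 2: b (); break;
         default: __builtin_trap ();
         }
     GOMP_sections_end ();  */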
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.  */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  si = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}
/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
                        tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  call = build_call_expr_loc (loc, decl, 2, addr,
                              build_int_cst (NULL,
                                             gimple_omp_atomic_seq_cst_p (stmt)
                                             ? MEMMODEL_SEQ_CST
                                             : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
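
/* Editor's example (not from the original sources): for
   '#pragma omp atomic read' of an 8-byte long, INDEX is 3, tmpbase
   resolves to BUILT_IN_ATOMIC_LOAD_8, and the region collapses to

     v = __atomic_load_8 (&x, MEMMODEL_RELAXED);

   (MEMMODEL_SEQ_CST when the seq_cst clause is present).  */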
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
                         tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
                              build_int_cst (NULL,
                                             gimple_omp_atomic_seq_cst_p (stmt)
                                             ? MEMMODEL_SEQ_CST
                                             : MEMMODEL_RELAXED));
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
        call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
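
/* Editor's example (not from the original sources): '#pragma omp atomic
   write' of a 4-byte int becomes

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   while an atomic capture that also needs the old value is emitted as an
   __atomic_exchange_4 whose result is assigned to the capture
   variable.  */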
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
                            tree addr, tree loaded_val,
                            tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
           && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
             ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true))
    return false;

  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
                              fold_convert_loc (loc, itype, rhs),
                              build_int_cst (NULL,
                                             seq_cst ? MEMMODEL_SEQ_CST
                                                     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}
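
/* Editor's example (not from the original sources): under
   '#pragma omp atomic', 'x += n' on a 4-byte int maps to
   __atomic_fetch_add_4 (&x, n, MEMMODEL_RELAXED); the capture form
   'v = (x += n)' needs the new value (need_new) and therefore maps to
   __atomic_add_fetch_4, whose result is assigned to v.  */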
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
        newval = rhs;    // with oldval replacing *addr in rhs
        oldval = __sync_val_compare_and_swap (addr, oldval, newval);
        if (oldval != newval)
          goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
                            tree addr, tree loaded_val, tree stored_val,
                            int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
                                    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
                                                           true));
      iaddr_val
        = force_gimple_operand_gsi (&si,
                                    fold_convert (TREE_TYPE (iaddr), addr),
                                    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
        loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
                      build_call_expr (loaddecl, 2, iaddr,
                                       build_int_cst (NULL_TREE,
                                                      MEMMODEL_RELAXED)));
  else
    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
                      build_int_cst (TREE_TYPE (iaddr), 0));

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
                                GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
               initial);
    }
  else
    gsi_insert_before (&si,
                       gimple_build_assign (loadedi, initial),
                       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
        {
          gassign *stmt;
          x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
          stmt = gimple_build_assign (loaded_val, x);
          gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
        }
      else
        {
          x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
          force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
        }
    }
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi =
      force_gimple_operand_gsi (&si,
                                build1 (VIEW_CONVERT_EXPR, itype,
                                        stored_val), true, NULL_TREE, true,
                                GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
                                          fold_convert (TREE_TYPE (loadedi),
                                                        new_storedi),
                                          true, NULL_TREE,
                                          true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  stmt = gimple_build_cond_empty
           (build2 (NE_EXPR, boolean_type_node,
                    new_storedi, old_vali));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
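
/* Editor's sketch (not from the original sources) of the loop built above
   for a 4-byte float x, where the compare-and-swap works on the integer
   image of the value:

     unsigned int *iaddr = (unsigned int *) &x;
     unsigned int loadedi = *iaddr;
   repeat:
     unsigned int storedi = <new value, VIEW_CONVERTed to unsigned int>;
     unsigned int old = __sync_val_compare_and_swap (iaddr, loadedi,
                                                     storedi);
     if (old != loadedi)
       { loadedi = old; goto repeat; }

   Comparing the integer images is what makes NaNs and -0.0 behave.  */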
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

                                GOMP_atomic_start ();
                                *addr = rhs;
                                GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
                         tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
                              stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}
/* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand
   using expand_omp_atomic_fetch_op.  If that fails, we try
   expand_omp_atomic_pipeline, and if that fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
        {
          /* Atomic load.  */
          if (loaded_val == stored_val
              && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
                  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
              && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
              && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
            return;

          /* Atomic store.  */
          if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
               || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
              && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
              && store_bb == single_succ (load_bb)
              && first_stmt (store_bb) == store
              && expand_omp_atomic_store (load_bb, addr, loaded_val,
                                          stored_val, index))
            return;

          /* When possible, use specialized atomic update functions.  */
          if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
              && store_bb == single_succ (load_bb)
              && expand_omp_atomic_fetch_op (load_bb, addr,
                                             loaded_val, stored_val, index))
            return;

          /* If we don't have specialized __sync builtins, try and implement
             as a compare and swap loop.  */
          if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
                                          loaded_val, stored_val, index))
            return;
        }
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
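
/* Editor's note (example, not from the original sources): a 4-byte int
   field in a packed struct with 1-byte alignment fails the
   'exact_log2 (align) >= index' test above (0 < 2), so none of the
   __atomic/__sync expansions are attempted and the operation falls
   through to the GOMP_atomic_start/GOMP_atomic_end mutex path.  */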
/* Mark the loops inside the kernels region starting at REGION_ENTRY and
   ending at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
                                   basic_block region_exit)
{
  struct loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
        continue;

      if (region_exit != NULL
          && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
        continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (struct loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
                  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
                  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
                  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
                         "__gomp_kernel_launch_attributes",
                         grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}
/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
                             tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
                     build3 (COMPONENT_REF,
                             grid_attr_trees->kernel_dim_array_type,
                             range_var, fld_decl, NULL_TREE),
                     build_int_cst (integer_type_node, index),
                     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}
/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
                                   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
                                "__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
        continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
                                   grid_attr_trees->kernel_lattrs_grid_decl,
                                   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
                                   grid_attr_trees->kernel_lattrs_group_decl,
                                   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
                        grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
                     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}
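
/* For reference (editor's illustration): the structure built by
   grid_create_kernel_launch_attr_types and filled in above corresponds to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   with at most three dimensions (max_dim <= 2) actually stored.  */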
/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                     build_int_cst (integer_type_node,
                                    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}
/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
                           tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
                        fold_convert (integer_type_node, value),
                        build_int_cst (unsigned_type_node,
                                       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
                   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}
/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in
   it, otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   elements.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
                                         int id, tree value, vec<tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
                                        GSI_SAME_STMT);
      args->quick_push (value);
    }
}
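
/* Editor's example (not from the original sources, constants defined in
   gomp-constants.h): a constant 'num_teams (4)' clause fits the signed
   16-bit range checked above, so a single element encoding

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   is pushed; a non-constant or wider value instead takes two elements,
   an identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set followed by
   the value itself.  */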
/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec<tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_NUM_TEAMS, t,
                                           &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
                                           GOMP_TARGET_ARG_THREAD_LIMIT, t,
                                           &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                       OMP_CLAUSE__GRIDDIM_))
    {
      t = get_target_argument_identifier
        (GOMP_DEVICE_HSA, true, GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
                                                          args.length () + 1),
                                  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                         build_int_cst (integer_type_node, i),
                         NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
                         GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
                     build_int_cst (integer_type_node, args.length ()),
                     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
                     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
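
/* Editor's note: the resulting .omp_target_args variable is thus a
   NULL-terminated array of pointer-sized elements, e.g.
   { <num_teams element>, <thread_limit element>,
     <optional HSA attribute identifier + pointer>, NULL },
   which the runtime decodes when launching the region.  */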
/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the offloading body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
        {
          basic_block entry_succ_bb = single_succ (entry_bb);
          gimple_stmt_iterator gsi;
          tree arg;
          gimple *tgtcopy_stmt = NULL;
          tree sender = TREE_VEC_ELT (data_arg, 0);

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We're ignoring the subcode because we're
                     effectively doing a STRIP_NOPS.  */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && TREE_OPERAND (arg, 0) == sender)
                    {
                      tgtcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (tgtcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
          gsi_remove (&gsi, true);
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
         rather than in containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
         so that it can be moved to the child function.  */
      gsi = gsi_last_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
                  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
        vec_safe_push (offload_funcs, child_fn);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_neeeded (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
  enum built_in_function start_ix;
  location_t clause_loc;
  unsigned int flags_i = 0;
  bool oacc_kernels_p = false;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      oacc_kernels_p = true;
      /* FALLTHRU */
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
     library choose) and there is no conditional.  */
  cond = NULL_TREE;
  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
  if (c)
    {
      /* Even if we pass it to all library function calls, it is currently only
         defined/used for the OpenMP target ones.  */
      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
                           || start_ix == BUILT_IN_GOMP_TARGET_DATA
                           || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
                           || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);

      device = OMP_CLAUSE_DEVICE_ID (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
  if (c)
    flags_i |= GOMP_TARGET_FLAG_NOWAIT;

  /* Ensure 'device' is of the correct type.  */
  device = fold_convert_loc (clause_loc, integer_type_node, device);

  /* If we found the clause 'if (cond)', build
     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (device));
      if (offloaded)
        e = split_block_after_labels (new_bb);
      else
        {
          gsi = gsi_last_bb (new_bb);
          gsi_prev (&gsi);
          e = split_block (new_bb, gsi_stmt (gsi));
        }
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
                                  build_int_cst (integer_type_node,
                                                 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
                                         true, GSI_SAME_STMT);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
        depend = OMP_CLAUSE_DECL (c);
      else
        depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
        args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      {
        oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
        tagging = true;
      }
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
        tree t_async = NULL_TREE;

        /* If present, use the value specified by the respective
           clause, making sure that is of the correct type.  */
        c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
        if (c)
          t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                      integer_type_node,
                                      OMP_CLAUSE_ASYNC_EXPR (c));
        else if (!tagging)
          /* Default values for t_async.  */
          t_async = fold_convert_loc (gimple_location (entry_stmt),
                                      integer_type_node,
                                      build_int_cst (integer_type_node,
                                                     GOMP_ASYNC_SYNC));
        if (tagging && t_async)
          {
            unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

            if (TREE_CODE (t_async) == INTEGER_CST)
              {
                /* See if we can pack the async arg in to the tag's
                   operand.  */
                i_async = TREE_INT_CST_LOW (t_async);
                if (i_async < GOMP_LAUNCH_OP_MAX)
                  t_async = NULL_TREE;
                else
                  i_async = GOMP_LAUNCH_OP_MAX;
              }
            args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
                                              i_async));
          }
        if (t_async)
          args.safe_push (t_async);

        /* Save the argument index, and ... */
        unsigned t_wait_idx = args.length ();
        unsigned num_waits = 0;
        c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
        if (!tagging || c)
          /* ... push a placeholder.  */
          args.safe_push (integer_zero_node);

        for (; c; c = OMP_CLAUSE_CHAIN (c))
          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
            {
              args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
                                                integer_type_node,
                                                OMP_CLAUSE_WAIT_EXPR (c)));
              num_waits++;
            }

        if (!tagging || num_waits)
          {
            tree len;

            /* Now that we know the number, update the placeholder.  */
            if (tagging)
              len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
            else
              len = build_int_cst (integer_type_node, num_waits);
            len = fold_convert_loc (gimple_location (entry_stmt),
                                    unsigned_type_node, len);
            args[t_wait_idx] = len;
          }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}
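
/* Editor's sketch (hedged; the exact libgomp entry points and signatures
   are defined elsewhere): for '#pragma omp target map(tofrom: x)' the
   code above outlines the body into child_fn and emits a launch call
   along the lines of

     GOMP_target_ext (device, child_fn, 1, hostaddrs, sizes, kinds,
                      flags, depend, args);

   where the three arrays describe the single mapping of x and 'args' is
   the array built by get_target_arguments.  */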
7378 /* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
7379 iteration variable derived from the thread number. INTRA_GROUP means this
7380 is an expansion of a loop iterating over work-items within a separate
7381 iteration over groups. */
7384 grid_expand_omp_for_loop (struct omp_region
*kfor
, bool intra_group
)
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Remove the omp for statement.  */
  gsi = gsi_last_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}
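
/* Illustrative example of the transformation above (schematic; the builtin
   call is written as a plain function name here).  A gridified loop

     for (i = n1; i < n2; i += step)
       <body>;

   loses its back edge and GIMPLE_OMP_CONTINUE entirely; every work-item
   executes <body> exactly once with its induction variable seeded as

     i = n1 + workitem_abs_id (dim) * step;   // BUILT_IN_HSA_WORKITEMABSID

   (or the BUILT_IN_HSA_WORKGROUPID / BUILT_IN_HSA_WORKITEMID builtins for
   the group and intra-group variants), so no loop remains in the kernel
   body.  */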
/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   uses of the original function's argument to the kernel function's one.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};

/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
   pertaining to the kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}
/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();
}
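
/* To summarize the gridification flow above (schematic; the source construct
   is only an example of what lowering may have marked as gridifiable): for
   something like

     #pragma omp target teams distribute parallel for

   the GIMPLE_OMP_GRID_BODY region is unlinked from the target region, its
   grid loops are flattened by grid_expand_omp_for_loop, and the body is
   moved into a fresh function cloned from the target child fn and named
   with a "kernel" suffix via clone_function_name, which is then registered
   with the HSA back end alongside the original host fallback.  */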
/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
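
/* For instance (schematic), given nested directives

     #pragma omp parallel        <- region A
       #pragma omp for           <- region B, inner of A

   expand_omp recurses into B first, so the workshare loop is rewritten
   into explicit iteration-space computations before A itself is outlined
   by expand_omp_taskreg; A therefore never sees unexpanded OMP statements
   in its body.  (Combined parallel+workshare regions detected by
   determine_parallel_type take a special, merged expansion path.)  */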
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	  ;
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
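
/* Example (schematic): for a function body containing

     #pragma omp parallel
       {
         #pragma omp single
           ...
       }

   walking the dominator tree builds the nested region tree

     GIMPLE_OMP_PARALLEL
       GIMPLE_OMP_SINGLE

   where each region records its entry block, the block of its matching
   GIMPLE_OMP_RETURN as exit, and, for loop-like constructs, the
   GIMPLE_OMP_CONTINUE block as cont.  */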
/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}
/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}
/* Scan the CFG and build a tree of OMP regions.  The root of the tree
   is left in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}
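
/* The net effect of this entry point on a simple parallel region is,
   schematically (child fn naming and argument details vary by clauses):

     #pragma omp parallel
       body;

   becomes

     main._omp_fn.0 (.omp_data_i) { body; }   <- outlined by this pass
     __builtin_GOMP_parallel (main._omp_fn.0, &.omp_data_o, 0, 0);

   i.e. all OMP directives are gone afterwards and only calls into libgomp
   plus ordinary control flow remain.  */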
/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace
gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE OMP statements.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}
8195 #include "gt-omp-expand.h"