/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
69 /* The enclosing region. */
70 struct omp_region
*outer
;
72 /* First child region. */
73 struct omp_region
*inner
;
75 /* Next peer region. */
76 struct omp_region
*next
;
78 /* Block containing the omp directive as its last stmt. */
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
90 vec
<tree
, va_gc
> *ws_args
;
92 /* The code for the omp directive of this region. */
93 enum gimple_code type
;
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind
;
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers
;
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel
;
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional
;
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109 gomp_ordered
*ord_stmt
;
112 static struct omp_region
*root_omp_region
;
113 static bool omp_any_child_fn_dumped
;
115 static void expand_omp_build_assign (gimple_stmt_iterator
*, tree
, tree
,
117 static gphi
*find_phi_with_arg_on_edge (tree
, edge
);
118 static void expand_omp (struct omp_region
*region
);
120 /* Return true if REGION is a combined parallel+workshare region. */
123 is_combined_parallel (struct omp_region
*region
)
125 return region
->is_combined_parallel
;
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
139 #pragma omp parallel for schedule (guided, i * 4)
144 # BLOCK 2 (PAR_ENTRY_BB)
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
152 #pragma omp for schedule (guided, D.1598)
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
171 workshare_safe_to_combine_p (basic_block ws_entry_bb
)
173 struct omp_for_data fd
;
174 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
176 if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
179 gcc_assert (gimple_code (ws_stmt
) == GIMPLE_OMP_FOR
);
180 if (gimple_omp_for_kind (ws_stmt
) != GF_OMP_FOR_KIND_FOR
)
183 omp_extract_for_data (as_a
<gomp_for
*> (ws_stmt
), &fd
, NULL
);
185 if (fd
.collapse
> 1 && TREE_CODE (fd
.loop
.n2
) != INTEGER_CST
)
187 if (fd
.iter_type
!= long_integer_type_node
)
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
195 if (!is_gimple_min_invariant (fd
.loop
.n1
)
196 || !is_gimple_min_invariant (fd
.loop
.n2
)
197 || !is_gimple_min_invariant (fd
.loop
.step
)
198 || (fd
.chunk_size
&& !is_gimple_min_invariant (fd
.chunk_size
)))
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
208 omp_adjust_chunk_size (tree chunk_size
, bool simd_schedule
)
210 if (!simd_schedule
|| integer_zerop (chunk_size
))
213 poly_uint64 vf
= omp_max_vf ();
214 if (known_eq (vf
, 1U))
217 tree type
= TREE_TYPE (chunk_size
);
218 chunk_size
= fold_build2 (PLUS_EXPR
, type
, chunk_size
,
219 build_int_cst (type
, vf
- 1));
220 return fold_build2 (BIT_AND_EXPR
, type
, chunk_size
,
221 build_int_cst (type
, -vf
));
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
228 static vec
<tree
, va_gc
> *
229 get_ws_args_for (gimple
*par_stmt
, gimple
*ws_stmt
)
232 location_t loc
= gimple_location (ws_stmt
);
233 vec
<tree
, va_gc
> *ws_args
;
235 if (gomp_for
*for_stmt
= dyn_cast
<gomp_for
*> (ws_stmt
))
237 struct omp_for_data fd
;
240 omp_extract_for_data (for_stmt
, &fd
, NULL
);
244 if (gimple_omp_for_combined_into_p (for_stmt
))
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt
),
248 OMP_CLAUSE__LOOPTEMP_
);
250 n1
= OMP_CLAUSE_DECL (innerc
);
251 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
252 OMP_CLAUSE__LOOPTEMP_
);
254 n2
= OMP_CLAUSE_DECL (innerc
);
257 vec_alloc (ws_args
, 3 + (fd
.chunk_size
!= 0));
259 t
= fold_convert_loc (loc
, long_integer_type_node
, n1
);
260 ws_args
->quick_push (t
);
262 t
= fold_convert_loc (loc
, long_integer_type_node
, n2
);
263 ws_args
->quick_push (t
);
265 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.loop
.step
);
266 ws_args
->quick_push (t
);
270 t
= fold_convert_loc (loc
, long_integer_type_node
, fd
.chunk_size
);
271 t
= omp_adjust_chunk_size (t
, fd
.simd_schedule
);
272 ws_args
->quick_push (t
);
277 else if (gimple_code (ws_stmt
) == GIMPLE_OMP_SECTIONS
)
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb
= single_succ (gimple_bb (ws_stmt
));
283 t
= build_int_cst (unsigned_type_node
, EDGE_COUNT (bb
->succs
) - 1);
284 vec_alloc (ws_args
, 1);
285 ws_args
->quick_push (t
);
292 /* Discover whether REGION is a combined parallel+workshare region. */
295 determine_parallel_type (struct omp_region
*region
)
297 basic_block par_entry_bb
, par_exit_bb
;
298 basic_block ws_entry_bb
, ws_exit_bb
;
300 if (region
== NULL
|| region
->inner
== NULL
301 || region
->exit
== NULL
|| region
->inner
->exit
== NULL
302 || region
->inner
->cont
== NULL
)
305 /* We only support parallel+for and parallel+sections. */
306 if (region
->type
!= GIMPLE_OMP_PARALLEL
307 || (region
->inner
->type
!= GIMPLE_OMP_FOR
308 && region
->inner
->type
!= GIMPLE_OMP_SECTIONS
))
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb
= region
->entry
;
314 par_exit_bb
= region
->exit
;
315 ws_entry_bb
= region
->inner
->entry
;
316 ws_exit_bb
= region
->inner
->exit
;
318 /* Give up for task reductions on the parallel, while it is implementable,
319 adding another big set of APIs or slowing down the normal paths is
321 tree pclauses
= gimple_omp_parallel_clauses (last_stmt (par_entry_bb
));
322 if (omp_find_clause (pclauses
, OMP_CLAUSE__REDUCTEMP_
))
325 if (single_succ (par_entry_bb
) == ws_entry_bb
326 && single_succ (ws_exit_bb
) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb
)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb
))
329 || (last_and_only_stmt (ws_entry_bb
)
330 && last_and_only_stmt (par_exit_bb
))))
332 gimple
*par_stmt
= last_stmt (par_entry_bb
);
333 gimple
*ws_stmt
= last_stmt (ws_entry_bb
);
335 if (region
->inner
->type
== GIMPLE_OMP_FOR
)
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses
= gimple_omp_for_clauses (ws_stmt
);
347 tree c
= omp_find_clause (clauses
, OMP_CLAUSE_SCHEDULE
);
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c
) & OMP_CLAUSE_SCHEDULE_MASK
)
350 == OMP_CLAUSE_SCHEDULE_STATIC
)
351 || omp_find_clause (clauses
, OMP_CLAUSE_ORDERED
)
352 || omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
)
353 || ((c
= omp_find_clause (clauses
, OMP_CLAUSE__CONDTEMP_
))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c
)))))
357 else if (region
->inner
->type
== GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt
),
359 OMP_CLAUSE__REDUCTEMP_
)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt
),
361 OMP_CLAUSE__CONDTEMP_
)))
364 region
->is_combined_parallel
= true;
365 region
->inner
->is_combined_parallel
= true;
366 region
->ws_args
= get_ws_args_for (par_stmt
, ws_stmt
);
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region
*, int);
372 void debug_omp_region (struct omp_region
*);
373 void debug_all_omp_regions (void);
375 /* Dump the parallel region tree rooted at REGION. */
378 dump_omp_region (FILE *file
, struct omp_region
*region
, int indent
)
380 fprintf (file
, "%*sbb %d: %s\n", indent
, "", region
->entry
->index
,
381 gimple_code_name
[region
->type
]);
384 dump_omp_region (file
, region
->inner
, indent
+ 4);
388 fprintf (file
, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent
, "",
389 region
->cont
->index
);
393 fprintf (file
, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent
, "",
394 region
->exit
->index
);
396 fprintf (file
, "%*s[no exit marker]\n", indent
, "");
399 dump_omp_region (file
, region
->next
, indent
);
403 debug_omp_region (struct omp_region
*region
)
405 dump_omp_region (stderr
, region
, 0);
409 debug_all_omp_regions (void)
411 dump_omp_region (stderr
, root_omp_region
, 0);
414 /* Create a new parallel region starting at STMT inside region PARENT. */
416 static struct omp_region
*
417 new_omp_region (basic_block bb
, enum gimple_code type
,
418 struct omp_region
*parent
)
420 struct omp_region
*region
= XCNEW (struct omp_region
);
422 region
->outer
= parent
;
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region
->next
= parent
->inner
;
431 parent
->inner
= region
;
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region
->next
= root_omp_region
;
438 root_omp_region
= region
;
444 /* Release the memory associated with the region tree rooted at REGION. */
447 free_omp_region_1 (struct omp_region
*region
)
449 struct omp_region
*i
, *n
;
451 for (i
= region
->inner
; i
; i
= n
)
454 free_omp_region_1 (i
);
460 /* Release the memory for the entire omp region tree. */
463 omp_free_regions (void)
465 struct omp_region
*r
, *n
;
466 for (r
= root_omp_region
; r
; r
= n
)
469 free_omp_region_1 (r
);
471 root_omp_region
= NULL
;
474 /* A convenience function to build an empty GIMPLE_COND with just the
478 gimple_build_cond_empty (tree cond
)
480 enum tree_code pred_code
;
483 gimple_cond_get_ops_from_tree (cond
, &pred_code
, &lhs
, &rhs
);
484 return gimple_build_cond (pred_code
, lhs
, rhs
, NULL_TREE
, NULL_TREE
);
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
496 adjust_context_and_scope (struct omp_region
*region
, tree entry_block
,
499 tree parent_fndecl
= NULL_TREE
;
501 /* OMP expansion expands inner regions before outer ones, so if
502 we e.g. have explicit task region nested in parallel region, when
503 expanding the task region current_function_decl will be the original
504 source function, but we actually want to use as context the child
505 function of the parallel. */
506 for (region
= region
->outer
;
507 region
&& parent_fndecl
== NULL_TREE
; region
= region
->outer
)
508 switch (region
->type
)
510 case GIMPLE_OMP_PARALLEL
:
511 case GIMPLE_OMP_TASK
:
512 case GIMPLE_OMP_TEAMS
:
513 entry_stmt
= last_stmt (region
->entry
);
514 parent_fndecl
= gimple_omp_taskreg_child_fn (entry_stmt
);
516 case GIMPLE_OMP_TARGET
:
517 entry_stmt
= last_stmt (region
->entry
);
519 = gimple_omp_target_child_fn (as_a
<gomp_target
*> (entry_stmt
));
525 if (parent_fndecl
== NULL_TREE
)
526 parent_fndecl
= current_function_decl
;
527 DECL_CONTEXT (child_fndecl
) = parent_fndecl
;
529 if (entry_block
!= NULL_TREE
&& TREE_CODE (entry_block
) == BLOCK
)
531 tree b
= BLOCK_SUPERCONTEXT (entry_block
);
532 if (TREE_CODE (b
) == BLOCK
)
534 DECL_CHAIN (child_fndecl
) = BLOCK_VARS (b
);
535 BLOCK_VARS (b
) = child_fndecl
;
540 /* Build the function calls to GOMP_parallel etc to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where to insert the code. WS_ARGS
543 will be set if this is a call to a combined parallel+workshare
544 construct, it contains the list of additional arguments needed by
545 the workshare construct. */
548 expand_parallel_call (struct omp_region
*region
, basic_block bb
,
549 gomp_parallel
*entry_stmt
,
550 vec
<tree
, va_gc
> *ws_args
)
552 tree t
, t1
, t2
, val
, cond
, c
, clauses
, flags
;
553 gimple_stmt_iterator gsi
;
555 enum built_in_function start_ix
;
557 location_t clause_loc
;
558 vec
<tree
, va_gc
> *args
;
560 clauses
= gimple_omp_parallel_clauses (entry_stmt
);
562 /* Determine what flavor of GOMP_parallel we will be
564 start_ix
= BUILT_IN_GOMP_PARALLEL
;
565 tree rtmp
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
567 start_ix
= BUILT_IN_GOMP_PARALLEL_REDUCTIONS
;
568 else if (is_combined_parallel (region
))
570 switch (region
->inner
->type
)
573 gcc_assert (region
->inner
->sched_kind
!= OMP_CLAUSE_SCHEDULE_AUTO
);
574 switch (region
->inner
->sched_kind
)
576 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region
->inner
->has_lastprivate_conditional
!= 0)
581 else if ((region
->inner
->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC
) != 0)
584 else if ((region
->inner
->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0)
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
591 case OMP_CLAUSE_SCHEDULE_GUIDED
:
592 if ((region
->inner
->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0
594 && !region
->inner
->has_lastprivate_conditional
)
596 start_ix2
= 3 + region
->inner
->sched_kind
;
601 start_ix2
= region
->inner
->sched_kind
;
604 start_ix2
+= (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
;
605 start_ix
= (enum built_in_function
) start_ix2
;
607 case GIMPLE_OMP_SECTIONS
:
608 start_ix
= BUILT_IN_GOMP_PARALLEL_SECTIONS
;
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
618 val
= build_int_cst (unsigned_type_node
, 0);
619 flags
= build_int_cst (unsigned_type_node
, 0);
621 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
623 cond
= OMP_CLAUSE_IF_EXPR (c
);
625 c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_THREADS
);
628 val
= OMP_CLAUSE_NUM_THREADS_EXPR (c
);
629 clause_loc
= OMP_CLAUSE_LOCATION (c
);
632 clause_loc
= gimple_location (entry_stmt
);
634 c
= omp_find_clause (clauses
, OMP_CLAUSE_PROC_BIND
);
636 flags
= build_int_cst (unsigned_type_node
, OMP_CLAUSE_PROC_BIND_KIND (c
));
638 /* Ensure 'val' is of the correct type. */
639 val
= fold_convert_loc (clause_loc
, unsigned_type_node
, val
);
641 /* If we found the clause 'if (cond)', build either
642 (cond != 0) or (cond ? val : 1u). */
645 cond
= gimple_boolify (cond
);
647 if (integer_zerop (val
))
648 val
= fold_build2_loc (clause_loc
,
649 EQ_EXPR
, unsigned_type_node
, cond
,
650 build_int_cst (TREE_TYPE (cond
), 0));
653 basic_block cond_bb
, then_bb
, else_bb
;
654 edge e
, e_then
, e_else
;
655 tree tmp_then
, tmp_else
, tmp_join
, tmp_var
;
657 tmp_var
= create_tmp_var (TREE_TYPE (val
));
658 if (gimple_in_ssa_p (cfun
))
660 tmp_then
= make_ssa_name (tmp_var
);
661 tmp_else
= make_ssa_name (tmp_var
);
662 tmp_join
= make_ssa_name (tmp_var
);
671 e
= split_block_after_labels (bb
);
676 then_bb
= create_empty_bb (cond_bb
);
677 else_bb
= create_empty_bb (then_bb
);
678 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
679 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
681 stmt
= gimple_build_cond_empty (cond
);
682 gsi
= gsi_start_bb (cond_bb
);
683 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
685 gsi
= gsi_start_bb (then_bb
);
686 expand_omp_build_assign (&gsi
, tmp_then
, val
, true);
688 gsi
= gsi_start_bb (else_bb
);
689 expand_omp_build_assign (&gsi
, tmp_else
,
690 build_int_cst (unsigned_type_node
, 1),
693 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
694 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
695 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
696 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
697 e_then
= make_edge (then_bb
, bb
, EDGE_FALLTHRU
);
698 e_else
= make_edge (else_bb
, bb
, EDGE_FALLTHRU
);
700 if (gimple_in_ssa_p (cfun
))
702 gphi
*phi
= create_phi_node (tmp_join
, bb
);
703 add_phi_arg (phi
, tmp_then
, e_then
, UNKNOWN_LOCATION
);
704 add_phi_arg (phi
, tmp_else
, e_else
, UNKNOWN_LOCATION
);
710 gsi
= gsi_start_bb (bb
);
711 val
= force_gimple_operand_gsi (&gsi
, val
, true, NULL_TREE
,
712 false, GSI_CONTINUE_LINKING
);
715 gsi
= gsi_last_nondebug_bb (bb
);
716 t
= gimple_omp_parallel_data_arg (entry_stmt
);
718 t1
= null_pointer_node
;
720 t1
= build_fold_addr_expr (t
);
721 tree child_fndecl
= gimple_omp_parallel_child_fn (entry_stmt
);
722 t2
= build_fold_addr_expr (child_fndecl
);
724 vec_alloc (args
, 4 + vec_safe_length (ws_args
));
725 args
->quick_push (t2
);
726 args
->quick_push (t1
);
727 args
->quick_push (val
);
729 args
->splice (*ws_args
);
730 args
->quick_push (flags
);
732 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
733 builtin_decl_explicit (start_ix
), args
);
737 tree type
= TREE_TYPE (OMP_CLAUSE_DECL (rtmp
));
738 t
= build2 (MODIFY_EXPR
, type
, OMP_CLAUSE_DECL (rtmp
),
740 fold_convert (pointer_sized_int_node
, t
)));
742 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
743 false, GSI_CONTINUE_LINKING
);
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where to insert the code. */
750 expand_task_call (struct omp_region
*region
, basic_block bb
,
751 gomp_task
*entry_stmt
)
754 gimple_stmt_iterator gsi
;
755 location_t loc
= gimple_location (entry_stmt
);
757 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
759 tree ifc
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
760 tree untied
= omp_find_clause (clauses
, OMP_CLAUSE_UNTIED
);
761 tree mergeable
= omp_find_clause (clauses
, OMP_CLAUSE_MERGEABLE
);
762 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
763 tree finalc
= omp_find_clause (clauses
, OMP_CLAUSE_FINAL
);
764 tree priority
= omp_find_clause (clauses
, OMP_CLAUSE_PRIORITY
);
767 = (untied
? GOMP_TASK_FLAG_UNTIED
: 0)
768 | (mergeable
? GOMP_TASK_FLAG_MERGEABLE
: 0)
769 | (depend
? GOMP_TASK_FLAG_DEPEND
: 0);
771 bool taskloop_p
= gimple_omp_task_taskloop_p (entry_stmt
);
772 tree startvar
= NULL_TREE
, endvar
= NULL_TREE
, step
= NULL_TREE
;
773 tree num_tasks
= NULL_TREE
;
777 gimple
*g
= last_stmt (region
->outer
->entry
);
778 gcc_assert (gimple_code (g
) == GIMPLE_OMP_FOR
779 && gimple_omp_for_kind (g
) == GF_OMP_FOR_KIND_TASKLOOP
);
780 struct omp_for_data fd
;
781 omp_extract_for_data (as_a
<gomp_for
*> (g
), &fd
, NULL
);
782 startvar
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
783 endvar
= omp_find_clause (OMP_CLAUSE_CHAIN (startvar
),
784 OMP_CLAUSE__LOOPTEMP_
);
785 startvar
= OMP_CLAUSE_DECL (startvar
);
786 endvar
= OMP_CLAUSE_DECL (endvar
);
787 step
= fold_convert_loc (loc
, fd
.iter_type
, fd
.loop
.step
);
788 if (fd
.loop
.cond_code
== LT_EXPR
)
789 iflags
|= GOMP_TASK_FLAG_UP
;
790 tree tclauses
= gimple_omp_for_clauses (g
);
791 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_NUM_TASKS
);
793 num_tasks
= OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks
);
796 num_tasks
= omp_find_clause (tclauses
, OMP_CLAUSE_GRAINSIZE
);
799 iflags
|= GOMP_TASK_FLAG_GRAINSIZE
;
800 num_tasks
= OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks
);
803 num_tasks
= integer_zero_node
;
805 num_tasks
= fold_convert_loc (loc
, long_integer_type_node
, num_tasks
);
806 if (ifc
== NULL_TREE
)
807 iflags
|= GOMP_TASK_FLAG_IF
;
808 if (omp_find_clause (tclauses
, OMP_CLAUSE_NOGROUP
))
809 iflags
|= GOMP_TASK_FLAG_NOGROUP
;
810 ull
= fd
.iter_type
== long_long_unsigned_type_node
;
811 if (omp_find_clause (clauses
, OMP_CLAUSE_REDUCTION
))
812 iflags
|= GOMP_TASK_FLAG_REDUCTION
;
815 iflags
|= GOMP_TASK_FLAG_PRIORITY
;
817 tree flags
= build_int_cst (unsigned_type_node
, iflags
);
819 tree cond
= boolean_true_node
;
824 tree t
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
825 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
826 build_int_cst (unsigned_type_node
,
828 build_int_cst (unsigned_type_node
, 0));
829 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
,
833 cond
= gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc
));
838 tree t
= gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc
));
839 t
= fold_build3_loc (loc
, COND_EXPR
, unsigned_type_node
, t
,
840 build_int_cst (unsigned_type_node
,
841 GOMP_TASK_FLAG_FINAL
),
842 build_int_cst (unsigned_type_node
, 0));
843 flags
= fold_build2_loc (loc
, PLUS_EXPR
, unsigned_type_node
, flags
, t
);
846 depend
= OMP_CLAUSE_DECL (depend
);
848 depend
= build_int_cst (ptr_type_node
, 0);
850 priority
= fold_convert (integer_type_node
,
851 OMP_CLAUSE_PRIORITY_EXPR (priority
));
853 priority
= integer_zero_node
;
855 gsi
= gsi_last_nondebug_bb (bb
);
856 tree t
= gimple_omp_task_data_arg (entry_stmt
);
858 t2
= null_pointer_node
;
860 t2
= build_fold_addr_expr_loc (loc
, t
);
861 t1
= build_fold_addr_expr_loc (loc
, gimple_omp_task_child_fn (entry_stmt
));
862 t
= gimple_omp_task_copy_fn (entry_stmt
);
864 t3
= null_pointer_node
;
866 t3
= build_fold_addr_expr_loc (loc
, t
);
869 t
= build_call_expr (ull
870 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL
)
871 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP
),
873 gimple_omp_task_arg_size (entry_stmt
),
874 gimple_omp_task_arg_align (entry_stmt
), flags
,
875 num_tasks
, priority
, startvar
, endvar
, step
);
877 t
= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK
),
879 gimple_omp_task_arg_size (entry_stmt
),
880 gimple_omp_task_arg_align (entry_stmt
), cond
, flags
,
883 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
884 false, GSI_CONTINUE_LINKING
);
887 /* Build the function call to GOMP_taskwait_depend to actually
888 generate the taskwait operation. BB is the block where to insert the
892 expand_taskwait_call (basic_block bb
, gomp_task
*entry_stmt
)
894 tree clauses
= gimple_omp_task_clauses (entry_stmt
);
895 tree depend
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
896 if (depend
== NULL_TREE
)
899 depend
= OMP_CLAUSE_DECL (depend
);
901 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (bb
);
903 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND
),
906 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
907 false, GSI_CONTINUE_LINKING
);
910 /* Build the function call to GOMP_teams_reg to actually
911 generate the host teams operation. REGION is the teams region
912 being expanded. BB is the block where to insert the code. */
915 expand_teams_call (basic_block bb
, gomp_teams
*entry_stmt
)
917 tree clauses
= gimple_omp_teams_clauses (entry_stmt
);
918 tree num_teams
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
919 if (num_teams
== NULL_TREE
)
920 num_teams
= build_int_cst (unsigned_type_node
, 0);
923 num_teams
= OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams
);
924 num_teams
= fold_convert (unsigned_type_node
, num_teams
);
926 tree thread_limit
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
927 if (thread_limit
== NULL_TREE
)
928 thread_limit
= build_int_cst (unsigned_type_node
, 0);
931 thread_limit
= OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit
);
932 thread_limit
= fold_convert (unsigned_type_node
, thread_limit
);
935 gimple_stmt_iterator gsi
= gsi_last_nondebug_bb (bb
);
936 tree t
= gimple_omp_teams_data_arg (entry_stmt
), t1
;
938 t1
= null_pointer_node
;
940 t1
= build_fold_addr_expr (t
);
941 tree child_fndecl
= gimple_omp_teams_child_fn (entry_stmt
);
942 tree t2
= build_fold_addr_expr (child_fndecl
);
944 vec
<tree
, va_gc
> *args
;
946 args
->quick_push (t2
);
947 args
->quick_push (t1
);
948 args
->quick_push (num_teams
);
949 args
->quick_push (thread_limit
);
950 /* For future extensibility. */
951 args
->quick_push (build_zero_cst (unsigned_type_node
));
953 t
= build_call_expr_loc_vec (UNKNOWN_LOCATION
,
954 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG
),
957 force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
958 false, GSI_CONTINUE_LINKING
);
961 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
964 vec2chain (vec
<tree
, va_gc
> *v
)
966 tree chain
= NULL_TREE
, t
;
969 FOR_EACH_VEC_SAFE_ELT_REVERSE (v
, ix
, t
)
971 DECL_CHAIN (t
) = chain
;
978 /* Remove barriers in REGION->EXIT's block. Note that this is only
979 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
980 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
981 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
985 remove_exit_barrier (struct omp_region
*region
)
987 gimple_stmt_iterator gsi
;
992 int any_addressable_vars
= -1;
994 exit_bb
= region
->exit
;
996 /* If the parallel region doesn't return, we don't have REGION->EXIT
1001 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1002 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1003 statements that can appear in between are extremely limited -- no
1004 memory operations at all. Here, we allow nothing at all, so the
1005 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1006 gsi
= gsi_last_nondebug_bb (exit_bb
);
1007 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1008 gsi_prev_nondebug (&gsi
);
1009 if (!gsi_end_p (gsi
) && gimple_code (gsi_stmt (gsi
)) != GIMPLE_LABEL
)
1012 FOR_EACH_EDGE (e
, ei
, exit_bb
->preds
)
1014 gsi
= gsi_last_nondebug_bb (e
->src
);
1015 if (gsi_end_p (gsi
))
1017 stmt
= gsi_stmt (gsi
);
1018 if (gimple_code (stmt
) == GIMPLE_OMP_RETURN
1019 && !gimple_omp_return_nowait_p (stmt
))
1021 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1022 in many cases. If there could be tasks queued, the barrier
1023 might be needed to let the tasks run before some local
1024 variable of the parallel that the task uses as shared
1025 runs out of scope. The task can be spawned either
1026 from within current function (this would be easy to check)
1027 or from some function it calls and gets passed an address
1028 of such a variable. */
1029 if (any_addressable_vars
< 0)
1031 gomp_parallel
*parallel_stmt
1032 = as_a
<gomp_parallel
*> (last_stmt (region
->entry
));
1033 tree child_fun
= gimple_omp_parallel_child_fn (parallel_stmt
);
1034 tree local_decls
, block
, decl
;
1037 any_addressable_vars
= 0;
1038 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun
), ix
, decl
)
1039 if (TREE_ADDRESSABLE (decl
))
1041 any_addressable_vars
= 1;
1044 for (block
= gimple_block (stmt
);
1045 !any_addressable_vars
1047 && TREE_CODE (block
) == BLOCK
;
1048 block
= BLOCK_SUPERCONTEXT (block
))
1050 for (local_decls
= BLOCK_VARS (block
);
1052 local_decls
= DECL_CHAIN (local_decls
))
1053 if (TREE_ADDRESSABLE (local_decls
))
1055 any_addressable_vars
= 1;
1058 if (block
== gimple_block (parallel_stmt
))
1062 if (!any_addressable_vars
)
1063 gimple_omp_return_set_nowait (stmt
);
1069 remove_exit_barriers (struct omp_region
*region
)
1071 if (region
->type
== GIMPLE_OMP_PARALLEL
)
1072 remove_exit_barrier (region
);
1076 region
= region
->inner
;
1077 remove_exit_barriers (region
);
1078 while (region
->next
)
1080 region
= region
->next
;
1081 remove_exit_barriers (region
);
1086 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1087 calls. These can't be declared as const functions, but
1088 within one parallel body they are constant, so they can be
1089 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1090 which are declared const. Similarly for task body, except
1091 that in untied task omp_get_thread_num () can change at any task
1092 scheduling point. */
1095 optimize_omp_library_calls (gimple
*entry_stmt
)
/* Cache the builtin decls and their assembler names up front so the
   per-statement loop below can compare DECL_NAMEs cheaply.  */
1098 gimple_stmt_iterator gsi
;
1099 tree thr_num_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1100 tree thr_num_id
= DECL_ASSEMBLER_NAME (thr_num_tree
);
1101 tree num_thr_tree
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
1102 tree num_thr_id
= DECL_ASSEMBLER_NAME (num_thr_tree
);
/* In an untied task omp_get_thread_num () may change at scheduling
   points, so it must not be treated as constant there.  */
1103 bool untied_task
= (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1104 && omp_find_clause (gimple_omp_task_clauses (entry_stmt
),
1105 OMP_CLAUSE_UNTIED
) != NULL
);
/* Scan every statement of the current (child) function for eligible
   external, public, bodyless calls to the two library routines.  */
1107 FOR_EACH_BB_FN (bb
, cfun
)
1108 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1110 gimple
*call
= gsi_stmt (gsi
);
1113 if (is_gimple_call (call
)
1114 && (decl
= gimple_call_fndecl (call
))
1115 && DECL_EXTERNAL (decl
)
1116 && TREE_PUBLIC (decl
)
1117 && DECL_INITIAL (decl
) == NULL
)
1121 if (DECL_NAME (decl
) == thr_num_id
)
1123 /* In #pragma omp task untied omp_get_thread_num () can change
1124 during the execution of the task region. */
1127 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
1129 else if (DECL_NAME (decl
) == num_thr_id
)
1130 built_in
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
/* Only substitute when the call really matches the builtin: same
   assembler name, no arguments, compatible nothrow/exception
   expectations and compatible return type.  */
1134 if (DECL_ASSEMBLER_NAME (decl
) != DECL_ASSEMBLER_NAME (built_in
)
1135 || gimple_call_num_args (call
) != 0)
1138 if (flag_exceptions
&& !TREE_NOTHROW (decl
))
1141 if (TREE_CODE (TREE_TYPE (decl
)) != FUNCTION_TYPE
1142 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl
)),
1143 TREE_TYPE (TREE_TYPE (built_in
))))
/* All checks passed: redirect the call to the const builtin.  */
1146 gimple_call_set_fndecl (call
, built_in
);
1151 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
/* walk_tree callback: flags trees that must be regimplified after OMP
   expansion.  Triggers on VAR_DECLs carrying a DECL_VALUE_EXPR; also
   refreshes TREE_INVARIANT/TREE_CONSTANT on ADDR_EXPRs it passes.  */
1155 expand_omp_regimplify_p (tree
*tp
, int *walk_subtrees
, void *)
1159 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1160 if (VAR_P (t
) && DECL_HAS_VALUE_EXPR_P (t
))
1163 if (TREE_CODE (t
) == ADDR_EXPR
)
1164 recompute_tree_invariant_for_addr_expr (t
);
/* Do not walk into types or (non-matching) decls; nothing to find there.  */
1166 *walk_subtrees
= !TYPE_P (t
) && !DECL_P (t
);
1170 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1173 expand_omp_build_assign (gimple_stmt_iterator
*gsi_p
, tree to
, tree from
,
/* FROM must be gimplified first; if TO is an addressable decl the RHS
   may stay a "simple" value, otherwise force it into a register.  */
1176 bool simple_p
= DECL_P (to
) && TREE_ADDRESSABLE (to
);
1177 from
= force_gimple_operand_gsi (gsi_p
, from
, simple_p
, NULL_TREE
,
1178 !after
, after
? GSI_CONTINUE_LINKING
1180 gimple
*stmt
= gimple_build_assign (to
, from
);
/* Insert after *GSI_P when AFTER, otherwise before it.  */
1182 gsi_insert_after (gsi_p
, stmt
, GSI_CONTINUE_LINKING
);
1184 gsi_insert_before (gsi_p
, stmt
, GSI_SAME_STMT
);
/* If either operand contains something expand_omp_regimplify_p flags
   (e.g. a DECL_VALUE_EXPR variable), regimplify the new statement.  */
1185 if (walk_tree (&from
, expand_omp_regimplify_p
, NULL
, NULL
)
1186 || walk_tree (&to
, expand_omp_regimplify_p
, NULL
, NULL
))
1188 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1189 gimple_regimplify_operands (stmt
, &gsi
);
1193 /* Expand the OpenMP parallel or task directive starting at REGION. */
/* Outlines the region body into its child function (child_fn), wires the
   child into the callgraph, and replaces the directive with an explicit
   runtime call (expand_parallel_call / expand_teams_call /
   expand_task_call).  NOTE(review): this extraction has missing interior
   lines; the original text is kept verbatim below.  */
1196 expand_omp_taskreg (struct omp_region
*region
)
1198 basic_block entry_bb
, exit_bb
, new_bb
;
1199 struct function
*child_cfun
;
1200 tree child_fn
, block
, t
;
1201 gimple_stmt_iterator gsi
;
1202 gimple
*entry_stmt
, *stmt
;
1204 vec
<tree
, va_gc
> *ws_args
;
/* A GIMPLE_OMP_TASK with taskwait_p set is a bare taskwait with
   dependences: just delete the statement and emit the runtime call.  */
1206 entry_stmt
= last_stmt (region
->entry
);
1207 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
1208 && gimple_omp_task_taskwait_p (entry_stmt
))
1210 new_bb
= region
->entry
;
1211 gsi
= gsi_last_nondebug_bb (region
->entry
);
1212 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
);
1213 gsi_remove (&gsi
, true);
1214 expand_taskwait_call (new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1218 child_fn
= gimple_omp_taskreg_child_fn (entry_stmt
);
1219 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
/* For a task the region body ends at the GIMPLE_OMP_CONTINUE block
   (region->cont); otherwise at region->exit.  */
1221 entry_bb
= region
->entry
;
1222 if (gimple_code (entry_stmt
) == GIMPLE_OMP_TASK
)
1223 exit_bb
= region
->cont
;
1225 exit_bb
= region
->exit
;
1227 if (is_combined_parallel (region
))
1228 ws_args
= region
->ws_args
;
/* If the child already has a CFG we have outlined it before (due to
   inlining); only detach the old sub-graph and emit the call.  */
1232 if (child_cfun
->cfg
)
1234 /* Due to inlining, it may happen that we have already outlined
1235 the region, in which case all we need to do is make the
1236 sub-graph unreachable and emit the parallel call. */
1237 edge entry_succ_e
, exit_succ_e
;
1239 entry_succ_e
= single_succ_edge (entry_bb
);
1241 gsi
= gsi_last_nondebug_bb (entry_bb
);
1242 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_PARALLEL
1243 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TASK
1244 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_TEAMS
);
1245 gsi_remove (&gsi
, true);
1250 exit_succ_e
= single_succ_edge (exit_bb
);
1251 make_edge (new_bb
, exit_succ_e
->dest
, EDGE_FALLTHRU
);
1253 remove_edge_and_dominated_blocks (entry_succ_e
);
1257 unsigned srcidx
, dstidx
, num
;
1259 /* If the parallel region needs data sent from the parent
1260 function, then the very first statement (except possible
1261 tree profile counter updates) of the parallel body
1262 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1263 &.OMP_DATA_O is passed as an argument to the child function,
1264 we need to replace it with the argument as seen by the child
1267 In most cases, this will end up being the identity assignment
1268 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1269 a function call that has been inlined, the original PARM_DECL
1270 .OMP_DATA_I may have been converted into a different local
1271 variable. In which case, we need to keep the assignment. */
1272 if (gimple_omp_taskreg_data_arg (entry_stmt
))
1274 basic_block entry_succ_bb
1275 = single_succ_p (entry_bb
) ? single_succ (entry_bb
)
1276 : FALLTHRU_EDGE (entry_bb
)->dest
;
1278 gimple
*parcopy_stmt
= NULL
;
/* Find the .OMP_DATA_I = &.OMP_DATA_O copy at the top of the body.  */
1280 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
1284 gcc_assert (!gsi_end_p (gsi
));
1285 stmt
= gsi_stmt (gsi
);
1286 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1289 if (gimple_num_ops (stmt
) == 2)
1291 tree arg
= gimple_assign_rhs1 (stmt
);
1293 /* We ignore the subcode because we're
1294 effectively doing a STRIP_NOPS. */
1296 if (TREE_CODE (arg
) == ADDR_EXPR
1297 && (TREE_OPERAND (arg
, 0)
1298 == gimple_omp_taskreg_data_arg (entry_stmt
)))
1300 parcopy_stmt
= stmt
;
1306 gcc_assert (parcopy_stmt
!= NULL
);
1307 arg
= DECL_ARGUMENTS (child_fn
);
/* Rewrite the copy to read the child's incoming parameter; drop it
   entirely when it degenerates to an identity assignment (non-SSA).  */
1309 if (!gimple_in_ssa_p (cfun
))
1311 if (gimple_assign_lhs (parcopy_stmt
) == arg
)
1312 gsi_remove (&gsi
, true);
1315 /* ?? Is setting the subcode really necessary ?? */
1316 gimple_omp_set_subcode (parcopy_stmt
, TREE_CODE (arg
));
1317 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1322 tree lhs
= gimple_assign_lhs (parcopy_stmt
);
1323 gcc_assert (SSA_NAME_VAR (lhs
) == arg
);
1324 /* We'd like to set the rhs to the default def in the child_fn,
1325 but it's too early to create ssa names in the child_fn.
1326 Instead, we set the rhs to the parm. In
1327 move_sese_region_to_fn, we introduce a default def for the
1328 parm, map the parm to its default def, and once we encounter
1329 this stmt, replace the parm with the default def. */
1330 gimple_assign_set_rhs1 (parcopy_stmt
, arg
);
1331 update_stmt (parcopy_stmt
);
1335 /* Declare local variables needed in CHILD_CFUN. */
1336 block
= DECL_INITIAL (child_fn
);
1337 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
1338 /* The gimplifier could record temporaries in parallel/task block
1339 rather than in containing function's local_decls chain,
1340 which would mean cgraph missed finalizing them. Do it now. */
1341 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
1342 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
1343 varpool_node::finalize_decl (t
);
1344 DECL_SAVED_TREE (child_fn
) = NULL
;
1345 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1346 gimple_set_body (child_fn
, NULL
);
1347 TREE_USED (block
) = 1;
1349 /* Reset DECL_CONTEXT on function arguments. */
1350 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
1351 DECL_CONTEXT (t
) = child_fn
;
1353 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1354 so that it can be moved to the child function. */
1355 gsi
= gsi_last_nondebug_bb (entry_bb
);
1356 stmt
= gsi_stmt (gsi
);
1357 gcc_assert (stmt
&& (gimple_code (stmt
) == GIMPLE_OMP_PARALLEL
1358 || gimple_code (stmt
) == GIMPLE_OMP_TASK
1359 || gimple_code (stmt
) == GIMPLE_OMP_TEAMS
));
1360 e
= split_block (entry_bb
, stmt
);
1361 gsi_remove (&gsi
, true);
1364 if (gimple_code (entry_stmt
) != GIMPLE_OMP_TASK
)
1365 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
/* Task regions keep an abnormal edge from entry to the region exit.  */
1368 e2
= make_edge (e
->src
, BRANCH_EDGE (entry_bb
)->dest
, EDGE_ABNORMAL
);
1369 gcc_assert (e2
->dest
== region
->exit
);
1370 remove_edge (BRANCH_EDGE (entry_bb
));
1371 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e
->src
);
1372 gsi
= gsi_last_nondebug_bb (region
->exit
);
1373 gcc_assert (!gsi_end_p (gsi
)
1374 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
1375 gsi_remove (&gsi
, true);
1378 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1381 gsi
= gsi_last_nondebug_bb (exit_bb
);
1382 gcc_assert (!gsi_end_p (gsi
)
1383 && (gimple_code (gsi_stmt (gsi
))
1384 == (e2
? GIMPLE_OMP_CONTINUE
: GIMPLE_OMP_RETURN
)));
1385 stmt
= gimple_build_return (NULL
);
1386 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1387 gsi_remove (&gsi
, true);
1390 /* Move the parallel region into CHILD_CFUN. */
1392 if (gimple_in_ssa_p (cfun
))
1394 init_tree_ssa (child_cfun
);
1395 init_ssa_operands (child_cfun
);
1396 child_cfun
->gimple_df
->in_ssa_p
= true;
1400 block
= gimple_block (entry_stmt
);
1402 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
1404 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
1407 basic_block dest_bb
= e2
->dest
;
1409 make_edge (new_bb
, dest_bb
, EDGE_FALLTHRU
);
1411 set_immediate_dominator (CDI_DOMINATORS
, dest_bb
, new_bb
);
1413 /* When the OMP expansion process cannot guarantee an up-to-date
1414 loop tree arrange for the child function to fixup loops. */
1415 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1416 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
1418 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1419 num
= vec_safe_length (child_cfun
->local_decls
);
1420 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
1422 t
= (*child_cfun
->local_decls
)[srcidx
];
1423 if (DECL_CONTEXT (t
) == cfun
->decl
)
1425 if (srcidx
!= dstidx
)
1426 (*child_cfun
->local_decls
)[dstidx
] = t
;
1430 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
1432 /* Inform the callgraph about the new function. */
1433 child_cfun
->curr_properties
= cfun
->curr_properties
;
1434 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
1435 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
1436 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
1437 node
->parallelized_function
= 1;
1438 cgraph_node::add_new_function (child_fn
, true);
1440 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
1441 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
1443 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1444 fixed in a following pass. */
1445 push_cfun (child_cfun
);
1447 assign_assembler_name_if_needed (child_fn
);
1450 optimize_omp_library_calls (entry_stmt
);
1451 update_max_bb_count ();
1452 cgraph_edge::rebuild_edges ();
1454 /* Some EH regions might become dead, see PR34608. If
1455 pass_cleanup_cfg isn't the first pass to happen with the
1456 new child, these dead EH edges might cause problems.
1457 Clean them up now. */
1458 if (flag_exceptions
)
1461 bool changed
= false;
1463 FOR_EACH_BB_FN (bb
, cfun
)
1464 changed
|= gimple_purge_dead_eh_edges (bb
);
1466 cleanup_tree_cfg ();
1468 if (gimple_in_ssa_p (cfun
))
1469 update_ssa (TODO_update_ssa
);
1470 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
1471 verify_loop_structure ();
1474 if (dump_file
&& !gimple_in_ssa_p (cfun
))
1476 omp_any_child_fn_dumped
= true;
1477 dump_function_header (dump_file
, child_fn
, dump_flags
);
1478 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
1482 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
/* Finally, emit the libgomp call appropriate for the directive kind.  */
1484 if (gimple_code (entry_stmt
) == GIMPLE_OMP_PARALLEL
)
1485 expand_parallel_call (region
, new_bb
,
1486 as_a
<gomp_parallel
*> (entry_stmt
), ws_args
);
1487 else if (gimple_code (entry_stmt
) == GIMPLE_OMP_TEAMS
)
1488 expand_teams_call (new_bb
, as_a
<gomp_teams
*> (entry_stmt
));
1490 expand_task_call (region
, new_bb
, as_a
<gomp_task
*> (entry_stmt
));
1491 if (gimple_in_ssa_p (cfun
))
1492 update_ssa (TODO_update_ssa_only_virtuals
);
1495 /* Information about members of an OpenACC collapsed loop nest. */
/* One entry per collapsed loop; filled in by expand_oacc_collapse_init
   and consumed by expand_oacc_collapse_vars.  */
1497 struct oacc_collapse
1499 tree base
; /* Base value. */
1500 tree iters
; /* Number of steps. */
1501 tree step
; /* Step size. */
1502 tree tile
; /* Tile increment (if tiled). */
1503 tree outer
; /* Tile iterator var. */
1506 /* Helper for expand_oacc_for. Determine collapsed loop information.
1507 Fill in COUNTS array. Emit any initialization code before GSI.
1508 Return the calculated outer loop bound of BOUND_TYPE. */
/* Iterates the collapsed loops innermost-first, recording per-loop
   base/iters/step (and tile data when FD->tiling) into COUNTS and
   accumulating the product of iteration counts into TOTAL.
   NOTE(review): this extraction has missing interior lines; original
   text kept verbatim.  */
1511 expand_oacc_collapse_init (const struct omp_for_data
*fd
,
1512 gimple_stmt_iterator
*gsi
,
1513 oacc_collapse
*counts
, tree bound_type
,
1516 tree tiling
= fd
->tiling
;
1517 tree total
= build_int_cst (bound_type
, 1);
/* The combined outer loop is expected to be normalized: unit step,
   zero lower bound.  */
1520 gcc_assert (integer_onep (fd
->loop
.step
));
1521 gcc_assert (integer_zerop (fd
->loop
.n1
));
1523 /* When tiling, the first operand of the tile clause applies to the
1524 innermost loop, and we work outwards from there. Seems
1525 backwards, but whatever. */
1526 for (ix
= fd
->collapse
; ix
--;)
1528 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
/* Pick types: pointer IVs step through sizetype; the difference type
   must be signed and at least int-wide.  */
1530 tree iter_type
= TREE_TYPE (loop
->v
);
1531 tree diff_type
= iter_type
;
1532 tree plus_type
= iter_type
;
1534 gcc_assert (loop
->cond_code
== fd
->loop
.cond_code
);
1536 if (POINTER_TYPE_P (iter_type
))
1537 plus_type
= sizetype
;
1538 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
1539 diff_type
= signed_type_for (diff_type
);
1540 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
1541 diff_type
= integer_type_node
;
/* Tiled loop: emit an IFN_GOACC_TILE call whose result is the tile
   increment, and create fresh .outer/.tile temporaries.  */
1545 tree num
= build_int_cst (integer_type_node
, fd
->collapse
);
1546 tree loop_no
= build_int_cst (integer_type_node
, ix
);
1547 tree tile
= TREE_VALUE (tiling
);
1549 = gimple_build_call_internal (IFN_GOACC_TILE
, 5, num
, loop_no
, tile
,
1550 /* gwv-outer=*/integer_zero_node
,
1551 /* gwv-inner=*/integer_zero_node
);
1553 counts
[ix
].outer
= create_tmp_var (iter_type
, ".outer");
1554 counts
[ix
].tile
= create_tmp_var (diff_type
, ".tile");
1555 gimple_call_set_lhs (call
, counts
[ix
].tile
);
1556 gimple_set_location (call
, loc
);
1557 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
1559 tiling
= TREE_CHAIN (tiling
);
/* Untiled loop: the loop's own IV serves as the outer variable.  */
1563 counts
[ix
].tile
= NULL
;
1564 counts
[ix
].outer
= loop
->v
;
1569 tree s
= loop
->step
;
1570 bool up
= loop
->cond_code
== LT_EXPR
;
1571 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
1575 b
= force_gimple_operand_gsi (gsi
, b
, true, NULL_TREE
,
1576 true, GSI_SAME_STMT
);
1577 e
= force_gimple_operand_gsi (gsi
, e
, true, NULL_TREE
,
1578 true, GSI_SAME_STMT
);
1580 /* Convert the step, avoiding possible unsigned->signed overflow. */
1581 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
1583 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
1584 s
= fold_convert (diff_type
, s
);
1586 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
1587 s
= force_gimple_operand_gsi (gsi
, s
, true, NULL_TREE
,
1588 true, GSI_SAME_STMT
);
1590 /* Determine the range, avoiding possible unsigned->signed overflow. */
1591 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
1592 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
1593 fold_convert (plus_type
, negating
? b
: e
),
1594 fold_convert (plus_type
, negating
? e
: b
));
1595 expr
= fold_convert (diff_type
, expr
);
1597 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
1598 tree range
= force_gimple_operand_gsi
1599 (gsi
, expr
, true, NULL_TREE
, true, GSI_SAME_STMT
);
1601 /* Determine number of iterations. */
1602 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
1603 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
1604 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
1606 tree iters
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
,
1607 true, GSI_SAME_STMT
);
/* Record this loop's data and fold its trip count into the total.  */
1609 counts
[ix
].base
= b
;
1610 counts
[ix
].iters
= iters
;
1611 counts
[ix
].step
= s
;
1613 total
= fold_build2 (MULT_EXPR
, bound_type
, total
,
1614 fold_convert (bound_type
, iters
));
1620 /* Emit initializers for collapsed loop members. INNER is true if
1621 this is for the element loop of a TILE. IVAR is the outer
1622 loop iteration variable, from which collapsed loop iteration values
1623 are calculated. COUNTS array has been initialized by
1624 expand_oacc_collapse_init. */
/* Decomposes the flat index IVAR into the per-loop iteration values by
   repeated div/mod with each loop's trip count, innermost-first, and
   assigns each result before *GSI.
   NOTE(review): this extraction has missing interior lines; original
   text kept verbatim.  */
1627 expand_oacc_collapse_vars (const struct omp_for_data
*fd
, bool inner
,
1628 gimple_stmt_iterator
*gsi
,
1629 const oacc_collapse
*counts
, tree ivar
)
1631 tree ivar_type
= TREE_TYPE (ivar
);
1633 /* The most rapidly changing iteration variable is the innermost
1635 for (int ix
= fd
->collapse
; ix
--;)
1637 const omp_for_data_loop
*loop
= &fd
->loops
[ix
];
1638 const oacc_collapse
*collapse
= &counts
[ix
];
/* Inner (element) loops of a tile assign the loop's own IV; otherwise
   the recorded outer/tile iterator is assigned.  */
1639 tree v
= inner
? loop
->v
: collapse
->outer
;
1640 tree iter_type
= TREE_TYPE (v
);
1641 tree diff_type
= TREE_TYPE (collapse
->step
);
1642 tree plus_type
= iter_type
;
1643 enum tree_code plus_code
= PLUS_EXPR
;
/* Pointer IVs advance with POINTER_PLUS_EXPR over sizetype offsets.  */
1646 if (POINTER_TYPE_P (iter_type
))
1648 plus_code
= POINTER_PLUS_EXPR
;
1649 plus_type
= sizetype
;
/* Split IVAR: this loop's local index is IVAR mod iters; the quotient
   carries to the next-outer loop.  */
1655 tree mod
= fold_convert (ivar_type
, collapse
->iters
);
1656 ivar
= fold_build2 (TRUNC_DIV_EXPR
, ivar_type
, expr
, mod
);
1657 expr
= fold_build2 (TRUNC_MOD_EXPR
, ivar_type
, expr
, mod
);
1658 ivar
= force_gimple_operand_gsi (gsi
, ivar
, true, NULL_TREE
,
1659 true, GSI_SAME_STMT
);
/* v = base (or outer) + index * step, then emit the assignment.  */
1662 expr
= fold_build2 (MULT_EXPR
, diff_type
, fold_convert (diff_type
, expr
),
1664 expr
= fold_build2 (plus_code
, iter_type
,
1665 inner
? collapse
->outer
: collapse
->base
,
1666 fold_convert (plus_type
, expr
));
1667 expr
= force_gimple_operand_gsi (gsi
, expr
, false, NULL_TREE
,
1668 true, GSI_SAME_STMT
);
1669 gassign
*ass
= gimple_build_assign (v
, expr
);
1670 gsi_insert_before (gsi
, ass
, GSI_SAME_STMT
);
1674 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1675 of the combined collapse > 1 loop constructs, generate code like:
1676 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1681 count3 = (adj + N32 - N31) / STEP3;
1682 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1687 count2 = (adj + N22 - N21) / STEP2;
1688 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1693 count1 = (adj + N12 - N11) / STEP1;
1694 count = count1 * count2 * count3;
1695 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1697 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1698 of the combined loop constructs, just initialize COUNTS array
1699 from the _looptemp_ clauses. For loop nests with non-rectangular
1700 loops, do this only for the rectangular loops. Then pick
1701 the loops which reference outer vars in their bound expressions
1702 and the loops which they refer to and for this sub-nest compute
1703 number of iterations. For triangular loops use Faulhaber's formula,
1704 otherwise as a fallback, compute by iterating the loops.
1705 If e.g. the sub-nest is
1706 for (I = N11; I COND1 N12; I += STEP1)
1707 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1708 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1711 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1712 for (tmpj = M21 * tmpi + N21;
1713 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1715 int tmpk1 = M31 * tmpj + N31;
1716 int tmpk2 = M32 * tmpj + N32;
1717 if (tmpk1 COND3 tmpk2)
1723 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1726 and finally multiply the counts of the rectangular loops not
1727 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1728 store number of iterations of the loops from fd->first_nonrect
1729 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1730 by the counts of rectangular loops not referenced in any non-rectangular
1731 loops sandwiched in between those. */
1733 /* NOTE: It *could* be better to moosh all of the BBs together,
1734 creating one larger BB with all the computation and the unexpected
1735 jump at the end. I.e.
1737 bool zero3, zero2, zero1, zero;
1740 count3 = (N32 - N31) /[cl] STEP3;
1742 count2 = (N22 - N21) /[cl] STEP2;
1744 count1 = (N12 - N11) /[cl] STEP1;
1745 zero = zero3 || zero2 || zero1;
1746 count = count1 * count2 * count3;
1747 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1749 After all, we expect the zero=false, and thus we expect to have to
1750 evaluate all of the comparison expressions, so short-circuiting
1751 oughtn't be a win. Since the condition isn't protecting a
1752 denominator, we're not concerned about divide-by-zero, so we can
1753 fully evaluate count even if a numerator turned out to be wrong.
1755 It seems like putting this all together would create much better
1756 scheduling opportunities, and less pressure on the chip's branch
1760 expand_omp_for_init_counts (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
1761 basic_block
&entry_bb
, tree
*counts
,
1762 basic_block
&zero_iter1_bb
, int &first_zero_iter1
,
1763 basic_block
&zero_iter2_bb
, int &first_zero_iter2
,
1764 basic_block
&l2_dom_bb
)
1766 tree t
, type
= TREE_TYPE (fd
->loop
.v
);
1770 /* Collapsed loops need work for expansion into SSA form. */
1771 gcc_assert (!gimple_in_ssa_p (cfun
));
1773 if (gimple_omp_for_combined_into_p (fd
->for_stmt
)
1774 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
1776 gcc_assert (fd
->ordered
== 0);
1777 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1778 isn't supposed to be handled, as the inner loop doesn't
1780 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
1781 OMP_CLAUSE__LOOPTEMP_
);
1782 gcc_assert (innerc
);
1783 for (i
= 0; i
< fd
->collapse
; i
++)
1785 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
1786 OMP_CLAUSE__LOOPTEMP_
);
1787 gcc_assert (innerc
);
1789 counts
[i
] = OMP_CLAUSE_DECL (innerc
);
1791 counts
[0] = NULL_TREE
;
1796 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1798 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1799 counts
[i
] = NULL_TREE
;
1800 t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1801 fold_convert (itype
, fd
->loops
[i
].n1
),
1802 fold_convert (itype
, fd
->loops
[i
].n2
));
1803 if (t
&& integer_zerop (t
))
1805 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
1806 counts
[i
] = build_int_cst (type
, 0);
1810 bool rect_count_seen
= false;
1811 for (i
= 0; i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
1813 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
1815 if (i
>= fd
->collapse
&& counts
[i
])
1819 /* Skip loops that use outer iterators in their expressions
1820 during this phase. */
1821 if (fd
->loops
[i
].m1
|| fd
->loops
[i
].m2
)
1823 counts
[i
] = build_zero_cst (type
);
1827 if ((SSA_VAR_P (fd
->loop
.n2
) || i
>= fd
->collapse
)
1828 && ((t
= fold_binary (fd
->loops
[i
].cond_code
, boolean_type_node
,
1829 fold_convert (itype
, fd
->loops
[i
].n1
),
1830 fold_convert (itype
, fd
->loops
[i
].n2
)))
1831 == NULL_TREE
|| !integer_onep (t
)))
1835 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
1836 n1
= force_gimple_operand_gsi (gsi
, n1
, true, NULL_TREE
,
1837 true, GSI_SAME_STMT
);
1838 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
1839 n2
= force_gimple_operand_gsi (gsi
, n2
, true, NULL_TREE
,
1840 true, GSI_SAME_STMT
);
1841 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
1842 NULL_TREE
, NULL_TREE
);
1843 gsi_insert_before (gsi
, cond_stmt
, GSI_SAME_STMT
);
1844 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
1845 expand_omp_regimplify_p
, NULL
, NULL
)
1846 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
1847 expand_omp_regimplify_p
, NULL
, NULL
))
1849 *gsi
= gsi_for_stmt (cond_stmt
);
1850 gimple_regimplify_operands (cond_stmt
, gsi
);
1852 e
= split_block (entry_bb
, cond_stmt
);
1853 basic_block
&zero_iter_bb
1854 = i
< fd
->collapse
? zero_iter1_bb
: zero_iter2_bb
;
1855 int &first_zero_iter
1856 = i
< fd
->collapse
? first_zero_iter1
: first_zero_iter2
;
1857 if (zero_iter_bb
== NULL
)
1859 gassign
*assign_stmt
;
1860 first_zero_iter
= i
;
1861 zero_iter_bb
= create_empty_bb (entry_bb
);
1862 add_bb_to_loop (zero_iter_bb
, entry_bb
->loop_father
);
1863 *gsi
= gsi_after_labels (zero_iter_bb
);
1864 if (i
< fd
->collapse
)
1865 assign_stmt
= gimple_build_assign (fd
->loop
.n2
,
1866 build_zero_cst (type
));
1869 counts
[i
] = create_tmp_reg (type
, ".count");
1871 = gimple_build_assign (counts
[i
], build_zero_cst (type
));
1873 gsi_insert_before (gsi
, assign_stmt
, GSI_SAME_STMT
);
1874 set_immediate_dominator (CDI_DOMINATORS
, zero_iter_bb
,
1877 ne
= make_edge (entry_bb
, zero_iter_bb
, EDGE_FALSE_VALUE
);
1878 ne
->probability
= profile_probability::very_unlikely ();
1879 e
->flags
= EDGE_TRUE_VALUE
;
1880 e
->probability
= ne
->probability
.invert ();
1881 if (l2_dom_bb
== NULL
)
1882 l2_dom_bb
= entry_bb
;
1884 *gsi
= gsi_last_nondebug_bb (entry_bb
);
1887 if (POINTER_TYPE_P (itype
))
1888 itype
= signed_type_for (itype
);
1889 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
1891 t
= fold_build2 (PLUS_EXPR
, itype
,
1892 fold_convert (itype
, fd
->loops
[i
].step
), t
);
1893 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
1894 fold_convert (itype
, fd
->loops
[i
].n2
));
1895 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
1896 fold_convert (itype
, fd
->loops
[i
].n1
));
1897 /* ?? We could probably use CEIL_DIV_EXPR instead of
1898 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1899 generate the same code in the end because generically we
1900 don't know that the values involved must be negative for
1902 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
1903 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1904 fold_build1 (NEGATE_EXPR
, itype
, t
),
1905 fold_build1 (NEGATE_EXPR
, itype
,
1906 fold_convert (itype
,
1907 fd
->loops
[i
].step
)));
1909 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
1910 fold_convert (itype
, fd
->loops
[i
].step
));
1911 t
= fold_convert (type
, t
);
1912 if (TREE_CODE (t
) == INTEGER_CST
)
1916 if (i
< fd
->collapse
|| i
!= first_zero_iter2
)
1917 counts
[i
] = create_tmp_reg (type
, ".count");
1918 expand_omp_build_assign (gsi
, counts
[i
], t
);
1920 if (SSA_VAR_P (fd
->loop
.n2
) && i
< fd
->collapse
)
1922 if (fd
->non_rect
&& i
>= fd
->first_nonrect
&& i
<= fd
->last_nonrect
)
1924 if (!rect_count_seen
)
1927 rect_count_seen
= true;
1930 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
, counts
[i
]);
1931 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
1934 if (fd
->non_rect
&& SSA_VAR_P (fd
->loop
.n2
))
1936 gcc_assert (fd
->last_nonrect
!= -1);
1938 counts
[fd
->last_nonrect
] = create_tmp_reg (type
, ".count");
1939 expand_omp_build_assign (gsi
, counts
[fd
->last_nonrect
],
1940 build_zero_cst (type
));
1941 for (i
= fd
->first_nonrect
+ 1; i
< fd
->last_nonrect
; i
++)
1944 || fd
->loops
[i
].non_rect_referenced
)
1946 if (i
== fd
->last_nonrect
1947 && fd
->loops
[i
].outer
== fd
->last_nonrect
- fd
->first_nonrect
1948 && !TYPE_UNSIGNED (TREE_TYPE (fd
->loops
[i
].v
)))
1950 int o
= fd
->first_nonrect
;
1951 tree itype
= TREE_TYPE (fd
->loops
[o
].v
);
1952 tree n1o
= create_tmp_reg (itype
, ".n1o");
1953 t
= fold_convert (itype
, unshare_expr (fd
->loops
[o
].n1
));
1954 expand_omp_build_assign (gsi
, n1o
, t
);
1955 tree n2o
= create_tmp_reg (itype
, ".n2o");
1956 t
= fold_convert (itype
, unshare_expr (fd
->loops
[o
].n2
));
1957 expand_omp_build_assign (gsi
, n2o
, t
);
1958 if (fd
->loops
[i
].m1
&& fd
->loops
[i
].m2
)
1959 t
= fold_build2 (MINUS_EXPR
, itype
, unshare_expr (fd
->loops
[i
].m2
),
1960 unshare_expr (fd
->loops
[i
].m1
));
1961 else if (fd
->loops
[i
].m1
)
1962 t
= fold_unary (NEGATE_EXPR
, itype
,
1963 unshare_expr (fd
->loops
[i
].m1
));
1965 t
= unshare_expr (fd
->loops
[i
].m2
);
1967 = force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
1968 true, GSI_SAME_STMT
);
1970 gimple_stmt_iterator gsi2
= *gsi
;
1972 e
= split_block (entry_bb
, gsi_stmt (gsi2
));
1973 e
= split_block (e
->dest
, (gimple
*) NULL
);
1974 basic_block bb1
= e
->src
;
1976 *gsi
= gsi_after_labels (entry_bb
);
1978 gsi2
= gsi_after_labels (bb1
);
1979 tree ostep
= fold_convert (itype
, fd
->loops
[o
].step
);
1980 t
= build_int_cst (itype
, (fd
->loops
[o
].cond_code
1981 == LT_EXPR
? -1 : 1));
1982 t
= fold_build2 (PLUS_EXPR
, itype
, ostep
, t
);
1983 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2o
);
1984 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1o
);
1985 if (TYPE_UNSIGNED (itype
)
1986 && fd
->loops
[o
].cond_code
== GT_EXPR
)
1987 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
1988 fold_build1 (NEGATE_EXPR
, itype
, t
),
1989 fold_build1 (NEGATE_EXPR
, itype
, ostep
));
1991 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, ostep
);
1993 = force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
1994 true, GSI_SAME_STMT
);
1995 t
= fold_build2 (MINUS_EXPR
, itype
, outer_niters
,
1996 build_one_cst (itype
));
1997 t
= fold_build2 (MULT_EXPR
, itype
, t
, ostep
);
1998 t
= fold_build2 (PLUS_EXPR
, itype
, n1o
, t
);
1999 tree last
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2000 true, GSI_SAME_STMT
);
2001 tree n1
, n2
, n1e
, n2e
;
2002 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
2003 if (fd
->loops
[i
].m1
)
2005 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m1
));
2006 n1
= fold_build2 (MULT_EXPR
, itype
, n1o
, n1
);
2007 n1
= fold_build2 (PLUS_EXPR
, itype
, n1
, t
);
2011 n1
= force_gimple_operand_gsi (&gsi2
, n1
, true, NULL_TREE
,
2012 true, GSI_SAME_STMT
);
2013 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
2014 if (fd
->loops
[i
].m2
)
2016 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m2
));
2017 n2
= fold_build2 (MULT_EXPR
, itype
, n1o
, n2
);
2018 n2
= fold_build2 (PLUS_EXPR
, itype
, n2
, t
);
2022 n2
= force_gimple_operand_gsi (&gsi2
, n2
, true, NULL_TREE
,
2023 true, GSI_SAME_STMT
);
2024 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
2025 if (fd
->loops
[i
].m1
)
2027 n1e
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m1
));
2028 n1e
= fold_build2 (MULT_EXPR
, itype
, last
, n1e
);
2029 n1e
= fold_build2 (PLUS_EXPR
, itype
, n1e
, t
);
2033 n1e
= force_gimple_operand_gsi (&gsi2
, n1e
, true, NULL_TREE
,
2034 true, GSI_SAME_STMT
);
2035 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
2036 if (fd
->loops
[i
].m2
)
2038 n2e
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m2
));
2039 n2e
= fold_build2 (MULT_EXPR
, itype
, last
, n2e
);
2040 n2e
= fold_build2 (PLUS_EXPR
, itype
, n2e
, t
);
2044 n2e
= force_gimple_operand_gsi (&gsi2
, n2e
, true, NULL_TREE
,
2045 true, GSI_SAME_STMT
);
2047 = gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
2048 NULL_TREE
, NULL_TREE
);
2049 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2050 e
= split_block (bb1
, cond_stmt
);
2051 e
->flags
= EDGE_TRUE_VALUE
;
2052 e
->probability
= profile_probability::likely ().guessed ();
2053 basic_block bb2
= e
->dest
;
2054 gsi2
= gsi_after_labels (bb2
);
2056 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1e
, n2e
,
2057 NULL_TREE
, NULL_TREE
);
2058 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2059 e
= split_block (bb2
, cond_stmt
);
2060 e
->flags
= EDGE_TRUE_VALUE
;
2061 e
->probability
= profile_probability::likely ().guessed ();
2062 gsi2
= gsi_after_labels (e
->dest
);
2064 tree step
= fold_convert (itype
, fd
->loops
[i
].step
);
2065 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
2066 == LT_EXPR
? -1 : 1));
2067 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
2068 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
2069 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
2070 if (TYPE_UNSIGNED (itype
)
2071 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2072 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
2073 fold_build1 (NEGATE_EXPR
, itype
, t
),
2074 fold_build1 (NEGATE_EXPR
, itype
, step
));
2076 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
2077 tree first_inner_iterations
2078 = force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2079 true, GSI_SAME_STMT
);
2080 t
= fold_build2 (MULT_EXPR
, itype
, m2minusm1
, ostep
);
2081 if (TYPE_UNSIGNED (itype
)
2082 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2083 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
2084 fold_build1 (NEGATE_EXPR
, itype
, t
),
2085 fold_build1 (NEGATE_EXPR
, itype
, step
));
2087 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
2089 = force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2090 true, GSI_SAME_STMT
);
2091 t
= fold_build2 (MINUS_EXPR
, itype
, outer_niters
,
2092 build_one_cst (itype
));
2093 t
= fold_build2 (MULT_EXPR
, itype
, t
, outer_niters
);
2094 t
= fold_build2 (RSHIFT_EXPR
, itype
, t
, integer_one_node
);
2095 t
= fold_build2 (MULT_EXPR
, itype
, factor
, t
);
2096 t
= fold_build2 (PLUS_EXPR
, itype
,
2097 fold_build2 (MULT_EXPR
, itype
, outer_niters
,
2098 first_inner_iterations
), t
);
2099 expand_omp_build_assign (&gsi2
, counts
[fd
->last_nonrect
],
2100 fold_convert (type
, t
));
2102 basic_block bb3
= create_empty_bb (bb1
);
2103 add_bb_to_loop (bb3
, bb1
->loop_father
);
2105 e
= make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
2106 e
->probability
= profile_probability::unlikely ().guessed ();
2108 gsi2
= gsi_after_labels (bb3
);
2109 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1e
, n2e
,
2110 NULL_TREE
, NULL_TREE
);
2111 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2112 e
= split_block (bb3
, cond_stmt
);
2113 e
->flags
= EDGE_TRUE_VALUE
;
2114 e
->probability
= profile_probability::likely ().guessed ();
2115 basic_block bb4
= e
->dest
;
2117 ne
= make_edge (bb3
, entry_bb
, EDGE_FALSE_VALUE
);
2118 ne
->probability
= e
->probability
.invert ();
2120 basic_block bb5
= create_empty_bb (bb2
);
2121 add_bb_to_loop (bb5
, bb2
->loop_father
);
2123 ne
= make_edge (bb2
, bb5
, EDGE_FALSE_VALUE
);
2124 ne
->probability
= profile_probability::unlikely ().guessed ();
2126 for (int j
= 0; j
< 2; j
++)
2128 gsi2
= gsi_after_labels (j
? bb5
: bb4
);
2129 t
= fold_build2 (MINUS_EXPR
, itype
,
2130 unshare_expr (fd
->loops
[i
].n1
),
2131 unshare_expr (fd
->loops
[i
].n2
));
2132 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, m2minusm1
);
2134 = force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2135 true, GSI_SAME_STMT
);
2136 t
= fold_build2 (MINUS_EXPR
, itype
, tem
, n1o
);
2137 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, t
, ostep
);
2138 t
= fold_build2 (MINUS_EXPR
, itype
, tem
, t
);
2139 tem
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2140 true, GSI_SAME_STMT
);
2141 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
2142 if (fd
->loops
[i
].m1
)
2144 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m1
));
2145 n1
= fold_build2 (MULT_EXPR
, itype
, tem
, n1
);
2146 n1
= fold_build2 (PLUS_EXPR
, itype
, n1
, t
);
2150 n1
= force_gimple_operand_gsi (&gsi2
, n1
, true, NULL_TREE
,
2151 true, GSI_SAME_STMT
);
2152 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
2153 if (fd
->loops
[i
].m2
)
2155 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m2
));
2156 n2
= fold_build2 (MULT_EXPR
, itype
, tem
, n2
);
2157 n2
= fold_build2 (PLUS_EXPR
, itype
, n2
, t
);
2161 n2
= force_gimple_operand_gsi (&gsi2
, n2
, true, NULL_TREE
,
2162 true, GSI_SAME_STMT
);
2163 expand_omp_build_assign (&gsi2
, j
? n2o
: n1o
, tem
);
2165 cond_stmt
= gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
2166 NULL_TREE
, NULL_TREE
);
2167 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2168 e
= split_block (gsi_bb (gsi2
), cond_stmt
);
2169 e
->flags
= j
? EDGE_TRUE_VALUE
: EDGE_FALSE_VALUE
;
2170 e
->probability
= profile_probability::unlikely ().guessed ();
2171 ne
= make_edge (e
->src
, bb1
,
2172 j
? EDGE_FALSE_VALUE
: EDGE_TRUE_VALUE
);
2173 ne
->probability
= e
->probability
.invert ();
2174 gsi2
= gsi_after_labels (e
->dest
);
2176 t
= fold_build2 (PLUS_EXPR
, itype
, tem
, ostep
);
2177 expand_omp_build_assign (&gsi2
, j
? n2o
: n1o
, t
);
2179 make_edge (e
->dest
, bb1
, EDGE_FALLTHRU
);
2182 set_immediate_dominator (CDI_DOMINATORS
, bb3
, bb1
);
2183 set_immediate_dominator (CDI_DOMINATORS
, bb5
, bb2
);
2184 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
, bb1
);
2186 if (fd
->first_nonrect
+ 1 == fd
->last_nonrect
)
2188 fd
->first_inner_iterations
= first_inner_iterations
;
2189 fd
->factor
= factor
;
2195 /* Fallback implementation. Evaluate the loops with m1/m2
2196 non-NULL as well as their outer loops at runtime using temporaries
2197 instead of the original iteration variables, and in the
2198 body just bump the counter. */
2199 gimple_stmt_iterator gsi2
= *gsi
;
2201 e
= split_block (entry_bb
, gsi_stmt (gsi2
));
2202 e
= split_block (e
->dest
, (gimple
*) NULL
);
2203 basic_block cur_bb
= e
->src
;
2204 basic_block next_bb
= e
->dest
;
2206 *gsi
= gsi_after_labels (entry_bb
);
2208 tree
*vs
= XALLOCAVEC (tree
, fd
->last_nonrect
);
2209 memset (vs
, 0, fd
->last_nonrect
* sizeof (tree
));
2211 for (i
= 0; i
<= fd
->last_nonrect
; i
++)
2213 if (fd
->loops
[i
].m1
== NULL_TREE
2214 && fd
->loops
[i
].m2
== NULL_TREE
2215 && !fd
->loops
[i
].non_rect_referenced
)
2218 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2220 gsi2
= gsi_after_labels (cur_bb
);
2222 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n1
));
2223 if (fd
->loops
[i
].m1
)
2225 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m1
));
2226 n1
= fold_build2 (MULT_EXPR
, itype
,
2227 vs
[i
- fd
->loops
[i
].outer
], n1
);
2228 n1
= fold_build2 (PLUS_EXPR
, itype
, n1
, t
);
2232 n1
= force_gimple_operand_gsi (&gsi2
, n1
, true, NULL_TREE
,
2233 true, GSI_SAME_STMT
);
2234 if (i
< fd
->last_nonrect
)
2236 vs
[i
] = create_tmp_reg (itype
, ".it");
2237 expand_omp_build_assign (&gsi2
, vs
[i
], n1
);
2239 t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].n2
));
2240 if (fd
->loops
[i
].m2
)
2242 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m2
));
2243 n2
= fold_build2 (MULT_EXPR
, itype
,
2244 vs
[i
- fd
->loops
[i
].outer
], n2
);
2245 n2
= fold_build2 (PLUS_EXPR
, itype
, n2
, t
);
2249 n2
= force_gimple_operand_gsi (&gsi2
, n2
, true, NULL_TREE
,
2250 true, GSI_SAME_STMT
);
2251 if (i
== fd
->last_nonrect
)
2254 = gimple_build_cond (fd
->loops
[i
].cond_code
, n1
, n2
,
2255 NULL_TREE
, NULL_TREE
);
2256 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2257 e
= split_block (cur_bb
, cond_stmt
);
2258 e
->flags
= EDGE_TRUE_VALUE
;
2259 ne
= make_edge (cur_bb
, next_bb
, EDGE_FALSE_VALUE
);
2260 e
->probability
= profile_probability::likely ().guessed ();
2261 ne
->probability
= e
->probability
.invert ();
2262 gsi2
= gsi_after_labels (e
->dest
);
2264 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
2266 t
= fold_build2 (PLUS_EXPR
, itype
,
2267 fold_convert (itype
, fd
->loops
[i
].step
), t
);
2268 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
2269 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
2270 tree step
= fold_convert (itype
, fd
->loops
[i
].step
);
2271 if (TYPE_UNSIGNED (itype
)
2272 && fd
->loops
[i
].cond_code
== GT_EXPR
)
2273 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
2274 fold_build1 (NEGATE_EXPR
, itype
, t
),
2275 fold_build1 (NEGATE_EXPR
, itype
, step
));
2277 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
2278 t
= fold_convert (type
, t
);
2279 t
= fold_build2 (PLUS_EXPR
, type
,
2280 counts
[fd
->last_nonrect
], t
);
2281 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2282 true, GSI_SAME_STMT
);
2283 expand_omp_build_assign (&gsi2
, counts
[fd
->last_nonrect
], t
);
2284 e
= make_edge (e
->dest
, next_bb
, EDGE_FALLTHRU
);
2285 set_immediate_dominator (CDI_DOMINATORS
, next_bb
, cur_bb
);
2288 e
= split_block (cur_bb
, last_stmt (cur_bb
));
2290 basic_block new_cur_bb
= create_empty_bb (cur_bb
);
2291 add_bb_to_loop (new_cur_bb
, cur_bb
->loop_father
);
2293 gsi2
= gsi_after_labels (e
->dest
);
2294 tree step
= fold_convert (itype
,
2295 unshare_expr (fd
->loops
[i
].step
));
2296 t
= fold_build2 (PLUS_EXPR
, itype
, vs
[i
], step
);
2297 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2298 true, GSI_SAME_STMT
);
2299 expand_omp_build_assign (&gsi2
, vs
[i
], t
);
2301 ne
= split_block (e
->dest
, last_stmt (e
->dest
));
2302 gsi2
= gsi_after_labels (ne
->dest
);
2305 = gimple_build_cond (fd
->loops
[i
].cond_code
, vs
[i
], n2
,
2306 NULL_TREE
, NULL_TREE
);
2307 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2309 if (next_bb
== entry_bb
)
2311 e3
= find_edge (ne
->dest
, next_bb
);
2312 e3
->flags
= EDGE_FALSE_VALUE
;
2315 e3
= make_edge (ne
->dest
, next_bb
, EDGE_FALSE_VALUE
);
2316 e4
= make_edge (ne
->dest
, new_cur_bb
, EDGE_TRUE_VALUE
);
2317 e4
->probability
= profile_probability::likely ().guessed ();
2318 e3
->probability
= e4
->probability
.invert ();
2319 basic_block esrc
= e
->src
;
2320 make_edge (e
->src
, ne
->dest
, EDGE_FALLTHRU
);
2321 cur_bb
= new_cur_bb
;
2322 basic_block latch_bb
= next_bb
;
2325 set_immediate_dominator (CDI_DOMINATORS
, ne
->dest
, esrc
);
2326 set_immediate_dominator (CDI_DOMINATORS
, latch_bb
, ne
->dest
);
2327 set_immediate_dominator (CDI_DOMINATORS
, cur_bb
, ne
->dest
);
2331 for (i
= fd
->first_nonrect
; i
< fd
->last_nonrect
; i
++)
2332 if (!fd
->loops
[i
].non_rect_referenced
2333 && fd
->loops
[i
].m1
== NULL_TREE
2334 && fd
->loops
[i
].m2
== NULL_TREE
)
2339 t
= fold_build2 (MULT_EXPR
, type
, t
, counts
[i
]);
2343 t
= fold_build2 (MULT_EXPR
, type
, counts
[fd
->last_nonrect
], t
);
2344 expand_omp_build_assign (gsi
, counts
[fd
->last_nonrect
], t
);
2346 if (!rect_count_seen
)
2347 t
= counts
[fd
->last_nonrect
];
2349 t
= fold_build2 (MULT_EXPR
, type
, fd
->loop
.n2
,
2350 counts
[fd
->last_nonrect
]);
2351 expand_omp_build_assign (gsi
, fd
->loop
.n2
, t
);
2353 else if (fd
->non_rect
)
2355 tree t
= fd
->loop
.n2
;
2356 gcc_assert (TREE_CODE (t
) == INTEGER_CST
);
2357 int non_rect_referenced
= 0, non_rect
= 0;
2358 for (i
= 0; i
< fd
->collapse
; i
++)
2360 if ((i
< fd
->first_nonrect
|| i
> fd
->last_nonrect
)
2361 && !integer_zerop (counts
[i
]))
2362 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, t
, counts
[i
]);
2363 if (fd
->loops
[i
].non_rect_referenced
)
2364 non_rect_referenced
++;
2365 if (fd
->loops
[i
].m1
|| fd
->loops
[i
].m2
)
2368 gcc_assert (non_rect
== 1 && non_rect_referenced
== 1);
2369 counts
[fd
->last_nonrect
] = t
;
2373 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2375 V3 = N31 + (T % count3) * STEP3;
2377 V2 = N21 + (T % count2) * STEP2;
2379 V1 = N11 + T * STEP1;
2380 if this loop doesn't have an inner loop construct combined with it.
2381 If it does have an inner loop construct combined with it and the
2382 iteration count isn't known constant, store values from counts array
2383 into its _looptemp_ temporaries instead.
2384 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2385 inclusive), use the count of all those loops together, and either
2386 find quadratic etc. equation roots, or as a fallback, do:
2388 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2389 for (tmpj = M21 * tmpi + N21;
2390 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2392 int tmpk1 = M31 * tmpj + N31;
2393 int tmpk2 = M32 * tmpj + N32;
2394 if (tmpk1 COND3 tmpk2)
2400 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2401 if (COUNT + temp > T)
2405 V3 = tmpk1 + (T - COUNT) * STEP3;
2413 but for optional innermost or outermost rectangular loops that aren't
2414 referenced by other loop expressions keep doing the division/modulo. */
2417 expand_omp_for_init_vars (struct omp_for_data
*fd
, gimple_stmt_iterator
*gsi
,
2418 tree
*counts
, tree
*nonrect_bounds
,
2419 gimple
*inner_stmt
, tree startvar
)
2422 if (gimple_omp_for_combined_p (fd
->for_stmt
))
2424 /* If fd->loop.n2 is constant, then no propagation of the counts
2425 is needed, they are constant. */
2426 if (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
)
2429 tree clauses
= gimple_code (inner_stmt
) != GIMPLE_OMP_FOR
2430 ? gimple_omp_taskreg_clauses (inner_stmt
)
2431 : gimple_omp_for_clauses (inner_stmt
);
2432 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2433 isn't supposed to be handled, as the inner loop doesn't
2435 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
2436 gcc_assert (innerc
);
2437 for (i
= 0; i
< fd
->collapse
; i
++)
2439 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
2440 OMP_CLAUSE__LOOPTEMP_
);
2441 gcc_assert (innerc
);
2444 tree tem
= OMP_CLAUSE_DECL (innerc
);
2445 tree t
= fold_convert (TREE_TYPE (tem
), counts
[i
]);
2446 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
2447 false, GSI_CONTINUE_LINKING
);
2448 gassign
*stmt
= gimple_build_assign (tem
, t
);
2449 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2455 tree type
= TREE_TYPE (fd
->loop
.v
);
2456 tree tem
= create_tmp_reg (type
, ".tem");
2457 gassign
*stmt
= gimple_build_assign (tem
, startvar
);
2458 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2460 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
2462 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
), itype
, t
;
2464 if (POINTER_TYPE_P (vtype
))
2465 itype
= signed_type_for (vtype
);
2466 if (i
!= 0 && (i
!= fd
->last_nonrect
|| fd
->first_nonrect
))
2467 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, tem
, counts
[i
]);
2470 if (i
== fd
->last_nonrect
)
2472 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
2473 false, GSI_CONTINUE_LINKING
);
2475 tree idx
= create_tmp_reg (type
, ".count");
2476 expand_omp_build_assign (gsi
, idx
,
2477 build_zero_cst (type
), true);
2478 basic_block bb_triang
= NULL
, bb_triang_dom
= NULL
;
2479 if (fd
->first_nonrect
+ 1 == fd
->last_nonrect
2480 && (TREE_CODE (fd
->loop
.n2
) == INTEGER_CST
2481 || (fd
->first_inner_iterations
2482 /* For now. Later add clauses to propagate the
2484 && !gimple_omp_for_combined_into_p (fd
->for_stmt
)))
2485 && (optab_handler (sqrt_optab
, TYPE_MODE (double_type_node
))
2486 != CODE_FOR_nothing
))
2488 tree outer_n1
= fd
->adjn1
? fd
->adjn1
: fd
->loops
[i
- 1].n1
;
2489 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2490 tree first_inner_iterations
= fd
->first_inner_iterations
;
2491 tree factor
= fd
->factor
;
2493 = gimple_build_cond (NE_EXPR
, factor
,
2494 build_zero_cst (TREE_TYPE (factor
)),
2495 NULL_TREE
, NULL_TREE
);
2496 gsi_insert_after (gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
2497 edge e
= split_block (gsi_bb (*gsi
), cond_stmt
);
2498 basic_block bb0
= e
->src
;
2499 e
->flags
= EDGE_TRUE_VALUE
;
2500 e
->probability
= profile_probability::likely ();
2501 bb_triang_dom
= bb0
;
2502 *gsi
= gsi_after_labels (e
->dest
);
2503 tree slltype
= long_long_integer_type_node
;
2504 tree ulltype
= long_long_unsigned_type_node
;
2505 tree stopvalull
= fold_convert (ulltype
, stopval
);
2507 = force_gimple_operand_gsi (gsi
, stopvalull
, true, NULL_TREE
,
2508 false, GSI_CONTINUE_LINKING
);
2509 first_inner_iterations
2510 = fold_convert (slltype
, first_inner_iterations
);
2511 first_inner_iterations
2512 = force_gimple_operand_gsi (gsi
, first_inner_iterations
, true,
2514 GSI_CONTINUE_LINKING
);
2515 factor
= fold_convert (slltype
, factor
);
2517 = force_gimple_operand_gsi (gsi
, factor
, true, NULL_TREE
,
2518 false, GSI_CONTINUE_LINKING
);
2519 tree first_inner_iterationsd
2520 = fold_build1 (FLOAT_EXPR
, double_type_node
,
2521 first_inner_iterations
);
2522 first_inner_iterationsd
2523 = force_gimple_operand_gsi (gsi
, first_inner_iterationsd
, true,
2525 GSI_CONTINUE_LINKING
);
2526 tree factord
= fold_build1 (FLOAT_EXPR
, double_type_node
,
2528 factord
= force_gimple_operand_gsi (gsi
, factord
, true,
2530 GSI_CONTINUE_LINKING
);
2531 tree stopvald
= fold_build1 (FLOAT_EXPR
, double_type_node
,
2533 stopvald
= force_gimple_operand_gsi (gsi
, stopvald
, true,
2535 GSI_CONTINUE_LINKING
);
2536 /* Temporarily disable flag_rounding_math, values will be
2537 decimal numbers divided by 2 and worst case imprecisions
2538 due to too large values ought to be caught later by the
2539 checks for fallback. */
2540 int save_flag_rounding_math
= flag_rounding_math
;
2541 flag_rounding_math
= 0;
2542 t
= fold_build2 (RDIV_EXPR
, double_type_node
, factord
,
2543 build_real (double_type_node
, dconst2
));
2544 tree t3
= fold_build2 (MINUS_EXPR
, double_type_node
,
2545 first_inner_iterationsd
, t
);
2546 t3
= force_gimple_operand_gsi (gsi
, t3
, true, NULL_TREE
, false,
2547 GSI_CONTINUE_LINKING
);
2548 t
= fold_build2 (MULT_EXPR
, double_type_node
, factord
,
2549 build_real (double_type_node
, dconst2
));
2550 t
= fold_build2 (MULT_EXPR
, double_type_node
, t
, stopvald
);
2551 t
= fold_build2 (PLUS_EXPR
, double_type_node
, t
,
2552 fold_build2 (MULT_EXPR
, double_type_node
,
2554 flag_rounding_math
= save_flag_rounding_math
;
2555 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
, false,
2556 GSI_CONTINUE_LINKING
);
2558 && cfun
->can_throw_non_call_exceptions
2559 && operation_could_trap_p (LT_EXPR
, true, false, NULL_TREE
))
2561 tree tem
= fold_build2 (LT_EXPR
, boolean_type_node
, t
,
2562 build_zero_cst (double_type_node
));
2563 tem
= force_gimple_operand_gsi (gsi
, tem
, true, NULL_TREE
,
2564 false, GSI_CONTINUE_LINKING
);
2565 cond_stmt
= gimple_build_cond (NE_EXPR
, tem
,
2567 NULL_TREE
, NULL_TREE
);
2571 = gimple_build_cond (LT_EXPR
, t
,
2572 build_zero_cst (double_type_node
),
2573 NULL_TREE
, NULL_TREE
);
2574 gsi_insert_after (gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
2575 e
= split_block (gsi_bb (*gsi
), cond_stmt
);
2576 basic_block bb1
= e
->src
;
2577 e
->flags
= EDGE_FALSE_VALUE
;
2578 e
->probability
= profile_probability::very_likely ();
2579 *gsi
= gsi_after_labels (e
->dest
);
2580 gcall
*call
= gimple_build_call_internal (IFN_SQRT
, 1, t
);
2581 tree sqrtr
= create_tmp_var (double_type_node
);
2582 gimple_call_set_lhs (call
, sqrtr
);
2583 gsi_insert_after (gsi
, call
, GSI_CONTINUE_LINKING
);
2584 t
= fold_build2 (MINUS_EXPR
, double_type_node
, sqrtr
, t3
);
2585 t
= fold_build2 (RDIV_EXPR
, double_type_node
, t
, factord
);
2586 t
= fold_build1 (FIX_TRUNC_EXPR
, ulltype
, t
);
2587 tree c
= create_tmp_var (ulltype
);
2588 tree d
= create_tmp_var (ulltype
);
2589 expand_omp_build_assign (gsi
, c
, t
, true);
2590 t
= fold_build2 (MINUS_EXPR
, ulltype
, c
,
2591 build_one_cst (ulltype
));
2592 t
= fold_build2 (MULT_EXPR
, ulltype
, c
, t
);
2593 t
= fold_build2 (RSHIFT_EXPR
, ulltype
, t
, integer_one_node
);
2594 t
= fold_build2 (MULT_EXPR
, ulltype
,
2595 fold_convert (ulltype
, fd
->factor
), t
);
2597 = fold_build2 (MULT_EXPR
, ulltype
, c
,
2598 fold_convert (ulltype
,
2599 fd
->first_inner_iterations
));
2600 t
= fold_build2 (PLUS_EXPR
, ulltype
, t
, t2
);
2601 expand_omp_build_assign (gsi
, d
, t
, true);
2602 t
= fold_build2 (MULT_EXPR
, ulltype
,
2603 fold_convert (ulltype
, fd
->factor
), c
);
2604 t
= fold_build2 (PLUS_EXPR
, ulltype
,
2605 t
, fold_convert (ulltype
,
2606 fd
->first_inner_iterations
));
2607 t2
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
, false,
2608 GSI_CONTINUE_LINKING
);
2609 cond_stmt
= gimple_build_cond (GE_EXPR
, stopvalull
, d
,
2610 NULL_TREE
, NULL_TREE
);
2611 gsi_insert_after (gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
2612 e
= split_block (gsi_bb (*gsi
), cond_stmt
);
2613 basic_block bb2
= e
->src
;
2614 e
->flags
= EDGE_TRUE_VALUE
;
2615 e
->probability
= profile_probability::very_likely ();
2616 *gsi
= gsi_after_labels (e
->dest
);
2617 t
= fold_build2 (PLUS_EXPR
, ulltype
, d
, t2
);
2618 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
, false,
2619 GSI_CONTINUE_LINKING
);
2620 cond_stmt
= gimple_build_cond (GE_EXPR
, stopvalull
, t
,
2621 NULL_TREE
, NULL_TREE
);
2622 gsi_insert_after (gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
2623 e
= split_block (gsi_bb (*gsi
), cond_stmt
);
2624 basic_block bb3
= e
->src
;
2625 e
->flags
= EDGE_FALSE_VALUE
;
2626 e
->probability
= profile_probability::very_likely ();
2627 *gsi
= gsi_after_labels (e
->dest
);
2628 t
= fold_convert (itype
, c
);
2629 t
= fold_build2 (MULT_EXPR
, itype
, t
, fd
->loops
[i
- 1].step
);
2630 t
= fold_build2 (PLUS_EXPR
, itype
, outer_n1
, t
);
2631 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
, false,
2632 GSI_CONTINUE_LINKING
);
2633 expand_omp_build_assign (gsi
, fd
->loops
[i
- 1].v
, t
, true);
2634 t2
= fold_build2 (MINUS_EXPR
, ulltype
, stopvalull
, d
);
2635 t2
= fold_convert (itype
, t2
);
2636 t2
= fold_build2 (MULT_EXPR
, itype
, t2
, fd
->loops
[i
].step
);
2637 t2
= fold_build2 (PLUS_EXPR
, itype
, t2
, fd
->loops
[i
].n1
);
2638 if (fd
->loops
[i
].m1
)
2640 t
= fold_build2 (MULT_EXPR
, itype
, t
, fd
->loops
[i
].m1
);
2641 t2
= fold_build2 (PLUS_EXPR
, itype
, t2
, t
);
2643 expand_omp_build_assign (gsi
, fd
->loops
[i
].v
, t2
, true);
2644 e
= split_block (gsi_bb (*gsi
), gsi_stmt (*gsi
));
2646 *gsi
= gsi_after_labels (e
->dest
);
2648 e
= make_edge (bb1
, gsi_bb (*gsi
), EDGE_TRUE_VALUE
);
2649 e
->probability
= profile_probability::very_unlikely ();
2650 e
= make_edge (bb2
, gsi_bb (*gsi
), EDGE_FALSE_VALUE
);
2651 e
->probability
= profile_probability::very_unlikely ();
2652 e
= make_edge (bb3
, gsi_bb (*gsi
), EDGE_TRUE_VALUE
);
2653 e
->probability
= profile_probability::very_unlikely ();
2655 basic_block bb4
= create_empty_bb (bb0
);
2656 add_bb_to_loop (bb4
, bb0
->loop_father
);
2657 e
= make_edge (bb0
, bb4
, EDGE_FALSE_VALUE
);
2658 e
->probability
= profile_probability::unlikely ();
2659 make_edge (bb4
, gsi_bb (*gsi
), EDGE_FALLTHRU
);
2660 set_immediate_dominator (CDI_DOMINATORS
, bb4
, bb0
);
2661 set_immediate_dominator (CDI_DOMINATORS
, gsi_bb (*gsi
), bb0
);
2662 gimple_stmt_iterator gsi2
= gsi_after_labels (bb4
);
2663 t2
= fold_build2 (TRUNC_DIV_EXPR
, type
,
2664 counts
[i
], counts
[i
- 1]);
2665 t2
= force_gimple_operand_gsi (&gsi2
, t2
, true, NULL_TREE
, false,
2666 GSI_CONTINUE_LINKING
);
2667 t
= fold_build2 (TRUNC_MOD_EXPR
, type
, stopval
, t2
);
2668 t2
= fold_build2 (TRUNC_DIV_EXPR
, type
, stopval
, t2
);
2669 t
= fold_convert (itype
, t
);
2670 t2
= fold_convert (itype
, t2
);
2671 t
= fold_build2 (MULT_EXPR
, itype
, t
,
2672 fold_convert (itype
, fd
->loops
[i
].step
));
2673 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
2674 t2
= fold_build2 (MULT_EXPR
, itype
, t2
,
2675 fold_convert (itype
, fd
->loops
[i
- 1].step
));
2676 t2
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
- 1].n1
, t2
);
2677 t2
= force_gimple_operand_gsi (&gsi2
, t2
, false, NULL_TREE
,
2678 false, GSI_CONTINUE_LINKING
);
2679 stmt
= gimple_build_assign (fd
->loops
[i
- 1].v
, t2
);
2680 gsi_insert_after (&gsi2
, stmt
, GSI_CONTINUE_LINKING
);
2681 if (fd
->loops
[i
].m1
)
2683 t2
= fold_build2 (MULT_EXPR
, itype
, fd
->loops
[i
].m1
,
2684 fd
->loops
[i
- 1].v
);
2685 t
= fold_build2 (PLUS_EXPR
, itype
, t
, t2
);
2687 t
= force_gimple_operand_gsi (&gsi2
, t
, false, NULL_TREE
,
2688 false, GSI_CONTINUE_LINKING
);
2689 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
2690 gsi_insert_after (&gsi2
, stmt
, GSI_CONTINUE_LINKING
);
2692 /* Fallback implementation. Evaluate the loops in between
2693 (inclusive) fd->first_nonrect and fd->last_nonrect at
2694 runtime using temporaries instead of the original iteration
2695 variables, in the body just bump the counter and compare
2696 with the desired value. */
2697 gimple_stmt_iterator gsi2
= *gsi
;
2698 basic_block entry_bb
= gsi_bb (gsi2
);
2699 edge e
= split_block (entry_bb
, gsi_stmt (gsi2
));
2700 e
= split_block (e
->dest
, (gimple
*) NULL
);
2701 basic_block dom_bb
= NULL
;
2702 basic_block cur_bb
= e
->src
;
2703 basic_block next_bb
= e
->dest
;
2705 *gsi
= gsi_after_labels (entry_bb
);
2707 tree
*vs
= XALLOCAVEC (tree
, fd
->last_nonrect
);
2708 tree n1
= NULL_TREE
, n2
= NULL_TREE
;
2709 memset (vs
, 0, fd
->last_nonrect
* sizeof (tree
));
2711 for (int j
= fd
->first_nonrect
; j
<= fd
->last_nonrect
; j
++)
2713 tree itype
= TREE_TYPE (fd
->loops
[j
].v
);
2714 bool rect_p
= (fd
->loops
[j
].m1
== NULL_TREE
2715 && fd
->loops
[j
].m2
== NULL_TREE
2716 && !fd
->loops
[j
].non_rect_referenced
);
2717 gsi2
= gsi_after_labels (cur_bb
);
2718 t
= fold_convert (itype
, unshare_expr (fd
->loops
[j
].n1
));
2719 if (fd
->loops
[j
].m1
)
2721 n1
= fold_convert (itype
, unshare_expr (fd
->loops
[j
].m1
));
2722 n1
= fold_build2 (MULT_EXPR
, itype
,
2723 vs
[j
- fd
->loops
[j
].outer
], n1
);
2724 n1
= fold_build2 (PLUS_EXPR
, itype
, n1
, t
);
2727 n1
= build_zero_cst (type
);
2730 n1
= force_gimple_operand_gsi (&gsi2
, n1
, true, NULL_TREE
,
2731 true, GSI_SAME_STMT
);
2732 if (j
< fd
->last_nonrect
)
2734 vs
[j
] = create_tmp_reg (rect_p
? type
: itype
, ".it");
2735 expand_omp_build_assign (&gsi2
, vs
[j
], n1
);
2737 t
= fold_convert (itype
, unshare_expr (fd
->loops
[j
].n2
));
2738 if (fd
->loops
[j
].m2
)
2740 n2
= fold_convert (itype
, unshare_expr (fd
->loops
[j
].m2
));
2741 n2
= fold_build2 (MULT_EXPR
, itype
,
2742 vs
[j
- fd
->loops
[j
].outer
], n2
);
2743 n2
= fold_build2 (PLUS_EXPR
, itype
, n2
, t
);
2749 n2
= force_gimple_operand_gsi (&gsi2
, n2
, true, NULL_TREE
,
2750 true, GSI_SAME_STMT
);
2751 if (j
== fd
->last_nonrect
)
2754 = gimple_build_cond (fd
->loops
[j
].cond_code
, n1
, n2
,
2755 NULL_TREE
, NULL_TREE
);
2756 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2757 e
= split_block (cur_bb
, cond_stmt
);
2758 e
->flags
= EDGE_TRUE_VALUE
;
2759 edge ne
= make_edge (cur_bb
, next_bb
, EDGE_FALSE_VALUE
);
2760 e
->probability
= profile_probability::likely ().guessed ();
2761 ne
->probability
= e
->probability
.invert ();
2762 gsi2
= gsi_after_labels (e
->dest
);
2764 t
= build_int_cst (itype
, (fd
->loops
[j
].cond_code
== LT_EXPR
2766 t
= fold_build2 (PLUS_EXPR
, itype
,
2767 fold_convert (itype
, fd
->loops
[j
].step
), t
);
2768 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
2769 t
= fold_build2 (MINUS_EXPR
, itype
, t
, n1
);
2770 tree step
= fold_convert (itype
, fd
->loops
[j
].step
);
2771 if (TYPE_UNSIGNED (itype
)
2772 && fd
->loops
[j
].cond_code
== GT_EXPR
)
2773 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
2774 fold_build1 (NEGATE_EXPR
, itype
, t
),
2775 fold_build1 (NEGATE_EXPR
, itype
, step
));
2777 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
2778 t
= fold_convert (type
, t
);
2779 t
= fold_build2 (PLUS_EXPR
, type
, idx
, t
);
2780 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2781 true, GSI_SAME_STMT
);
2782 e
= make_edge (e
->dest
, next_bb
, EDGE_FALLTHRU
);
2783 set_immediate_dominator (CDI_DOMINATORS
, next_bb
, cur_bb
);
2785 = gimple_build_cond (LE_EXPR
, t
, stopval
, NULL_TREE
,
2787 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2788 e
= split_block (gsi_bb (gsi2
), cond_stmt
);
2789 e
->flags
= EDGE_TRUE_VALUE
;
2790 e
->probability
= profile_probability::likely ().guessed ();
2791 ne
= make_edge (e
->src
, entry_bb
, EDGE_FALSE_VALUE
);
2792 ne
->probability
= e
->probability
.invert ();
2793 gsi2
= gsi_after_labels (e
->dest
);
2794 expand_omp_build_assign (&gsi2
, idx
, t
);
2795 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
, dom_bb
);
2798 e
= split_block (cur_bb
, last_stmt (cur_bb
));
2800 basic_block new_cur_bb
= create_empty_bb (cur_bb
);
2801 add_bb_to_loop (new_cur_bb
, cur_bb
->loop_father
);
2803 gsi2
= gsi_after_labels (e
->dest
);
2805 t
= fold_build2 (PLUS_EXPR
, type
, vs
[j
],
2806 build_one_cst (type
));
2810 = fold_convert (itype
, unshare_expr (fd
->loops
[j
].step
));
2811 t
= fold_build2 (PLUS_EXPR
, itype
, vs
[j
], step
);
2813 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
2814 true, GSI_SAME_STMT
);
2815 expand_omp_build_assign (&gsi2
, vs
[j
], t
);
2817 edge ne
= split_block (e
->dest
, last_stmt (e
->dest
));
2818 gsi2
= gsi_after_labels (ne
->dest
);
2821 if (next_bb
== entry_bb
)
2822 /* No need to actually check the outermost condition. */
2824 = gimple_build_cond (EQ_EXPR
, boolean_true_node
,
2826 NULL_TREE
, NULL_TREE
);
2829 = gimple_build_cond (rect_p
? LT_EXPR
2830 : fd
->loops
[j
].cond_code
,
2831 vs
[j
], n2
, NULL_TREE
, NULL_TREE
);
2832 gsi_insert_before (&gsi2
, cond_stmt
, GSI_SAME_STMT
);
2834 if (next_bb
== entry_bb
)
2836 e3
= find_edge (ne
->dest
, next_bb
);
2837 e3
->flags
= EDGE_FALSE_VALUE
;
2841 e3
= make_edge (ne
->dest
, next_bb
, EDGE_FALSE_VALUE
);
2842 e4
= make_edge (ne
->dest
, new_cur_bb
, EDGE_TRUE_VALUE
);
2843 e4
->probability
= profile_probability::likely ().guessed ();
2844 e3
->probability
= e4
->probability
.invert ();
2845 basic_block esrc
= e
->src
;
2846 make_edge (e
->src
, ne
->dest
, EDGE_FALLTHRU
);
2847 cur_bb
= new_cur_bb
;
2848 basic_block latch_bb
= next_bb
;
2851 set_immediate_dominator (CDI_DOMINATORS
, ne
->dest
, esrc
);
2852 set_immediate_dominator (CDI_DOMINATORS
, latch_bb
, ne
->dest
);
2853 set_immediate_dominator (CDI_DOMINATORS
, cur_bb
, ne
->dest
);
2855 for (int j
= fd
->last_nonrect
; j
>= fd
->first_nonrect
; j
--)
2857 tree itype
= TREE_TYPE (fd
->loops
[j
].v
);
2858 bool rect_p
= (fd
->loops
[j
].m1
== NULL_TREE
2859 && fd
->loops
[j
].m2
== NULL_TREE
2860 && !fd
->loops
[j
].non_rect_referenced
);
2861 if (j
== fd
->last_nonrect
)
2863 t
= fold_build2 (MINUS_EXPR
, type
, stopval
, idx
);
2864 t
= fold_convert (itype
, t
);
2866 = fold_convert (itype
, unshare_expr (fd
->loops
[j
].step
));
2867 t
= fold_build2 (MULT_EXPR
, itype
, t
, t2
);
2868 t
= fold_build2 (PLUS_EXPR
, itype
, n1
, t
);
2872 t
= fold_convert (itype
, vs
[j
]);
2873 t
= fold_build2 (MULT_EXPR
, itype
, t
,
2874 fold_convert (itype
, fd
->loops
[j
].step
));
2875 if (POINTER_TYPE_P (vtype
))
2876 t
= fold_build_pointer_plus (fd
->loops
[j
].n1
, t
);
2878 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[j
].n1
, t
);
2882 t
= force_gimple_operand_gsi (gsi
, t
, false,
2885 stmt
= gimple_build_assign (fd
->loops
[j
].v
, t
);
2886 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
2888 if (gsi_end_p (*gsi
))
2889 *gsi
= gsi_last_bb (gsi_bb (*gsi
));
2894 e
= split_block (gsi_bb (*gsi
), gsi_stmt (*gsi
));
2895 make_edge (bb_triang
, e
->dest
, EDGE_FALLTHRU
);
2896 *gsi
= gsi_after_labels (e
->dest
);
2897 if (!gsi_end_p (*gsi
))
2898 gsi_insert_before (gsi
, gimple_build_nop (), GSI_NEW_STMT
);
2899 set_immediate_dominator (CDI_DOMINATORS
, e
->dest
, bb_triang_dom
);
2904 t
= fold_convert (itype
, t
);
2905 t
= fold_build2 (MULT_EXPR
, itype
, t
,
2906 fold_convert (itype
, fd
->loops
[i
].step
));
2907 if (POINTER_TYPE_P (vtype
))
2908 t
= fold_build_pointer_plus (fd
->loops
[i
].n1
, t
);
2910 t
= fold_build2 (PLUS_EXPR
, itype
, fd
->loops
[i
].n1
, t
);
2911 t
= force_gimple_operand_gsi (gsi
, t
,
2912 DECL_P (fd
->loops
[i
].v
)
2913 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
2915 GSI_CONTINUE_LINKING
);
2916 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
2917 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2919 if (i
!= 0 && (i
!= fd
->last_nonrect
|| fd
->first_nonrect
))
2921 t
= fold_build2 (TRUNC_DIV_EXPR
, type
, tem
, counts
[i
]);
2922 t
= force_gimple_operand_gsi (gsi
, t
, false, NULL_TREE
,
2923 false, GSI_CONTINUE_LINKING
);
2924 stmt
= gimple_build_assign (tem
, t
);
2925 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2927 if (i
== fd
->last_nonrect
)
2928 i
= fd
->first_nonrect
;
2931 for (i
= 0; i
<= fd
->last_nonrect
; i
++)
2932 if (fd
->loops
[i
].m2
)
2934 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
2936 tree t
= fold_convert (itype
, unshare_expr (fd
->loops
[i
].m2
));
2937 t
= fold_build2 (MULT_EXPR
, itype
,
2938 fd
->loops
[i
- fd
->loops
[i
].outer
].v
, t
);
2939 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
2940 fold_convert (itype
,
2941 unshare_expr (fd
->loops
[i
].n2
)));
2942 nonrect_bounds
[i
] = create_tmp_reg (itype
, ".bound");
2943 t
= force_gimple_operand_gsi (gsi
, t
, false,
2945 GSI_CONTINUE_LINKING
);
2946 stmt
= gimple_build_assign (nonrect_bounds
[i
], t
);
2947 gsi_insert_after (gsi
, stmt
, GSI_CONTINUE_LINKING
);
2951 /* Helper function for expand_omp_for_*. Generate code like:
2954 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2958 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2963 For non-rectangular loops, use temporaries stored in nonrect_bounds
2964 for the upper bounds if M?2 multiplier is present. Given e.g.
2965 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2966 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2967 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2968 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2972 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2974 V4 = N41 + M41 * V2; // This can be left out if the loop
2975 // refers to the immediate parent loop
2977 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2981 if (V2 cond2 N22) goto L120; else goto L13;
2983 V4 = N41 + M41 * V2;
2984 NONRECT_BOUND4 = N42 + M42 * V2;
2985 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2992 extract_omp_for_update_vars (struct omp_for_data
*fd
, tree
*nonrect_bounds
,
2993 basic_block cont_bb
, basic_block body_bb
)
2995 basic_block last_bb
, bb
, collapse_bb
= NULL
;
2997 gimple_stmt_iterator gsi
;
3003 for (i
= fd
->collapse
- 1; i
>= 0; i
--)
3005 tree vtype
= TREE_TYPE (fd
->loops
[i
].v
);
3007 bb
= create_empty_bb (last_bb
);
3008 add_bb_to_loop (bb
, last_bb
->loop_father
);
3009 gsi
= gsi_start_bb (bb
);
3011 if (i
< fd
->collapse
- 1)
3013 e
= make_edge (last_bb
, bb
, EDGE_FALSE_VALUE
);
3015 = profile_probability::guessed_always ().apply_scale (1, 8);
3017 struct omp_for_data_loop
*l
= &fd
->loops
[i
+ 1];
3018 if (l
->m1
== NULL_TREE
|| l
->outer
!= 1)
3024 = fold_build2 (MULT_EXPR
, TREE_TYPE (t
),
3025 fd
->loops
[i
+ 1 - l
->outer
].v
, l
->m1
);
3026 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (t
), t2
, t
);
3028 t
= force_gimple_operand_gsi (&gsi
, t
,
3030 && TREE_ADDRESSABLE (l
->v
),
3032 GSI_CONTINUE_LINKING
);
3033 stmt
= gimple_build_assign (l
->v
, t
);
3034 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
3040 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
3042 if (POINTER_TYPE_P (vtype
))
3043 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, fd
->loops
[i
].step
);
3045 t
= fold_build2 (PLUS_EXPR
, vtype
, fd
->loops
[i
].v
, fd
->loops
[i
].step
);
3046 t
= force_gimple_operand_gsi (&gsi
, t
,
3047 DECL_P (fd
->loops
[i
].v
)
3048 && TREE_ADDRESSABLE (fd
->loops
[i
].v
),
3049 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
3050 stmt
= gimple_build_assign (fd
->loops
[i
].v
, t
);
3051 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
3053 if (fd
->loops
[i
].non_rect_referenced
)
3055 basic_block update_bb
= NULL
, prev_bb
= NULL
;
3056 for (int j
= i
+ 1; j
<= fd
->last_nonrect
; j
++)
3057 if (j
- fd
->loops
[j
].outer
== i
)
3060 struct omp_for_data_loop
*l
= &fd
->loops
[j
];
3061 basic_block this_bb
= create_empty_bb (last_bb
);
3062 add_bb_to_loop (this_bb
, last_bb
->loop_father
);
3063 gimple_stmt_iterator gsi2
= gsi_start_bb (this_bb
);
3066 e
= make_edge (prev_bb
, this_bb
, EDGE_TRUE_VALUE
);
3068 = profile_probability::guessed_always ().apply_scale (7,
3070 set_immediate_dominator (CDI_DOMINATORS
, this_bb
, prev_bb
);
3074 t
= fold_build2 (MULT_EXPR
, TREE_TYPE (l
->m1
), l
->m1
,
3076 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (l
->v
), t
, l
->n1
);
3077 n1
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
3079 GSI_CONTINUE_LINKING
);
3080 stmt
= gimple_build_assign (l
->v
, n1
);
3081 gsi_insert_after (&gsi2
, stmt
, GSI_CONTINUE_LINKING
);
3085 n1
= force_gimple_operand_gsi (&gsi2
, l
->n1
, true,
3087 GSI_CONTINUE_LINKING
);
3090 t
= fold_build2 (MULT_EXPR
, TREE_TYPE (l
->m2
), l
->m2
,
3092 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (nonrect_bounds
[j
]),
3093 t
, unshare_expr (l
->n2
));
3094 n2
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
3096 GSI_CONTINUE_LINKING
);
3097 stmt
= gimple_build_assign (nonrect_bounds
[j
], n2
);
3098 gsi_insert_after (&gsi2
, stmt
, GSI_CONTINUE_LINKING
);
3099 n2
= nonrect_bounds
[j
];
3102 n2
= force_gimple_operand_gsi (&gsi2
, unshare_expr (l
->n2
),
3103 true, NULL_TREE
, false,
3104 GSI_CONTINUE_LINKING
);
3106 = gimple_build_cond (l
->cond_code
, n1
, n2
,
3107 NULL_TREE
, NULL_TREE
);
3108 gsi_insert_after (&gsi2
, cond_stmt
, GSI_CONTINUE_LINKING
);
3109 if (update_bb
== NULL
)
3110 update_bb
= this_bb
;
3111 e
= make_edge (this_bb
, bb
, EDGE_FALSE_VALUE
);
3113 = profile_probability::guessed_always ().apply_scale (1, 8);
3114 if (prev_bb
== NULL
)
3115 set_immediate_dominator (CDI_DOMINATORS
, this_bb
, bb
);
3118 e
= make_edge (prev_bb
, body_bb
, EDGE_TRUE_VALUE
);
3120 = profile_probability::guessed_always ().apply_scale (7, 8);
3121 body_bb
= update_bb
;
3126 if (fd
->loops
[i
].m2
)
3127 t
= nonrect_bounds
[i
];
3129 t
= unshare_expr (fd
->loops
[i
].n2
);
3130 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3131 false, GSI_CONTINUE_LINKING
);
3132 tree v
= fd
->loops
[i
].v
;
3133 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
3134 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
3135 false, GSI_CONTINUE_LINKING
);
3136 t
= fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
3137 stmt
= gimple_build_cond_empty (t
);
3138 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
3139 if (walk_tree (gimple_cond_lhs_ptr (as_a
<gcond
*> (stmt
)),
3140 expand_omp_regimplify_p
, NULL
, NULL
)
3141 || walk_tree (gimple_cond_rhs_ptr (as_a
<gcond
*> (stmt
)),
3142 expand_omp_regimplify_p
, NULL
, NULL
))
3143 gimple_regimplify_operands (stmt
, &gsi
);
3144 e
= make_edge (bb
, body_bb
, EDGE_TRUE_VALUE
);
3145 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
3148 make_edge (bb
, body_bb
, EDGE_FALLTHRU
);
3149 set_immediate_dominator (CDI_DOMINATORS
, bb
, last_bb
);
3156 /* Expand #pragma omp ordered depend(source). */
3159 expand_omp_ordered_source (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
3160 tree
*counts
, location_t loc
)
3162 enum built_in_function source_ix
3163 = fd
->iter_type
== long_integer_type_node
3164 ? BUILT_IN_GOMP_DOACROSS_POST
: BUILT_IN_GOMP_DOACROSS_ULL_POST
;
3166 = gimple_build_call (builtin_decl_explicit (source_ix
), 1,
3167 build_fold_addr_expr (counts
[fd
->ordered
]));
3168 gimple_set_location (g
, loc
);
3169 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
3172 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3175 expand_omp_ordered_sink (gimple_stmt_iterator
*gsi
, struct omp_for_data
*fd
,
3176 tree
*counts
, tree c
, location_t loc
)
3178 auto_vec
<tree
, 10> args
;
3179 enum built_in_function sink_ix
3180 = fd
->iter_type
== long_integer_type_node
3181 ? BUILT_IN_GOMP_DOACROSS_WAIT
: BUILT_IN_GOMP_DOACROSS_ULL_WAIT
;
3182 tree t
, off
, coff
= NULL_TREE
, deps
= OMP_CLAUSE_DECL (c
), cond
= NULL_TREE
;
3184 gimple_stmt_iterator gsi2
= *gsi
;
3185 bool warned_step
= false;
3187 for (i
= 0; i
< fd
->ordered
; i
++)
3189 tree step
= NULL_TREE
;
3190 off
= TREE_PURPOSE (deps
);
3191 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
3193 step
= TREE_OPERAND (off
, 1);
3194 off
= TREE_OPERAND (off
, 0);
3196 if (!integer_zerop (off
))
3198 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
3199 || fd
->loops
[i
].cond_code
== GT_EXPR
);
3200 bool forward
= fd
->loops
[i
].cond_code
== LT_EXPR
;
3203 /* Non-simple Fortran DO loops. If step is variable,
3204 we don't know at compile even the direction, so can't
3206 if (TREE_CODE (step
) != INTEGER_CST
)
3208 forward
= tree_int_cst_sgn (step
) != -1;
3210 if (forward
^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3211 warning_at (loc
, 0, "%<depend%> clause with %<sink%> modifier "
3212 "waiting for lexically later iteration");
3215 deps
= TREE_CHAIN (deps
);
3217 /* If all offsets corresponding to the collapsed loops are zero,
3218 this depend clause can be ignored. FIXME: but there is still a
3219 flush needed. We need to emit one __sync_synchronize () for it
3220 though (perhaps conditionally)? Solve this together with the
3221 conservative dependence folding optimization.
3222 if (i >= fd->collapse)
3225 deps
= OMP_CLAUSE_DECL (c
);
3227 edge e1
= split_block (gsi_bb (gsi2
), gsi_stmt (gsi2
));
3228 edge e2
= split_block_after_labels (e1
->dest
);
3230 gsi2
= gsi_after_labels (e1
->dest
);
3231 *gsi
= gsi_last_bb (e1
->src
);
3232 for (i
= 0; i
< fd
->ordered
; i
++)
3234 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
3235 tree step
= NULL_TREE
;
3236 tree orig_off
= NULL_TREE
;
3237 if (POINTER_TYPE_P (itype
))
3240 deps
= TREE_CHAIN (deps
);
3241 off
= TREE_PURPOSE (deps
);
3242 if (TREE_CODE (off
) == TRUNC_DIV_EXPR
)
3244 step
= TREE_OPERAND (off
, 1);
3245 off
= TREE_OPERAND (off
, 0);
3246 gcc_assert (fd
->loops
[i
].cond_code
== LT_EXPR
3247 && integer_onep (fd
->loops
[i
].step
)
3248 && !POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)));
3250 tree s
= fold_convert_loc (loc
, itype
, step
? step
: fd
->loops
[i
].step
);
3253 off
= fold_convert_loc (loc
, itype
, off
);
3255 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
3258 if (integer_zerop (off
))
3259 t
= boolean_true_node
;
3263 tree co
= fold_convert_loc (loc
, itype
, off
);
3264 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
3266 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3267 co
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, co
);
3268 a
= fold_build2_loc (loc
, POINTER_PLUS_EXPR
,
3269 TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].v
,
3272 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3273 a
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
3274 fd
->loops
[i
].v
, co
);
3276 a
= fold_build2_loc (loc
, PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
3277 fd
->loops
[i
].v
, co
);
3281 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3282 t1
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
3285 t1
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
3287 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3288 t2
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
3291 t2
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
3293 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
,
3294 step
, build_int_cst (TREE_TYPE (step
), 0));
3295 if (TREE_CODE (step
) != INTEGER_CST
)
3297 t1
= unshare_expr (t1
);
3298 t1
= force_gimple_operand_gsi (gsi
, t1
, true, NULL_TREE
,
3299 false, GSI_CONTINUE_LINKING
);
3300 t2
= unshare_expr (t2
);
3301 t2
= force_gimple_operand_gsi (gsi
, t2
, true, NULL_TREE
,
3302 false, GSI_CONTINUE_LINKING
);
3304 t
= fold_build3_loc (loc
, COND_EXPR
, boolean_type_node
,
3307 else if (fd
->loops
[i
].cond_code
== LT_EXPR
)
3309 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3310 t
= fold_build2_loc (loc
, GE_EXPR
, boolean_type_node
, a
,
3313 t
= fold_build2_loc (loc
, LT_EXPR
, boolean_type_node
, a
,
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3317 t
= fold_build2_loc (loc
, GT_EXPR
, boolean_type_node
, a
,
3320 t
= fold_build2_loc (loc
, LE_EXPR
, boolean_type_node
, a
,
3324 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
, cond
, t
);
3328 off
= fold_convert_loc (loc
, itype
, off
);
3331 || (fd
->loops
[i
].cond_code
== LT_EXPR
3332 ? !integer_onep (fd
->loops
[i
].step
)
3333 : !integer_minus_onep (fd
->loops
[i
].step
)))
3335 if (step
== NULL_TREE
3336 && TYPE_UNSIGNED (itype
)
3337 && fd
->loops
[i
].cond_code
== GT_EXPR
)
3338 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
, off
,
3339 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
3342 t
= fold_build2_loc (loc
, TRUNC_MOD_EXPR
, itype
,
3343 orig_off
? orig_off
: off
, s
);
3344 t
= fold_build2_loc (loc
, EQ_EXPR
, boolean_type_node
, t
,
3345 build_int_cst (itype
, 0));
3346 if (integer_zerop (t
) && !warned_step
)
3348 warning_at (loc
, 0, "%<depend%> clause with %<sink%> modifier "
3349 "refers to iteration never in the iteration "
3353 cond
= fold_build2_loc (loc
, BIT_AND_EXPR
, boolean_type_node
,
3357 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
3363 t
= fold_build2_loc (loc
, MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
3364 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
3365 t
= fold_convert_loc (loc
, fd
->iter_type
, t
);
3368 /* We have divided off by step already earlier. */;
3369 else if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
3370 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
,
3371 fold_build1_loc (loc
, NEGATE_EXPR
, itype
,
3374 off
= fold_build2_loc (loc
, TRUNC_DIV_EXPR
, itype
, off
, s
);
3375 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps
))
3376 off
= fold_build1_loc (loc
, NEGATE_EXPR
, itype
, off
);
3377 off
= fold_convert_loc (loc
, fd
->iter_type
, off
);
3378 if (i
<= fd
->collapse
- 1 && fd
->collapse
> 1)
3381 off
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, coff
,
3383 if (i
< fd
->collapse
- 1)
3385 coff
= fold_build2_loc (loc
, MULT_EXPR
, fd
->iter_type
, off
,
3390 off
= unshare_expr (off
);
3391 t
= fold_build2_loc (loc
, PLUS_EXPR
, fd
->iter_type
, t
, off
);
3392 t
= force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
3393 true, GSI_SAME_STMT
);
3396 gimple
*g
= gimple_build_call_vec (builtin_decl_explicit (sink_ix
), args
);
3397 gimple_set_location (g
, loc
);
3398 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3400 cond
= unshare_expr (cond
);
3401 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
, false,
3402 GSI_CONTINUE_LINKING
);
3403 gsi_insert_after (gsi
, gimple_build_cond_empty (cond
), GSI_NEW_STMT
);
3404 edge e3
= make_edge (e1
->src
, e2
->dest
, EDGE_FALSE_VALUE
);
3405 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
3406 e1
->probability
= e3
->probability
.invert ();
3407 e1
->flags
= EDGE_TRUE_VALUE
;
3408 set_immediate_dominator (CDI_DOMINATORS
, e2
->dest
, e1
->src
);
3410 *gsi
= gsi_after_labels (e2
->dest
);
3413 /* Expand all #pragma omp ordered depend(source) and
3414 #pragma omp ordered depend(sink:...) constructs in the current
3415 #pragma omp for ordered(n) region. */
3418 expand_omp_ordered_source_sink (struct omp_region
*region
,
3419 struct omp_for_data
*fd
, tree
*counts
,
3420 basic_block cont_bb
)
3422 struct omp_region
*inner
;
3424 for (i
= fd
->collapse
- 1; i
< fd
->ordered
; i
++)
3425 if (i
== fd
->collapse
- 1 && fd
->collapse
> 1)
3426 counts
[i
] = NULL_TREE
;
3427 else if (i
>= fd
->collapse
&& !cont_bb
)
3428 counts
[i
] = build_zero_cst (fd
->iter_type
);
3429 else if (!POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
))
3430 && integer_onep (fd
->loops
[i
].step
))
3431 counts
[i
] = NULL_TREE
;
3433 counts
[i
] = create_tmp_var (fd
->iter_type
, ".orditer");
3435 = build_array_type_nelts (fd
->iter_type
, fd
->ordered
- fd
->collapse
+ 1);
3436 counts
[fd
->ordered
] = create_tmp_var (atype
, ".orditera");
3437 TREE_ADDRESSABLE (counts
[fd
->ordered
]) = 1;
3439 for (inner
= region
->inner
; inner
; inner
= inner
->next
)
3440 if (inner
->type
== GIMPLE_OMP_ORDERED
)
3442 gomp_ordered
*ord_stmt
= inner
->ord_stmt
;
3443 gimple_stmt_iterator gsi
= gsi_for_stmt (ord_stmt
);
3444 location_t loc
= gimple_location (ord_stmt
);
3446 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
3447 c
; c
= OMP_CLAUSE_CHAIN (c
))
3448 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SOURCE
)
3451 expand_omp_ordered_source (&gsi
, fd
, counts
, loc
);
3452 for (c
= gimple_omp_ordered_clauses (ord_stmt
);
3453 c
; c
= OMP_CLAUSE_CHAIN (c
))
3454 if (OMP_CLAUSE_DEPEND_KIND (c
) == OMP_CLAUSE_DEPEND_SINK
)
3455 expand_omp_ordered_sink (&gsi
, fd
, counts
, c
, loc
);
3456 gsi_remove (&gsi
, true);
3460 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3464 expand_omp_for_ordered_loops (struct omp_for_data
*fd
, tree
*counts
,
3465 basic_block cont_bb
, basic_block body_bb
,
3466 bool ordered_lastprivate
)
3468 if (fd
->ordered
== fd
->collapse
)
3473 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
3474 for (int i
= fd
->collapse
; i
< fd
->ordered
; i
++)
3476 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
3477 tree n1
= fold_convert (type
, fd
->loops
[i
].n1
);
3478 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, n1
);
3479 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3480 size_int (i
- fd
->collapse
+ 1),
3481 NULL_TREE
, NULL_TREE
);
3482 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
3487 for (int i
= fd
->ordered
- 1; i
>= fd
->collapse
; i
--)
3489 tree t
, type
= TREE_TYPE (fd
->loops
[i
].v
);
3490 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
3491 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
3492 fold_convert (type
, fd
->loops
[i
].n1
));
3494 expand_omp_build_assign (&gsi
, counts
[i
],
3495 build_zero_cst (fd
->iter_type
));
3496 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3497 size_int (i
- fd
->collapse
+ 1),
3498 NULL_TREE
, NULL_TREE
);
3499 expand_omp_build_assign (&gsi
, aref
, build_zero_cst (fd
->iter_type
));
3500 if (!gsi_end_p (gsi
))
3503 gsi
= gsi_last_bb (body_bb
);
3504 edge e1
= split_block (body_bb
, gsi_stmt (gsi
));
3505 basic_block new_body
= e1
->dest
;
3506 if (body_bb
== cont_bb
)
3509 basic_block new_header
;
3510 if (EDGE_COUNT (cont_bb
->preds
) > 0)
3512 gsi
= gsi_last_bb (cont_bb
);
3513 if (POINTER_TYPE_P (type
))
3514 t
= fold_build_pointer_plus (fd
->loops
[i
].v
,
3515 fold_convert (sizetype
,
3516 fd
->loops
[i
].step
));
3518 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loops
[i
].v
,
3519 fold_convert (type
, fd
->loops
[i
].step
));
3520 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
3523 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[i
],
3524 build_int_cst (fd
->iter_type
, 1));
3525 expand_omp_build_assign (&gsi
, counts
[i
], t
);
3530 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
3531 fd
->loops
[i
].v
, fd
->loops
[i
].n1
);
3532 t
= fold_convert (fd
->iter_type
, t
);
3533 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
3534 true, GSI_SAME_STMT
);
3536 aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
3537 size_int (i
- fd
->collapse
+ 1),
3538 NULL_TREE
, NULL_TREE
);
3539 expand_omp_build_assign (&gsi
, aref
, t
);
3541 e2
= split_block (cont_bb
, gsi_stmt (gsi
));
3542 new_header
= e2
->dest
;
3545 new_header
= cont_bb
;
3546 gsi
= gsi_after_labels (new_header
);
3547 tree v
= force_gimple_operand_gsi (&gsi
, fd
->loops
[i
].v
, true, NULL_TREE
,
3548 true, GSI_SAME_STMT
);
3550 = force_gimple_operand_gsi (&gsi
, fold_convert (type
, fd
->loops
[i
].n2
),
3551 true, NULL_TREE
, true, GSI_SAME_STMT
);
3552 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, n2
);
3553 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_NEW_STMT
);
3554 edge e3
= split_block (new_header
, gsi_stmt (gsi
));
3557 make_edge (body_bb
, new_header
, EDGE_FALLTHRU
);
3558 e3
->flags
= EDGE_FALSE_VALUE
;
3559 e3
->probability
= profile_probability::guessed_always ().apply_scale (1, 8);
3560 e1
= make_edge (new_header
, new_body
, EDGE_TRUE_VALUE
);
3561 e1
->probability
= e3
->probability
.invert ();
3563 set_immediate_dominator (CDI_DOMINATORS
, new_header
, body_bb
);
3564 set_immediate_dominator (CDI_DOMINATORS
, new_body
, new_header
);
3568 class loop
*loop
= alloc_loop ();
3569 loop
->header
= new_header
;
3570 loop
->latch
= e2
->src
;
3571 add_loop (loop
, body_bb
->loop_father
);
3575 /* If there are any lastprivate clauses and it is possible some loops
3576 might have zero iterations, ensure all the decls are initialized,
3577 otherwise we could crash evaluating C++ class iterators with lastprivate
3579 bool need_inits
= false;
3580 for (int i
= fd
->collapse
; ordered_lastprivate
&& i
< fd
->ordered
; i
++)
3583 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
3584 gimple_stmt_iterator gsi
= gsi_after_labels (body_bb
);
3585 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
,
3586 fold_convert (type
, fd
->loops
[i
].n1
));
3590 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
3591 tree this_cond
= fold_build2 (fd
->loops
[i
].cond_code
,
3593 fold_convert (type
, fd
->loops
[i
].n1
),
3594 fold_convert (type
, fd
->loops
[i
].n2
));
3595 if (!integer_onep (this_cond
))
3602 /* A subroutine of expand_omp_for. Generate code for a parallel
3603 loop with any schedule. Given parameters:
3605 for (V = N1; V cond N2; V += STEP) BODY;
3607 where COND is "<" or ">", we generate pseudocode
3609 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3610 if (more) goto L0; else goto L3;
3617 if (V cond iend) goto L1; else goto L2;
3619 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3622 If this is a combined omp parallel loop, instead of the call to
3623 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3624 If this is gimple_omp_for_combined_p loop, then instead of assigning
3625 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3626 inner GIMPLE_OMP_FOR and V += STEP; and
3627 if (V cond iend) goto L1; else goto L2; are removed.
3629 For collapsed loops, given parameters:
3631 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3632 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3633 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3636 we generate pseudocode
3638 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3643 count3 = (adj + N32 - N31) / STEP3;
3644 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3649 count2 = (adj + N22 - N21) / STEP2;
3650 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3655 count1 = (adj + N12 - N11) / STEP1;
3656 count = count1 * count2 * count3;
3661 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3662 if (more) goto L0; else goto L3;
3666 V3 = N31 + (T % count3) * STEP3;
3668 V2 = N21 + (T % count2) * STEP2;
3670 V1 = N11 + T * STEP1;
3675 if (V < iend) goto L10; else goto L2;
3678 if (V3 cond3 N32) goto L1; else goto L11;
3682 if (V2 cond2 N22) goto L1; else goto L12;
3688 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3694 expand_omp_for_generic (struct omp_region
*region
,
3695 struct omp_for_data
*fd
,
3696 enum built_in_function start_fn
,
3697 enum built_in_function next_fn
,
3701 tree type
, istart0
, iend0
, iend
;
3702 tree t
, vmain
, vback
, bias
= NULL_TREE
;
3703 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, collapse_bb
;
3704 basic_block l2_bb
= NULL
, l3_bb
= NULL
;
3705 gimple_stmt_iterator gsi
;
3706 gassign
*assign_stmt
;
3707 bool in_combined_parallel
= is_combined_parallel (region
);
3708 bool broken_loop
= region
->cont
== NULL
;
3710 tree
*counts
= NULL
;
3712 bool ordered_lastprivate
= false;
3714 gcc_assert (!broken_loop
|| !in_combined_parallel
);
3715 gcc_assert (fd
->iter_type
== long_integer_type_node
3716 || !in_combined_parallel
);
3718 entry_bb
= region
->entry
;
3719 cont_bb
= region
->cont
;
3721 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
3722 gcc_assert (broken_loop
3723 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
3724 l0_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
3725 l1_bb
= single_succ (l0_bb
);
3728 l2_bb
= create_empty_bb (cont_bb
);
3729 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l1_bb
3730 || (single_succ_edge (BRANCH_EDGE (cont_bb
)->dest
)->dest
3732 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
3736 l3_bb
= BRANCH_EDGE (entry_bb
)->dest
;
3737 exit_bb
= region
->exit
;
3739 gsi
= gsi_last_nondebug_bb (entry_bb
);
3741 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
3743 && omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3744 OMP_CLAUSE_LASTPRIVATE
))
3745 ordered_lastprivate
= false;
3746 tree reductions
= NULL_TREE
;
3747 tree mem
= NULL_TREE
, cond_var
= NULL_TREE
, condtemp
= NULL_TREE
;
3748 tree memv
= NULL_TREE
;
3749 if (fd
->lastprivate_conditional
)
3751 tree c
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3752 OMP_CLAUSE__CONDTEMP_
);
3753 if (fd
->have_pointer_condtemp
)
3754 condtemp
= OMP_CLAUSE_DECL (c
);
3755 c
= omp_find_clause (OMP_CLAUSE_CHAIN (c
), OMP_CLAUSE__CONDTEMP_
);
3756 cond_var
= OMP_CLAUSE_DECL (c
);
3760 if (fd
->have_reductemp
)
3762 tree c
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3763 OMP_CLAUSE__REDUCTEMP_
);
3764 reductions
= OMP_CLAUSE_DECL (c
);
3765 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
3766 gimple
*g
= SSA_NAME_DEF_STMT (reductions
);
3767 reductions
= gimple_assign_rhs1 (g
);
3768 OMP_CLAUSE_DECL (c
) = reductions
;
3769 entry_bb
= gimple_bb (g
);
3770 edge e
= split_block (entry_bb
, g
);
3771 if (region
->entry
== entry_bb
)
3772 region
->entry
= e
->dest
;
3773 gsi
= gsi_last_bb (entry_bb
);
3776 reductions
= null_pointer_node
;
3777 if (fd
->have_pointer_condtemp
)
3779 tree type
= TREE_TYPE (condtemp
);
3780 memv
= create_tmp_var (type
);
3781 TREE_ADDRESSABLE (memv
) = 1;
3782 unsigned HOST_WIDE_INT sz
3783 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
3784 sz
*= fd
->lastprivate_conditional
;
3785 expand_omp_build_assign (&gsi
, memv
, build_int_cst (type
, sz
),
3787 mem
= build_fold_addr_expr (memv
);
3790 mem
= null_pointer_node
;
3792 if (fd
->collapse
> 1 || fd
->ordered
)
3794 int first_zero_iter1
= -1, first_zero_iter2
= -1;
3795 basic_block zero_iter1_bb
= NULL
, zero_iter2_bb
= NULL
, l2_dom_bb
= NULL
;
3797 counts
= XALLOCAVEC (tree
, fd
->ordered
? fd
->ordered
+ 1 : fd
->collapse
);
3798 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
3799 zero_iter1_bb
, first_zero_iter1
,
3800 zero_iter2_bb
, first_zero_iter2
, l2_dom_bb
);
3804 /* Some counts[i] vars might be uninitialized if
3805 some loop has zero iterations. But the body shouldn't
3806 be executed in that case, so just avoid uninit warnings. */
3807 for (i
= first_zero_iter1
;
3808 i
< (fd
->ordered
? fd
->ordered
: fd
->collapse
); i
++)
3809 if (SSA_VAR_P (counts
[i
]))
3810 TREE_NO_WARNING (counts
[i
]) = 1;
3812 e
= split_block (entry_bb
, gsi_stmt (gsi
));
3814 make_edge (zero_iter1_bb
, entry_bb
, EDGE_FALLTHRU
);
3815 gsi
= gsi_last_nondebug_bb (entry_bb
);
3816 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
3817 get_immediate_dominator (CDI_DOMINATORS
,
3822 /* Some counts[i] vars might be uninitialized if
3823 some loop has zero iterations. But the body shouldn't
3824 be executed in that case, so just avoid uninit warnings. */
3825 for (i
= first_zero_iter2
; i
< fd
->ordered
; i
++)
3826 if (SSA_VAR_P (counts
[i
]))
3827 TREE_NO_WARNING (counts
[i
]) = 1;
3829 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
3833 e
= split_block (entry_bb
, gsi_stmt (gsi
));
3835 make_edge (zero_iter2_bb
, entry_bb
, EDGE_FALLTHRU
);
3836 gsi
= gsi_last_nondebug_bb (entry_bb
);
3837 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
3838 get_immediate_dominator
3839 (CDI_DOMINATORS
, zero_iter2_bb
));
3842 if (fd
->collapse
== 1)
3844 counts
[0] = fd
->loop
.n2
;
3845 fd
->loop
= fd
->loops
[0];
3849 type
= TREE_TYPE (fd
->loop
.v
);
3850 istart0
= create_tmp_var (fd
->iter_type
, ".istart0");
3851 iend0
= create_tmp_var (fd
->iter_type
, ".iend0");
3852 TREE_ADDRESSABLE (istart0
) = 1;
3853 TREE_ADDRESSABLE (iend0
) = 1;
3855 /* See if we need to bias by LLONG_MIN. */
3856 if (fd
->iter_type
== long_long_unsigned_type_node
3857 && TREE_CODE (type
) == INTEGER_TYPE
3858 && !TYPE_UNSIGNED (type
)
3859 && fd
->ordered
== 0)
3863 if (fd
->loop
.cond_code
== LT_EXPR
)
3866 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
3870 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
3873 if (TREE_CODE (n1
) != INTEGER_CST
3874 || TREE_CODE (n2
) != INTEGER_CST
3875 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
3876 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
3879 gimple_stmt_iterator gsif
= gsi
;
3882 tree arr
= NULL_TREE
;
3883 if (in_combined_parallel
)
3885 gcc_assert (fd
->ordered
== 0);
3886 /* In a combined parallel loop, emit a call to
3887 GOMP_loop_foo_next. */
3888 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
3889 build_fold_addr_expr (istart0
),
3890 build_fold_addr_expr (iend0
));
3894 tree t0
, t1
, t2
, t3
, t4
;
3895 /* If this is not a combined parallel loop, emit a call to
3896 GOMP_loop_foo_start in ENTRY_BB. */
3897 t4
= build_fold_addr_expr (iend0
);
3898 t3
= build_fold_addr_expr (istart0
);
3901 t0
= build_int_cst (unsigned_type_node
,
3902 fd
->ordered
- fd
->collapse
+ 1);
3903 arr
= create_tmp_var (build_array_type_nelts (fd
->iter_type
,
3905 - fd
->collapse
+ 1),
3907 DECL_NAMELESS (arr
) = 1;
3908 TREE_ADDRESSABLE (arr
) = 1;
3909 TREE_STATIC (arr
) = 1;
3910 vec
<constructor_elt
, va_gc
> *v
;
3911 vec_alloc (v
, fd
->ordered
- fd
->collapse
+ 1);
3914 for (idx
= 0; idx
< fd
->ordered
- fd
->collapse
+ 1; idx
++)
3917 if (idx
== 0 && fd
->collapse
> 1)
3920 c
= counts
[idx
+ fd
->collapse
- 1];
3921 tree purpose
= size_int (idx
);
3922 CONSTRUCTOR_APPEND_ELT (v
, purpose
, c
);
3923 if (TREE_CODE (c
) != INTEGER_CST
)
3924 TREE_STATIC (arr
) = 0;
3927 DECL_INITIAL (arr
) = build_constructor (TREE_TYPE (arr
), v
);
3928 if (!TREE_STATIC (arr
))
3929 force_gimple_operand_gsi (&gsi
, build1 (DECL_EXPR
,
3930 void_type_node
, arr
),
3931 true, NULL_TREE
, true, GSI_SAME_STMT
);
3932 t1
= build_fold_addr_expr (arr
);
3937 t2
= fold_convert (fd
->iter_type
, fd
->loop
.step
);
3940 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
3943 = omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
3944 OMP_CLAUSE__LOOPTEMP_
);
3945 gcc_assert (innerc
);
3946 t0
= OMP_CLAUSE_DECL (innerc
);
3947 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
3948 OMP_CLAUSE__LOOPTEMP_
);
3949 gcc_assert (innerc
);
3950 t1
= OMP_CLAUSE_DECL (innerc
);
3952 if (POINTER_TYPE_P (TREE_TYPE (t0
))
3953 && TYPE_PRECISION (TREE_TYPE (t0
))
3954 != TYPE_PRECISION (fd
->iter_type
))
3956 /* Avoid casting pointers to integer of a different size. */
3957 tree itype
= signed_type_for (type
);
3958 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
3959 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
3963 t1
= fold_convert (fd
->iter_type
, t1
);
3964 t0
= fold_convert (fd
->iter_type
, t0
);
3968 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
3969 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
3972 if (fd
->iter_type
== long_integer_type_node
|| fd
->ordered
)
3976 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
3977 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
3981 t
= build_call_expr (builtin_decl_explicit (start_fn
),
3982 8, t0
, t1
, sched_arg
, t
, t3
, t4
,
3985 t
= build_call_expr (builtin_decl_explicit (start_fn
),
3986 9, t0
, t1
, t2
, sched_arg
, t
, t3
, t4
,
3989 else if (fd
->ordered
)
3990 t
= build_call_expr (builtin_decl_explicit (start_fn
),
3991 5, t0
, t1
, t
, t3
, t4
);
3993 t
= build_call_expr (builtin_decl_explicit (start_fn
),
3994 6, t0
, t1
, t2
, t
, t3
, t4
);
3996 else if (fd
->ordered
)
3997 t
= build_call_expr (builtin_decl_explicit (start_fn
),
4000 t
= build_call_expr (builtin_decl_explicit (start_fn
),
4001 5, t0
, t1
, t2
, t3
, t4
);
4009 /* The GOMP_loop_ull_*start functions have additional boolean
4010 argument, true for < loops and false for > loops.
4011 In Fortran, the C bool type can be different from
4012 boolean_type_node. */
4013 bfn_decl
= builtin_decl_explicit (start_fn
);
4014 c_bool_type
= TREE_TYPE (TREE_TYPE (bfn_decl
));
4015 t5
= build_int_cst (c_bool_type
,
4016 fd
->loop
.cond_code
== LT_EXPR
? 1 : 0);
4019 tree bfn_decl
= builtin_decl_explicit (start_fn
);
4020 t
= fold_convert (fd
->iter_type
, fd
->chunk_size
);
4021 t
= omp_adjust_chunk_size (t
, fd
->simd_schedule
);
4023 t
= build_call_expr (bfn_decl
, 10, t5
, t0
, t1
, t2
, sched_arg
,
4024 t
, t3
, t4
, reductions
, mem
);
4026 t
= build_call_expr (bfn_decl
, 7, t5
, t0
, t1
, t2
, t
, t3
, t4
);
4029 t
= build_call_expr (builtin_decl_explicit (start_fn
),
4030 6, t5
, t0
, t1
, t2
, t3
, t4
);
4033 if (TREE_TYPE (t
) != boolean_type_node
)
4034 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
4035 t
, build_int_cst (TREE_TYPE (t
), 0));
4036 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4037 true, GSI_SAME_STMT
);
4038 if (arr
&& !TREE_STATIC (arr
))
4040 tree clobber
= build_clobber (TREE_TYPE (arr
));
4041 gsi_insert_before (&gsi
, gimple_build_assign (arr
, clobber
),
4044 if (fd
->have_pointer_condtemp
)
4045 expand_omp_build_assign (&gsi
, condtemp
, memv
, false);
4046 if (fd
->have_reductemp
)
4048 gimple
*g
= gsi_stmt (gsi
);
4049 gsi_remove (&gsi
, true);
4050 release_ssa_name (gimple_assign_lhs (g
));
4052 entry_bb
= region
->entry
;
4053 gsi
= gsi_last_nondebug_bb (entry_bb
);
4055 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4057 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
4059 /* Remove the GIMPLE_OMP_FOR statement. */
4060 gsi_remove (&gsi
, true);
4062 if (gsi_end_p (gsif
))
4063 gsif
= gsi_after_labels (gsi_bb (gsif
));
4066 /* Iteration setup for sequential loop goes in L0_BB. */
4067 tree startvar
= fd
->loop
.v
;
4068 tree endvar
= NULL_TREE
;
4070 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4072 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_FOR
4073 && gimple_omp_for_kind (inner_stmt
)
4074 == GF_OMP_FOR_KIND_SIMD
);
4075 tree innerc
= omp_find_clause (gimple_omp_for_clauses (inner_stmt
),
4076 OMP_CLAUSE__LOOPTEMP_
);
4077 gcc_assert (innerc
);
4078 startvar
= OMP_CLAUSE_DECL (innerc
);
4079 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4080 OMP_CLAUSE__LOOPTEMP_
);
4081 gcc_assert (innerc
);
4082 endvar
= OMP_CLAUSE_DECL (innerc
);
4085 gsi
= gsi_start_bb (l0_bb
);
4087 if (fd
->ordered
&& fd
->collapse
== 1)
4088 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
4089 fold_convert (fd
->iter_type
, fd
->loop
.step
));
4091 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
4092 if (fd
->ordered
&& fd
->collapse
== 1)
4094 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
4095 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
4096 fd
->loop
.n1
, fold_convert (sizetype
, t
));
4099 t
= fold_convert (TREE_TYPE (startvar
), t
);
4100 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
4106 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
4107 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
4108 t
= fold_convert (TREE_TYPE (startvar
), t
);
4110 t
= force_gimple_operand_gsi (&gsi
, t
,
4112 && TREE_ADDRESSABLE (startvar
),
4113 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4114 assign_stmt
= gimple_build_assign (startvar
, t
);
4115 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4118 tree itype
= TREE_TYPE (cond_var
);
4119 /* For lastprivate(conditional:) itervar, we need some iteration
4120 counter that starts at unsigned non-zero and increases.
4121 Prefer as few IVs as possible, so if we can use startvar
4122 itself, use that, or startvar + constant (those would be
4123 incremented with step), and as last resort use the s0 + 1
4124 incremented by 1. */
4125 if ((fd
->ordered
&& fd
->collapse
== 1)
4127 || POINTER_TYPE_P (type
)
4128 || TREE_CODE (fd
->loop
.n1
) != INTEGER_CST
4129 || fd
->loop
.cond_code
!= LT_EXPR
)
4130 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, istart0
),
4131 build_int_cst (itype
, 1));
4132 else if (tree_int_cst_sgn (fd
->loop
.n1
) == 1)
4133 t
= fold_convert (itype
, t
);
4136 tree c
= fold_convert (itype
, fd
->loop
.n1
);
4137 c
= fold_build2 (MINUS_EXPR
, itype
, build_int_cst (itype
, 1), c
);
4138 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, t
), c
);
4140 t
= force_gimple_operand_gsi (&gsi
, t
, false,
4141 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
4142 assign_stmt
= gimple_build_assign (cond_var
, t
);
4143 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4147 if (fd
->ordered
&& fd
->collapse
== 1)
4148 t
= fold_build2 (MULT_EXPR
, fd
->iter_type
, t
,
4149 fold_convert (fd
->iter_type
, fd
->loop
.step
));
4151 t
= fold_build2 (MINUS_EXPR
, fd
->iter_type
, t
, bias
);
4152 if (fd
->ordered
&& fd
->collapse
== 1)
4154 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
4155 t
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (startvar
),
4156 fd
->loop
.n1
, fold_convert (sizetype
, t
));
4159 t
= fold_convert (TREE_TYPE (startvar
), t
);
4160 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (startvar
),
4166 if (POINTER_TYPE_P (TREE_TYPE (startvar
)))
4167 t
= fold_convert (signed_type_for (TREE_TYPE (startvar
)), t
);
4168 t
= fold_convert (TREE_TYPE (startvar
), t
);
4170 iend
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4171 false, GSI_CONTINUE_LINKING
);
4174 assign_stmt
= gimple_build_assign (endvar
, iend
);
4175 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4176 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (iend
)))
4177 assign_stmt
= gimple_build_assign (fd
->loop
.v
, iend
);
4179 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, iend
);
4180 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4182 /* Handle linear clause adjustments. */
4183 tree itercnt
= NULL_TREE
;
4184 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
4185 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
4186 c
; c
= OMP_CLAUSE_CHAIN (c
))
4187 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
4188 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
4190 tree d
= OMP_CLAUSE_DECL (c
);
4191 bool is_ref
= omp_is_reference (d
);
4192 tree t
= d
, a
, dest
;
4194 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
4195 tree type
= TREE_TYPE (t
);
4196 if (POINTER_TYPE_P (type
))
4198 dest
= unshare_expr (t
);
4199 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
4200 expand_omp_build_assign (&gsif
, v
, t
);
4201 if (itercnt
== NULL_TREE
)
4204 tree n1
= fd
->loop
.n1
;
4205 if (POINTER_TYPE_P (TREE_TYPE (itercnt
)))
4208 = fold_convert (signed_type_for (TREE_TYPE (itercnt
)),
4210 n1
= fold_convert (TREE_TYPE (itercnt
), n1
);
4212 itercnt
= fold_build2 (MINUS_EXPR
, TREE_TYPE (itercnt
),
4214 itercnt
= fold_build2 (EXACT_DIV_EXPR
, TREE_TYPE (itercnt
),
4215 itercnt
, fd
->loop
.step
);
4216 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
4218 GSI_CONTINUE_LINKING
);
4220 a
= fold_build2 (MULT_EXPR
, type
,
4221 fold_convert (type
, itercnt
),
4222 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
4223 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
4224 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
4225 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4226 false, GSI_CONTINUE_LINKING
);
4227 assign_stmt
= gimple_build_assign (dest
, t
);
4228 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
4230 if (fd
->collapse
> 1)
4231 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, inner_stmt
, startvar
);
4235 /* Until now, counts array contained number of iterations or
4236 variable containing it for ith loop. From now on, we need
4237 those counts only for collapsed loops, and only for the 2nd
4238 till the last collapsed one. Move those one element earlier,
4239 we'll use counts[fd->collapse - 1] for the first source/sink
4240 iteration counter and so on and counts[fd->ordered]
4241 as the array holding the current counter values for
4243 if (fd
->collapse
> 1)
4244 memmove (counts
, counts
+ 1, (fd
->collapse
- 1) * sizeof (counts
[0]));
4248 for (i
= fd
->collapse
; i
< fd
->ordered
; i
++)
4250 tree type
= TREE_TYPE (fd
->loops
[i
].v
);
4252 = fold_build2 (fd
->loops
[i
].cond_code
, boolean_type_node
,
4253 fold_convert (type
, fd
->loops
[i
].n1
),
4254 fold_convert (type
, fd
->loops
[i
].n2
));
4255 if (!integer_onep (this_cond
))
4258 if (i
< fd
->ordered
)
4261 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
)->prev_bb
);
4262 add_bb_to_loop (cont_bb
, l1_bb
->loop_father
);
4263 gimple_stmt_iterator gsi
= gsi_after_labels (cont_bb
);
4264 gimple
*g
= gimple_build_omp_continue (fd
->loop
.v
, fd
->loop
.v
);
4265 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4266 make_edge (cont_bb
, l3_bb
, EDGE_FALLTHRU
);
4267 make_edge (cont_bb
, l1_bb
, 0);
4268 l2_bb
= create_empty_bb (cont_bb
);
4269 broken_loop
= false;
4272 expand_omp_ordered_source_sink (region
, fd
, counts
, cont_bb
);
4273 cont_bb
= expand_omp_for_ordered_loops (fd
, counts
, cont_bb
, l1_bb
,
4274 ordered_lastprivate
);
4275 if (counts
[fd
->collapse
- 1])
4277 gcc_assert (fd
->collapse
== 1);
4278 gsi
= gsi_last_bb (l0_bb
);
4279 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1],
4281 gsi
= gsi_last_bb (cont_bb
);
4282 t
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, counts
[fd
->collapse
- 1],
4283 build_int_cst (fd
->iter_type
, 1));
4284 expand_omp_build_assign (&gsi
, counts
[fd
->collapse
- 1], t
);
4285 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
4286 size_zero_node
, NULL_TREE
, NULL_TREE
);
4287 expand_omp_build_assign (&gsi
, aref
, counts
[fd
->collapse
- 1]);
4288 t
= counts
[fd
->collapse
- 1];
4290 else if (fd
->collapse
> 1)
4294 t
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
4295 fd
->loops
[0].v
, fd
->loops
[0].n1
);
4296 t
= fold_convert (fd
->iter_type
, t
);
4298 gsi
= gsi_last_bb (l0_bb
);
4299 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
, counts
[fd
->ordered
],
4300 size_zero_node
, NULL_TREE
, NULL_TREE
);
4301 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4302 false, GSI_CONTINUE_LINKING
);
4303 expand_omp_build_assign (&gsi
, aref
, t
, true);
4308 /* Code to control the increment and predicate for the sequential
4309 loop goes in the CONT_BB. */
4310 gsi
= gsi_last_nondebug_bb (cont_bb
);
4311 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
4312 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
4313 vmain
= gimple_omp_continue_control_use (cont_stmt
);
4314 vback
= gimple_omp_continue_control_def (cont_stmt
);
4318 tree itype
= TREE_TYPE (cond_var
);
4320 if ((fd
->ordered
&& fd
->collapse
== 1)
4322 || POINTER_TYPE_P (type
)
4323 || TREE_CODE (fd
->loop
.n1
) != INTEGER_CST
4324 || fd
->loop
.cond_code
!= LT_EXPR
)
4325 t2
= build_int_cst (itype
, 1);
4327 t2
= fold_convert (itype
, fd
->loop
.step
);
4328 t2
= fold_build2 (PLUS_EXPR
, itype
, cond_var
, t2
);
4329 t2
= force_gimple_operand_gsi (&gsi
, t2
, false,
4330 NULL_TREE
, true, GSI_SAME_STMT
);
4331 assign_stmt
= gimple_build_assign (cond_var
, t2
);
4332 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4335 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
4337 if (POINTER_TYPE_P (type
))
4338 t
= fold_build_pointer_plus (vmain
, fd
->loop
.step
);
4340 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, fd
->loop
.step
);
4341 t
= force_gimple_operand_gsi (&gsi
, t
,
4343 && TREE_ADDRESSABLE (vback
),
4344 NULL_TREE
, true, GSI_SAME_STMT
);
4345 assign_stmt
= gimple_build_assign (vback
, t
);
4346 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4348 if (fd
->ordered
&& counts
[fd
->collapse
- 1] == NULL_TREE
)
4351 if (fd
->collapse
> 1)
4355 tem
= fold_build2 (MINUS_EXPR
, TREE_TYPE (fd
->loops
[0].v
),
4356 fd
->loops
[0].v
, fd
->loops
[0].n1
);
4357 tem
= fold_convert (fd
->iter_type
, tem
);
4359 tree aref
= build4 (ARRAY_REF
, fd
->iter_type
,
4360 counts
[fd
->ordered
], size_zero_node
,
4361 NULL_TREE
, NULL_TREE
);
4362 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
,
4363 true, GSI_SAME_STMT
);
4364 expand_omp_build_assign (&gsi
, aref
, tem
);
4367 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
4368 DECL_P (vback
) && TREE_ADDRESSABLE (vback
) ? t
: vback
,
4370 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
4371 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
4374 /* Remove GIMPLE_OMP_CONTINUE. */
4375 gsi_remove (&gsi
, true);
4377 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
4378 collapse_bb
= extract_omp_for_update_vars (fd
, NULL
, cont_bb
, l1_bb
);
4380 /* Emit code to get the next parallel iteration in L2_BB. */
4381 gsi
= gsi_start_bb (l2_bb
);
4383 t
= build_call_expr (builtin_decl_explicit (next_fn
), 2,
4384 build_fold_addr_expr (istart0
),
4385 build_fold_addr_expr (iend0
));
4386 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
4387 false, GSI_CONTINUE_LINKING
);
4388 if (TREE_TYPE (t
) != boolean_type_node
)
4389 t
= fold_build2 (NE_EXPR
, boolean_type_node
,
4390 t
, build_int_cst (TREE_TYPE (t
), 0));
4391 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
4392 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
4395 /* Add the loop cleanup function. */
4396 gsi
= gsi_last_nondebug_bb (exit_bb
);
4397 if (gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
4398 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
4399 else if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
4400 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
4402 t
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
4403 gcall
*call_stmt
= gimple_build_call (t
, 0);
4406 tree arr
= counts
[fd
->ordered
];
4407 tree clobber
= build_clobber (TREE_TYPE (arr
));
4408 gsi_insert_after (&gsi
, gimple_build_assign (arr
, clobber
),
4411 if (gimple_omp_return_lhs (gsi_stmt (gsi
)))
4413 gimple_call_set_lhs (call_stmt
, gimple_omp_return_lhs (gsi_stmt (gsi
)));
4414 if (fd
->have_reductemp
)
4416 gimple
*g
= gimple_build_assign (reductions
, NOP_EXPR
,
4417 gimple_call_lhs (call_stmt
));
4418 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
4421 gsi_insert_after (&gsi
, call_stmt
, GSI_SAME_STMT
);
4422 gsi_remove (&gsi
, true);
4424 /* Connect the new blocks. */
4425 find_edge (entry_bb
, l0_bb
)->flags
= EDGE_TRUE_VALUE
;
4426 find_edge (entry_bb
, l3_bb
)->flags
= EDGE_FALSE_VALUE
;
4432 e
= find_edge (cont_bb
, l3_bb
);
4433 ne
= make_edge (l2_bb
, l3_bb
, EDGE_FALSE_VALUE
);
4435 phis
= phi_nodes (l3_bb
);
4436 for (gsi
= gsi_start (phis
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4438 gimple
*phi
= gsi_stmt (gsi
);
4439 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, ne
),
4440 PHI_ARG_DEF_FROM_EDGE (phi
, e
));
4444 make_edge (cont_bb
, l2_bb
, EDGE_FALSE_VALUE
);
4445 e
= find_edge (cont_bb
, l1_bb
);
4448 e
= BRANCH_EDGE (cont_bb
);
4449 gcc_assert (single_succ (e
->dest
) == l1_bb
);
4451 if (gimple_omp_for_combined_p (fd
->for_stmt
))
4456 else if (fd
->collapse
> 1)
4459 e
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
4462 e
->flags
= EDGE_TRUE_VALUE
;
4465 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
4466 find_edge (cont_bb
, l2_bb
)->probability
= e
->probability
.invert ();
4470 e
= find_edge (cont_bb
, l2_bb
);
4471 e
->flags
= EDGE_FALLTHRU
;
4473 make_edge (l2_bb
, l0_bb
, EDGE_TRUE_VALUE
);
4475 if (gimple_in_ssa_p (cfun
))
4477 /* Add phis to the outer loop that connect to the phis in the inner,
4478 original loop, and move the loop entry value of the inner phi to
4479 the loop entry value of the outer phi. */
4481 for (psi
= gsi_start_phis (l3_bb
); !gsi_end_p (psi
); gsi_next (&psi
))
4485 gphi
*exit_phi
= psi
.phi ();
4487 if (virtual_operand_p (gimple_phi_result (exit_phi
)))
4490 edge l2_to_l3
= find_edge (l2_bb
, l3_bb
);
4491 tree exit_res
= PHI_ARG_DEF_FROM_EDGE (exit_phi
, l2_to_l3
);
4493 basic_block latch
= BRANCH_EDGE (cont_bb
)->dest
;
4494 edge latch_to_l1
= find_edge (latch
, l1_bb
);
4496 = find_phi_with_arg_on_edge (exit_res
, latch_to_l1
);
4498 tree t
= gimple_phi_result (exit_phi
);
4499 tree new_res
= copy_ssa_name (t
, NULL
);
4500 nphi
= create_phi_node (new_res
, l0_bb
);
4502 edge l0_to_l1
= find_edge (l0_bb
, l1_bb
);
4503 t
= PHI_ARG_DEF_FROM_EDGE (inner_phi
, l0_to_l1
);
4504 locus
= gimple_phi_arg_location_from_edge (inner_phi
, l0_to_l1
);
4505 edge entry_to_l0
= find_edge (entry_bb
, l0_bb
);
4506 add_phi_arg (nphi
, t
, entry_to_l0
, locus
);
4508 edge l2_to_l0
= find_edge (l2_bb
, l0_bb
);
4509 add_phi_arg (nphi
, exit_res
, l2_to_l0
, UNKNOWN_LOCATION
);
4511 add_phi_arg (inner_phi
, new_res
, l0_to_l1
, UNKNOWN_LOCATION
);
4515 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
,
4516 recompute_dominator (CDI_DOMINATORS
, l2_bb
));
4517 set_immediate_dominator (CDI_DOMINATORS
, l3_bb
,
4518 recompute_dominator (CDI_DOMINATORS
, l3_bb
));
4519 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
,
4520 recompute_dominator (CDI_DOMINATORS
, l0_bb
));
4521 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
,
4522 recompute_dominator (CDI_DOMINATORS
, l1_bb
));
4524 /* We enter expand_omp_for_generic with a loop. This original loop may
4525 have its own loop struct, or it may be part of an outer loop struct
4526 (which may be the fake loop). */
4527 class loop
*outer_loop
= entry_bb
->loop_father
;
4528 bool orig_loop_has_loop_struct
= l1_bb
->loop_father
!= outer_loop
;
4530 add_bb_to_loop (l2_bb
, outer_loop
);
4532 /* We've added a new loop around the original loop. Allocate the
4533 corresponding loop struct. */
4534 class loop
*new_loop
= alloc_loop ();
4535 new_loop
->header
= l0_bb
;
4536 new_loop
->latch
= l2_bb
;
4537 add_loop (new_loop
, outer_loop
);
4539 /* Allocate a loop structure for the original loop unless we already
4541 if (!orig_loop_has_loop_struct
4542 && !gimple_omp_for_combined_p (fd
->for_stmt
))
4544 class loop
*orig_loop
= alloc_loop ();
4545 orig_loop
->header
= l1_bb
;
4546 /* The loop may have multiple latches. */
4547 add_loop (orig_loop
, new_loop
);
4552 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4553 compute needed allocation size. If !ALLOC of team allocations,
4554 if ALLOC of thread allocation. SZ is the initial needed size for
4555 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4556 CNT number of elements of each array, for !ALLOC this is
4557 omp_get_num_threads (), for ALLOC number of iterations handled by the
4558 current thread. If PTR is non-NULL, it is the start of the allocation
4559 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4560 clauses pointers to the corresponding arrays. */
4563 expand_omp_scantemp_alloc (tree clauses
, tree ptr
, unsigned HOST_WIDE_INT sz
,
4564 unsigned HOST_WIDE_INT alloc_align
, tree cnt
,
4565 gimple_stmt_iterator
*gsi
, bool alloc
)
4567 tree eltsz
= NULL_TREE
;
4568 unsigned HOST_WIDE_INT preval
= 0;
4570 ptr
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ptr
),
4571 ptr
, size_int (sz
));
4572 for (tree c
= clauses
; c
; c
= OMP_CLAUSE_CHAIN (c
))
4573 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE__SCANTEMP_
4574 && !OMP_CLAUSE__SCANTEMP__CONTROL (c
)
4575 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c
)) != alloc
)
4577 tree pointee_type
= TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c
)));
4578 unsigned HOST_WIDE_INT al
= TYPE_ALIGN_UNIT (pointee_type
);
4579 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type
)))
4581 unsigned HOST_WIDE_INT szl
4582 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type
));
4583 szl
= least_bit_hwi (szl
);
4587 if (ptr
== NULL_TREE
)
4589 if (eltsz
== NULL_TREE
)
4590 eltsz
= TYPE_SIZE_UNIT (pointee_type
);
4592 eltsz
= size_binop (PLUS_EXPR
, eltsz
,
4593 TYPE_SIZE_UNIT (pointee_type
));
4595 if (preval
== 0 && al
<= alloc_align
)
4597 unsigned HOST_WIDE_INT diff
= ROUND_UP (sz
, al
) - sz
;
4600 ptr
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ptr
),
4601 ptr
, size_int (diff
));
4603 else if (al
> preval
)
4607 ptr
= fold_convert (pointer_sized_int_node
, ptr
);
4608 ptr
= fold_build2 (PLUS_EXPR
, pointer_sized_int_node
, ptr
,
4609 build_int_cst (pointer_sized_int_node
,
4611 ptr
= fold_build2 (BIT_AND_EXPR
, pointer_sized_int_node
, ptr
,
4612 build_int_cst (pointer_sized_int_node
,
4613 -(HOST_WIDE_INT
) al
));
4614 ptr
= fold_convert (ptr_type_node
, ptr
);
4619 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type
)))
4625 expand_omp_build_assign (gsi
, OMP_CLAUSE_DECL (c
), ptr
, false);
4626 ptr
= OMP_CLAUSE_DECL (c
);
4627 ptr
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (ptr
), ptr
,
4628 size_binop (MULT_EXPR
, cnt
,
4629 TYPE_SIZE_UNIT (pointee_type
)));
4633 if (ptr
== NULL_TREE
)
4635 eltsz
= size_binop (MULT_EXPR
, eltsz
, cnt
);
4637 eltsz
= size_binop (PLUS_EXPR
, eltsz
, size_int (sz
));
4644 /* A subroutine of expand_omp_for. Generate code for a parallel
4645 loop with static schedule and no specified chunk size. Given
4648 for (V = N1; V cond N2; V += STEP) BODY;
4650 where COND is "<" or ">", we generate pseudocode
4652 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4657 if ((__typeof (V)) -1 > 0 && cond is >)
4658 n = -(adj + N2 - N1) / -STEP;
4660 n = (adj + N2 - N1) / STEP;
4663 if (threadid < tt) goto L3; else goto L4;
4668 s0 = q * threadid + tt;
4671 if (s0 >= e0) goto L2; else goto L0;
4677 if (V cond e) goto L1;
4682 expand_omp_for_static_nochunk (struct omp_region
*region
,
4683 struct omp_for_data
*fd
,
4686 tree n
, q
, s0
, e0
, e
, t
, tt
, nthreads
= NULL_TREE
, threadid
;
4687 tree type
, itype
, vmain
, vback
;
4688 basic_block entry_bb
, second_bb
, third_bb
, exit_bb
, seq_start_bb
;
4689 basic_block body_bb
, cont_bb
, collapse_bb
= NULL
;
4690 basic_block fin_bb
, fourth_bb
= NULL
, fifth_bb
= NULL
, sixth_bb
= NULL
;
4691 basic_block exit1_bb
= NULL
, exit2_bb
= NULL
, exit3_bb
= NULL
;
4692 gimple_stmt_iterator gsi
, gsip
;
4694 bool broken_loop
= region
->cont
== NULL
;
4695 tree
*counts
= NULL
;
4697 tree reductions
= NULL_TREE
;
4698 tree cond_var
= NULL_TREE
, condtemp
= NULL_TREE
;
4700 itype
= type
= TREE_TYPE (fd
->loop
.v
);
4701 if (POINTER_TYPE_P (type
))
4702 itype
= signed_type_for (type
);
4704 entry_bb
= region
->entry
;
4705 cont_bb
= region
->cont
;
4706 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
4707 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
4708 gcc_assert (broken_loop
4709 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
4710 seq_start_bb
= split_edge (FALLTHRU_EDGE (entry_bb
));
4711 body_bb
= single_succ (seq_start_bb
);
4714 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
4715 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
4716 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
4718 exit_bb
= region
->exit
;
4720 /* Iteration space partitioning goes in ENTRY_BB. */
4721 gsi
= gsi_last_nondebug_bb (entry_bb
);
4722 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4726 if (fd
->collapse
> 1)
4728 int first_zero_iter
= -1, dummy
= -1;
4729 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
4731 counts
= XALLOCAVEC (tree
, fd
->collapse
);
4732 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
4733 fin_bb
, first_zero_iter
,
4734 dummy_bb
, dummy
, l2_dom_bb
);
4737 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4738 t
= integer_one_node
;
4740 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
4741 fold_convert (type
, fd
->loop
.n1
),
4742 fold_convert (type
, fd
->loop
.n2
));
4743 if (fd
->collapse
== 1
4744 && TYPE_UNSIGNED (type
)
4745 && (t
== NULL_TREE
|| !integer_onep (t
)))
4747 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
4748 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
4749 true, GSI_SAME_STMT
);
4750 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
4751 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
4752 true, GSI_SAME_STMT
);
4753 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
4754 NULL_TREE
, NULL_TREE
);
4755 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
4756 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
4757 expand_omp_regimplify_p
, NULL
, NULL
)
4758 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
4759 expand_omp_regimplify_p
, NULL
, NULL
))
4761 gsi
= gsi_for_stmt (cond_stmt
);
4762 gimple_regimplify_operands (cond_stmt
, &gsi
);
4764 ep
= split_block (entry_bb
, cond_stmt
);
4765 ep
->flags
= EDGE_TRUE_VALUE
;
4766 entry_bb
= ep
->dest
;
4767 ep
->probability
= profile_probability::very_likely ();
4768 ep
= make_edge (ep
->src
, fin_bb
, EDGE_FALSE_VALUE
);
4769 ep
->probability
= profile_probability::very_unlikely ();
4770 if (gimple_in_ssa_p (cfun
))
4772 int dest_idx
= find_edge (entry_bb
, fin_bb
)->dest_idx
;
4773 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
4774 !gsi_end_p (gpi
); gsi_next (&gpi
))
4776 gphi
*phi
= gpi
.phi ();
4777 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
4778 ep
, UNKNOWN_LOCATION
);
4781 gsi
= gsi_last_bb (entry_bb
);
4784 if (fd
->lastprivate_conditional
)
4786 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
4787 tree c
= omp_find_clause (clauses
, OMP_CLAUSE__CONDTEMP_
);
4788 if (fd
->have_pointer_condtemp
)
4789 condtemp
= OMP_CLAUSE_DECL (c
);
4790 c
= omp_find_clause (OMP_CLAUSE_CHAIN (c
), OMP_CLAUSE__CONDTEMP_
);
4791 cond_var
= OMP_CLAUSE_DECL (c
);
4793 if (fd
->have_reductemp
4794 /* For scan, we don't want to reinitialize condtemp before the
4796 || (fd
->have_pointer_condtemp
&& !fd
->have_scantemp
)
4797 || fd
->have_nonctrl_scantemp
)
4799 tree t1
= build_int_cst (long_integer_type_node
, 0);
4800 tree t2
= build_int_cst (long_integer_type_node
, 1);
4801 tree t3
= build_int_cstu (long_integer_type_node
,
4802 (HOST_WIDE_INT_1U
<< 31) + 1);
4803 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
4804 gimple_stmt_iterator gsi2
= gsi_none ();
4806 tree mem
= null_pointer_node
, memv
= NULL_TREE
;
4807 unsigned HOST_WIDE_INT condtemp_sz
= 0;
4808 unsigned HOST_WIDE_INT alloc_align
= 0;
4809 if (fd
->have_reductemp
)
4811 gcc_assert (!fd
->have_nonctrl_scantemp
);
4812 tree c
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
4813 reductions
= OMP_CLAUSE_DECL (c
);
4814 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
4815 g
= SSA_NAME_DEF_STMT (reductions
);
4816 reductions
= gimple_assign_rhs1 (g
);
4817 OMP_CLAUSE_DECL (c
) = reductions
;
4818 gsi2
= gsi_for_stmt (g
);
4822 if (gsi_end_p (gsip
))
4823 gsi2
= gsi_after_labels (region
->entry
);
4826 reductions
= null_pointer_node
;
4828 if (fd
->have_pointer_condtemp
|| fd
->have_nonctrl_scantemp
)
4831 if (fd
->have_pointer_condtemp
)
4832 type
= TREE_TYPE (condtemp
);
4834 type
= ptr_type_node
;
4835 memv
= create_tmp_var (type
);
4836 TREE_ADDRESSABLE (memv
) = 1;
4837 unsigned HOST_WIDE_INT sz
= 0;
4838 tree size
= NULL_TREE
;
4839 if (fd
->have_pointer_condtemp
)
4841 sz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
4842 sz
*= fd
->lastprivate_conditional
;
4845 if (fd
->have_nonctrl_scantemp
)
4847 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
4848 gimple
*g
= gimple_build_call (nthreads
, 0);
4849 nthreads
= create_tmp_var (integer_type_node
);
4850 gimple_call_set_lhs (g
, nthreads
);
4851 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4852 nthreads
= fold_convert (sizetype
, nthreads
);
4853 alloc_align
= TYPE_ALIGN_UNIT (long_long_integer_type_node
);
4854 size
= expand_omp_scantemp_alloc (clauses
, NULL_TREE
, sz
,
4855 alloc_align
, nthreads
, NULL
,
4857 size
= fold_convert (type
, size
);
4860 size
= build_int_cst (type
, sz
);
4861 expand_omp_build_assign (&gsi2
, memv
, size
, false);
4862 mem
= build_fold_addr_expr (memv
);
4865 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START
),
4866 9, t1
, t2
, t2
, t3
, t1
, null_pointer_node
,
4867 null_pointer_node
, reductions
, mem
);
4868 force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
4869 true, GSI_SAME_STMT
);
4870 if (fd
->have_pointer_condtemp
)
4871 expand_omp_build_assign (&gsi2
, condtemp
, memv
, false);
4872 if (fd
->have_nonctrl_scantemp
)
4874 tree ptr
= fd
->have_pointer_condtemp
? condtemp
: memv
;
4875 expand_omp_scantemp_alloc (clauses
, ptr
, condtemp_sz
,
4876 alloc_align
, nthreads
, &gsi2
, false);
4878 if (fd
->have_reductemp
)
4880 gsi_remove (&gsi2
, true);
4881 release_ssa_name (gimple_assign_lhs (g
));
4884 switch (gimple_omp_for_kind (fd
->for_stmt
))
4886 case GF_OMP_FOR_KIND_FOR
:
4887 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
4888 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
4890 case GF_OMP_FOR_KIND_DISTRIBUTE
:
4891 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
4892 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
4897 nthreads
= build_call_expr (nthreads
, 0);
4898 nthreads
= fold_convert (itype
, nthreads
);
4899 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
4900 true, GSI_SAME_STMT
);
4901 threadid
= build_call_expr (threadid
, 0);
4902 threadid
= fold_convert (itype
, threadid
);
4903 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
4904 true, GSI_SAME_STMT
);
4908 step
= fd
->loop
.step
;
4909 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
4911 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
4912 OMP_CLAUSE__LOOPTEMP_
);
4913 gcc_assert (innerc
);
4914 n1
= OMP_CLAUSE_DECL (innerc
);
4915 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
4916 OMP_CLAUSE__LOOPTEMP_
);
4917 gcc_assert (innerc
);
4918 n2
= OMP_CLAUSE_DECL (innerc
);
4920 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
4921 true, NULL_TREE
, true, GSI_SAME_STMT
);
4922 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
4923 true, NULL_TREE
, true, GSI_SAME_STMT
);
4924 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
4925 true, NULL_TREE
, true, GSI_SAME_STMT
);
4927 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
4928 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
4929 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
4930 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
4931 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
4932 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
4933 fold_build1 (NEGATE_EXPR
, itype
, t
),
4934 fold_build1 (NEGATE_EXPR
, itype
, step
));
4936 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
4937 t
= fold_convert (itype
, t
);
4938 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
4940 q
= create_tmp_reg (itype
, "q");
4941 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, n
, nthreads
);
4942 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
4943 gsi_insert_before (&gsi
, gimple_build_assign (q
, t
), GSI_SAME_STMT
);
4945 tt
= create_tmp_reg (itype
, "tt");
4946 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, n
, nthreads
);
4947 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, true, GSI_SAME_STMT
);
4948 gsi_insert_before (&gsi
, gimple_build_assign (tt
, t
), GSI_SAME_STMT
);
4950 t
= build2 (LT_EXPR
, boolean_type_node
, threadid
, tt
);
4951 gcond
*cond_stmt
= gimple_build_cond_empty (t
);
4952 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
4954 second_bb
= split_block (entry_bb
, cond_stmt
)->dest
;
4955 gsi
= gsi_last_nondebug_bb (second_bb
);
4956 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4958 gsi_insert_before (&gsi
, gimple_build_assign (tt
, build_int_cst (itype
, 0)),
4960 gassign
*assign_stmt
4961 = gimple_build_assign (q
, PLUS_EXPR
, q
, build_int_cst (itype
, 1));
4962 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
4964 third_bb
= split_block (second_bb
, assign_stmt
)->dest
;
4965 gsi
= gsi_last_nondebug_bb (third_bb
);
4966 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
4968 if (fd
->have_nonctrl_scantemp
)
4970 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
4971 tree controlp
= NULL_TREE
, controlb
= NULL_TREE
;
4972 for (tree c
= clauses
; c
; c
= OMP_CLAUSE_CHAIN (c
))
4973 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE__SCANTEMP_
4974 && OMP_CLAUSE__SCANTEMP__CONTROL (c
))
4976 if (TREE_TYPE (OMP_CLAUSE_DECL (c
)) == boolean_type_node
)
4977 controlb
= OMP_CLAUSE_DECL (c
);
4979 controlp
= OMP_CLAUSE_DECL (c
);
4980 if (controlb
&& controlp
)
4983 gcc_assert (controlp
&& controlb
);
4984 tree cnt
= create_tmp_var (sizetype
);
4985 gimple
*g
= gimple_build_assign (cnt
, NOP_EXPR
, q
);
4986 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4987 unsigned HOST_WIDE_INT alloc_align
= TYPE_ALIGN_UNIT (ptr_type_node
);
4988 tree sz
= expand_omp_scantemp_alloc (clauses
, NULL_TREE
, 0,
4989 alloc_align
, cnt
, NULL
, true);
4990 tree size
= create_tmp_var (sizetype
);
4991 expand_omp_build_assign (&gsi
, size
, sz
, false);
4992 tree cmp
= fold_build2 (GT_EXPR
, boolean_type_node
,
4993 size
, size_int (16384));
4994 expand_omp_build_assign (&gsi
, controlb
, cmp
);
4995 g
= gimple_build_cond (NE_EXPR
, controlb
, boolean_false_node
,
4996 NULL_TREE
, NULL_TREE
);
4997 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4998 fourth_bb
= split_block (third_bb
, g
)->dest
;
4999 gsi
= gsi_last_nondebug_bb (fourth_bb
);
5000 /* FIXME: Once we have allocators, this should use allocator. */
5001 g
= gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC
), 1, size
);
5002 gimple_call_set_lhs (g
, controlp
);
5003 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5004 expand_omp_scantemp_alloc (clauses
, controlp
, 0, alloc_align
, cnt
,
5008 fifth_bb
= split_block (fourth_bb
, g
)->dest
;
5009 gsi
= gsi_last_nondebug_bb (fifth_bb
);
5011 g
= gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE
), 0);
5012 gimple_call_set_lhs (g
, controlp
);
5013 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5014 tree alloca_decl
= builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN
);
5015 for (tree c
= clauses
; c
; c
= OMP_CLAUSE_CHAIN (c
))
5016 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE__SCANTEMP_
5017 && OMP_CLAUSE__SCANTEMP__ALLOC (c
))
5019 tree tmp
= create_tmp_var (sizetype
);
5020 tree pointee_type
= TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c
)));
5021 g
= gimple_build_assign (tmp
, MULT_EXPR
, cnt
,
5022 TYPE_SIZE_UNIT (pointee_type
));
5023 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5024 g
= gimple_build_call (alloca_decl
, 2, tmp
,
5025 size_int (TYPE_ALIGN (pointee_type
)));
5026 gimple_call_set_lhs (g
, OMP_CLAUSE_DECL (c
));
5027 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5030 sixth_bb
= split_block (fifth_bb
, g
)->dest
;
5031 gsi
= gsi_last_nondebug_bb (sixth_bb
);
5034 t
= build2 (MULT_EXPR
, itype
, q
, threadid
);
5035 t
= build2 (PLUS_EXPR
, itype
, t
, tt
);
5036 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5038 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, q
);
5039 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
5041 t
= build2 (GE_EXPR
, boolean_type_node
, s0
, e0
);
5042 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5044 /* Remove the GIMPLE_OMP_FOR statement. */
5045 gsi_remove (&gsi
, true);
5047 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5048 gsi
= gsi_start_bb (seq_start_bb
);
5050 tree startvar
= fd
->loop
.v
;
5051 tree endvar
= NULL_TREE
;
5053 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5055 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
5056 ? gimple_omp_parallel_clauses (inner_stmt
)
5057 : gimple_omp_for_clauses (inner_stmt
);
5058 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
5059 gcc_assert (innerc
);
5060 startvar
= OMP_CLAUSE_DECL (innerc
);
5061 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5062 OMP_CLAUSE__LOOPTEMP_
);
5063 gcc_assert (innerc
);
5064 endvar
= OMP_CLAUSE_DECL (innerc
);
5065 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
5066 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
5069 for (i
= 1; i
< fd
->collapse
; i
++)
5071 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5072 OMP_CLAUSE__LOOPTEMP_
);
5073 gcc_assert (innerc
);
5075 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5076 OMP_CLAUSE__LOOPTEMP_
);
5079 /* If needed (distribute parallel for with lastprivate),
5080 propagate down the total number of iterations. */
5081 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
5083 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
5084 GSI_CONTINUE_LINKING
);
5085 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
5086 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5090 t
= fold_convert (itype
, s0
);
5091 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
5092 if (POINTER_TYPE_P (type
))
5094 t
= fold_build_pointer_plus (n1
, t
);
5095 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
5096 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
5097 t
= fold_convert (signed_type_for (type
), t
);
5100 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
5101 t
= fold_convert (TREE_TYPE (startvar
), t
);
5102 t
= force_gimple_operand_gsi (&gsi
, t
,
5104 && TREE_ADDRESSABLE (startvar
),
5105 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5106 assign_stmt
= gimple_build_assign (startvar
, t
);
5107 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5110 tree itype
= TREE_TYPE (cond_var
);
5111 /* For lastprivate(conditional:) itervar, we need some iteration
5112 counter that starts at unsigned non-zero and increases.
5113 Prefer as few IVs as possible, so if we can use startvar
5114 itself, use that, or startvar + constant (those would be
5115 incremented with step), and as last resort use the s0 + 1
5116 incremented by 1. */
5117 if (POINTER_TYPE_P (type
)
5118 || TREE_CODE (n1
) != INTEGER_CST
5119 || fd
->loop
.cond_code
!= LT_EXPR
)
5120 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, s0
),
5121 build_int_cst (itype
, 1));
5122 else if (tree_int_cst_sgn (n1
) == 1)
5123 t
= fold_convert (itype
, t
);
5126 tree c
= fold_convert (itype
, n1
);
5127 c
= fold_build2 (MINUS_EXPR
, itype
, build_int_cst (itype
, 1), c
);
5128 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, t
), c
);
5130 t
= force_gimple_operand_gsi (&gsi
, t
, false,
5131 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5132 assign_stmt
= gimple_build_assign (cond_var
, t
);
5133 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5136 t
= fold_convert (itype
, e0
);
5137 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
5138 if (POINTER_TYPE_P (type
))
5140 t
= fold_build_pointer_plus (n1
, t
);
5141 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
5142 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
5143 t
= fold_convert (signed_type_for (type
), t
);
5146 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
5147 t
= fold_convert (TREE_TYPE (startvar
), t
);
5148 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5149 false, GSI_CONTINUE_LINKING
);
5152 assign_stmt
= gimple_build_assign (endvar
, e
);
5153 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5154 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
5155 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
5157 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
5158 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5160 /* Handle linear clause adjustments. */
5161 tree itercnt
= NULL_TREE
;
5162 tree
*nonrect_bounds
= NULL
;
5163 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
5164 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
5165 c
; c
= OMP_CLAUSE_CHAIN (c
))
5166 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
5167 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
5169 tree d
= OMP_CLAUSE_DECL (c
);
5170 bool is_ref
= omp_is_reference (d
);
5171 tree t
= d
, a
, dest
;
5173 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
5174 if (itercnt
== NULL_TREE
)
5176 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
5178 itercnt
= fold_build2 (MINUS_EXPR
, itype
,
5179 fold_convert (itype
, n1
),
5180 fold_convert (itype
, fd
->loop
.n1
));
5181 itercnt
= fold_build2 (EXACT_DIV_EXPR
, itype
, itercnt
, step
);
5182 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercnt
, s0
);
5183 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
5185 GSI_CONTINUE_LINKING
);
5190 tree type
= TREE_TYPE (t
);
5191 if (POINTER_TYPE_P (type
))
5193 a
= fold_build2 (MULT_EXPR
, type
,
5194 fold_convert (type
, itercnt
),
5195 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
5196 dest
= unshare_expr (t
);
5197 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
5198 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), t
, a
);
5199 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5200 false, GSI_CONTINUE_LINKING
);
5201 assign_stmt
= gimple_build_assign (dest
, t
);
5202 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5204 if (fd
->collapse
> 1)
5208 nonrect_bounds
= XALLOCAVEC (tree
, fd
->last_nonrect
+ 1);
5209 memset (nonrect_bounds
, 0, sizeof (tree
) * (fd
->last_nonrect
+ 1));
5211 expand_omp_for_init_vars (fd
, &gsi
, counts
, nonrect_bounds
, inner_stmt
,
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi
= gsi_last_nondebug_bb (cont_bb
);
5220 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5221 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
5222 vmain
= gimple_omp_continue_control_use (cont_stmt
);
5223 vback
= gimple_omp_continue_control_def (cont_stmt
);
5227 tree itype
= TREE_TYPE (cond_var
);
5229 if (POINTER_TYPE_P (type
)
5230 || TREE_CODE (n1
) != INTEGER_CST
5231 || fd
->loop
.cond_code
!= LT_EXPR
)
5232 t2
= build_int_cst (itype
, 1);
5234 t2
= fold_convert (itype
, step
);
5235 t2
= fold_build2 (PLUS_EXPR
, itype
, cond_var
, t2
);
5236 t2
= force_gimple_operand_gsi (&gsi
, t2
, false,
5237 NULL_TREE
, true, GSI_SAME_STMT
);
5238 assign_stmt
= gimple_build_assign (cond_var
, t2
);
5239 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5242 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
5244 if (POINTER_TYPE_P (type
))
5245 t
= fold_build_pointer_plus (vmain
, step
);
5247 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
5248 t
= force_gimple_operand_gsi (&gsi
, t
,
5250 && TREE_ADDRESSABLE (vback
),
5251 NULL_TREE
, true, GSI_SAME_STMT
);
5252 assign_stmt
= gimple_build_assign (vback
, t
);
5253 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5255 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
5256 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
5258 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5261 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5262 gsi_remove (&gsi
, true);
5264 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
5265 collapse_bb
= extract_omp_for_update_vars (fd
, nonrect_bounds
,
5269 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5270 gsi
= gsi_last_nondebug_bb (exit_bb
);
5271 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
5273 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
5274 if (fd
->have_reductemp
5275 || ((fd
->have_pointer_condtemp
|| fd
->have_scantemp
)
5276 && !fd
->have_nonctrl_scantemp
))
5280 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
5282 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
5283 gcall
*g
= gimple_build_call (fn
, 0);
5286 gimple_call_set_lhs (g
, t
);
5287 if (fd
->have_reductemp
)
5288 gsi_insert_after (&gsi
, gimple_build_assign (reductions
,
5292 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
5295 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
5297 else if ((fd
->have_pointer_condtemp
|| fd
->have_scantemp
)
5298 && !fd
->have_nonctrl_scantemp
)
5300 tree fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
5301 gcall
*g
= gimple_build_call (fn
, 0);
5302 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
5304 if (fd
->have_scantemp
&& !fd
->have_nonctrl_scantemp
)
5306 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
5307 tree controlp
= NULL_TREE
, controlb
= NULL_TREE
;
5308 for (tree c
= clauses
; c
; c
= OMP_CLAUSE_CHAIN (c
))
5309 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE__SCANTEMP_
5310 && OMP_CLAUSE__SCANTEMP__CONTROL (c
))
5312 if (TREE_TYPE (OMP_CLAUSE_DECL (c
)) == boolean_type_node
)
5313 controlb
= OMP_CLAUSE_DECL (c
);
5315 controlp
= OMP_CLAUSE_DECL (c
);
5316 if (controlb
&& controlp
)
5319 gcc_assert (controlp
&& controlb
);
5320 gimple
*g
= gimple_build_cond (NE_EXPR
, controlb
, boolean_false_node
,
5321 NULL_TREE
, NULL_TREE
);
5322 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5323 exit1_bb
= split_block (exit_bb
, g
)->dest
;
5324 gsi
= gsi_after_labels (exit1_bb
);
5325 g
= gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE
), 1,
5327 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5328 exit2_bb
= split_block (exit1_bb
, g
)->dest
;
5329 gsi
= gsi_after_labels (exit2_bb
);
5330 g
= gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE
), 1,
5332 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5333 exit3_bb
= split_block (exit2_bb
, g
)->dest
;
5334 gsi
= gsi_after_labels (exit3_bb
);
5336 gsi_remove (&gsi
, true);
5338 /* Connect all the blocks. */
5339 ep
= make_edge (entry_bb
, third_bb
, EDGE_FALSE_VALUE
);
5340 ep
->probability
= profile_probability::guessed_always ().apply_scale (3, 4);
5341 ep
= find_edge (entry_bb
, second_bb
);
5342 ep
->flags
= EDGE_TRUE_VALUE
;
5343 ep
->probability
= profile_probability::guessed_always ().apply_scale (1, 4);
5346 ep
= make_edge (third_bb
, fifth_bb
, EDGE_FALSE_VALUE
);
5348 = profile_probability::guessed_always ().apply_scale (1, 2);
5349 ep
= find_edge (third_bb
, fourth_bb
);
5350 ep
->flags
= EDGE_TRUE_VALUE
;
5352 = profile_probability::guessed_always ().apply_scale (1, 2);
5353 ep
= find_edge (fourth_bb
, fifth_bb
);
5354 redirect_edge_and_branch (ep
, sixth_bb
);
5357 sixth_bb
= third_bb
;
5358 find_edge (sixth_bb
, seq_start_bb
)->flags
= EDGE_FALSE_VALUE
;
5359 find_edge (sixth_bb
, fin_bb
)->flags
= EDGE_TRUE_VALUE
;
5362 ep
= make_edge (exit_bb
, exit2_bb
, EDGE_FALSE_VALUE
);
5364 = profile_probability::guessed_always ().apply_scale (1, 2);
5365 ep
= find_edge (exit_bb
, exit1_bb
);
5366 ep
->flags
= EDGE_TRUE_VALUE
;
5368 = profile_probability::guessed_always ().apply_scale (1, 2);
5369 ep
= find_edge (exit1_bb
, exit2_bb
);
5370 redirect_edge_and_branch (ep
, exit3_bb
);
5375 ep
= find_edge (cont_bb
, body_bb
);
5378 ep
= BRANCH_EDGE (cont_bb
);
5379 gcc_assert (single_succ (ep
->dest
) == body_bb
);
5381 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5386 else if (fd
->collapse
> 1)
5389 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
5392 ep
->flags
= EDGE_TRUE_VALUE
;
5393 find_edge (cont_bb
, fin_bb
)->flags
5394 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
5397 set_immediate_dominator (CDI_DOMINATORS
, second_bb
, entry_bb
);
5398 set_immediate_dominator (CDI_DOMINATORS
, third_bb
, entry_bb
);
5401 set_immediate_dominator (CDI_DOMINATORS
, fifth_bb
, third_bb
);
5402 set_immediate_dominator (CDI_DOMINATORS
, sixth_bb
, third_bb
);
5404 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
, sixth_bb
);
5406 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
5407 recompute_dominator (CDI_DOMINATORS
, body_bb
));
5408 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
5409 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
5412 set_immediate_dominator (CDI_DOMINATORS
, exit2_bb
, exit_bb
);
5413 set_immediate_dominator (CDI_DOMINATORS
, exit3_bb
, exit_bb
);
5416 class loop
*loop
= body_bb
->loop_father
;
5417 if (loop
!= entry_bb
->loop_father
)
5419 gcc_assert (broken_loop
|| loop
->header
== body_bb
);
5420 gcc_assert (broken_loop
5421 || loop
->latch
== region
->cont
5422 || single_pred (loop
->latch
) == region
->cont
);
5426 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
5428 loop
= alloc_loop ();
5429 loop
->header
= body_bb
;
5430 if (collapse_bb
== NULL
)
5431 loop
->latch
= cont_bb
;
5432 add_loop (loop
, body_bb
->loop_father
);
5436 /* Return phi in E->DEST with ARG on edge E. */
5439 find_phi_with_arg_on_edge (tree arg
, edge e
)
5441 basic_block bb
= e
->dest
;
5443 for (gphi_iterator gpi
= gsi_start_phis (bb
);
5447 gphi
*phi
= gpi
.phi ();
5448 if (PHI_ARG_DEF_FROM_EDGE (phi
, e
) == arg
)
5455 /* A subroutine of expand_omp_for. Generate code for a parallel
5456 loop with static schedule and a specified chunk size. Given
5459 for (V = N1; V cond N2; V += STEP) BODY;
5461 where COND is "<" or ">", we generate pseudocode
5463 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5468 if ((__typeof (V)) -1 > 0 && cond is >)
5469 n = -(adj + N2 - N1) / -STEP;
5471 n = (adj + N2 - N1) / STEP;
5473 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5474 here so that V is defined
5475 if the loop is not entered
5477 s0 = (trip * nthreads + threadid) * CHUNK;
5478 e0 = min (s0 + CHUNK, n);
5479 if (s0 < n) goto L1; else goto L4;
5486 if (V cond e) goto L2; else goto L3;
5494 expand_omp_for_static_chunk (struct omp_region
*region
,
5495 struct omp_for_data
*fd
, gimple
*inner_stmt
)
5497 tree n
, s0
, e0
, e
, t
;
5498 tree trip_var
, trip_init
, trip_main
, trip_back
, nthreads
, threadid
;
5499 tree type
, itype
, vmain
, vback
, vextra
;
5500 basic_block entry_bb
, exit_bb
, body_bb
, seq_start_bb
, iter_part_bb
;
5501 basic_block trip_update_bb
= NULL
, cont_bb
, collapse_bb
= NULL
, fin_bb
;
5502 gimple_stmt_iterator gsi
, gsip
;
5504 bool broken_loop
= region
->cont
== NULL
;
5505 tree
*counts
= NULL
;
5507 tree reductions
= NULL_TREE
;
5508 tree cond_var
= NULL_TREE
, condtemp
= NULL_TREE
;
5510 itype
= type
= TREE_TYPE (fd
->loop
.v
);
5511 if (POINTER_TYPE_P (type
))
5512 itype
= signed_type_for (type
);
5514 entry_bb
= region
->entry
;
5515 se
= split_block (entry_bb
, last_stmt (entry_bb
));
5517 iter_part_bb
= se
->dest
;
5518 cont_bb
= region
->cont
;
5519 gcc_assert (EDGE_COUNT (iter_part_bb
->succs
) == 2);
5520 fin_bb
= BRANCH_EDGE (iter_part_bb
)->dest
;
5521 gcc_assert (broken_loop
5522 || fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
);
5523 seq_start_bb
= split_edge (FALLTHRU_EDGE (iter_part_bb
));
5524 body_bb
= single_succ (seq_start_bb
);
5527 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
5528 || single_succ (BRANCH_EDGE (cont_bb
)->dest
) == body_bb
);
5529 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
5530 trip_update_bb
= split_edge (FALLTHRU_EDGE (cont_bb
));
5532 exit_bb
= region
->exit
;
5534 /* Trip and adjustment setup goes in ENTRY_BB. */
5535 gsi
= gsi_last_nondebug_bb (entry_bb
);
5536 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
5540 if (fd
->collapse
> 1)
5542 int first_zero_iter
= -1, dummy
= -1;
5543 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
5545 counts
= XALLOCAVEC (tree
, fd
->collapse
);
5546 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
5547 fin_bb
, first_zero_iter
,
5548 dummy_bb
, dummy
, l2_dom_bb
);
5551 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
5552 t
= integer_one_node
;
5554 t
= fold_binary (fd
->loop
.cond_code
, boolean_type_node
,
5555 fold_convert (type
, fd
->loop
.n1
),
5556 fold_convert (type
, fd
->loop
.n2
));
5557 if (fd
->collapse
== 1
5558 && TYPE_UNSIGNED (type
)
5559 && (t
== NULL_TREE
|| !integer_onep (t
)))
5561 n1
= fold_convert (type
, unshare_expr (fd
->loop
.n1
));
5562 n1
= force_gimple_operand_gsi (&gsi
, n1
, true, NULL_TREE
,
5563 true, GSI_SAME_STMT
);
5564 n2
= fold_convert (type
, unshare_expr (fd
->loop
.n2
));
5565 n2
= force_gimple_operand_gsi (&gsi
, n2
, true, NULL_TREE
,
5566 true, GSI_SAME_STMT
);
5567 gcond
*cond_stmt
= gimple_build_cond (fd
->loop
.cond_code
, n1
, n2
,
5568 NULL_TREE
, NULL_TREE
);
5569 gsi_insert_before (&gsi
, cond_stmt
, GSI_SAME_STMT
);
5570 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
5571 expand_omp_regimplify_p
, NULL
, NULL
)
5572 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
5573 expand_omp_regimplify_p
, NULL
, NULL
))
5575 gsi
= gsi_for_stmt (cond_stmt
);
5576 gimple_regimplify_operands (cond_stmt
, &gsi
);
5578 se
= split_block (entry_bb
, cond_stmt
);
5579 se
->flags
= EDGE_TRUE_VALUE
;
5580 entry_bb
= se
->dest
;
5581 se
->probability
= profile_probability::very_likely ();
5582 se
= make_edge (se
->src
, fin_bb
, EDGE_FALSE_VALUE
);
5583 se
->probability
= profile_probability::very_unlikely ();
5584 if (gimple_in_ssa_p (cfun
))
5586 int dest_idx
= find_edge (iter_part_bb
, fin_bb
)->dest_idx
;
5587 for (gphi_iterator gpi
= gsi_start_phis (fin_bb
);
5588 !gsi_end_p (gpi
); gsi_next (&gpi
))
5590 gphi
*phi
= gpi
.phi ();
5591 add_phi_arg (phi
, gimple_phi_arg_def (phi
, dest_idx
),
5592 se
, UNKNOWN_LOCATION
);
5595 gsi
= gsi_last_bb (entry_bb
);
5598 if (fd
->lastprivate_conditional
)
5600 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
5601 tree c
= omp_find_clause (clauses
, OMP_CLAUSE__CONDTEMP_
);
5602 if (fd
->have_pointer_condtemp
)
5603 condtemp
= OMP_CLAUSE_DECL (c
);
5604 c
= omp_find_clause (OMP_CLAUSE_CHAIN (c
), OMP_CLAUSE__CONDTEMP_
);
5605 cond_var
= OMP_CLAUSE_DECL (c
);
5607 if (fd
->have_reductemp
|| fd
->have_pointer_condtemp
)
5609 tree t1
= build_int_cst (long_integer_type_node
, 0);
5610 tree t2
= build_int_cst (long_integer_type_node
, 1);
5611 tree t3
= build_int_cstu (long_integer_type_node
,
5612 (HOST_WIDE_INT_1U
<< 31) + 1);
5613 tree clauses
= gimple_omp_for_clauses (fd
->for_stmt
);
5614 gimple_stmt_iterator gsi2
= gsi_none ();
5616 tree mem
= null_pointer_node
, memv
= NULL_TREE
;
5617 if (fd
->have_reductemp
)
5619 tree c
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
5620 reductions
= OMP_CLAUSE_DECL (c
);
5621 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
5622 g
= SSA_NAME_DEF_STMT (reductions
);
5623 reductions
= gimple_assign_rhs1 (g
);
5624 OMP_CLAUSE_DECL (c
) = reductions
;
5625 gsi2
= gsi_for_stmt (g
);
5629 if (gsi_end_p (gsip
))
5630 gsi2
= gsi_after_labels (region
->entry
);
5633 reductions
= null_pointer_node
;
5635 if (fd
->have_pointer_condtemp
)
5637 tree type
= TREE_TYPE (condtemp
);
5638 memv
= create_tmp_var (type
);
5639 TREE_ADDRESSABLE (memv
) = 1;
5640 unsigned HOST_WIDE_INT sz
5641 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
)));
5642 sz
*= fd
->lastprivate_conditional
;
5643 expand_omp_build_assign (&gsi2
, memv
, build_int_cst (type
, sz
),
5645 mem
= build_fold_addr_expr (memv
);
5648 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START
),
5649 9, t1
, t2
, t2
, t3
, t1
, null_pointer_node
,
5650 null_pointer_node
, reductions
, mem
);
5651 force_gimple_operand_gsi (&gsi2
, t
, true, NULL_TREE
,
5652 true, GSI_SAME_STMT
);
5653 if (fd
->have_pointer_condtemp
)
5654 expand_omp_build_assign (&gsi2
, condtemp
, memv
, false);
5655 if (fd
->have_reductemp
)
5657 gsi_remove (&gsi2
, true);
5658 release_ssa_name (gimple_assign_lhs (g
));
5661 switch (gimple_omp_for_kind (fd
->for_stmt
))
5663 case GF_OMP_FOR_KIND_FOR
:
5664 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS
);
5665 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM
);
5667 case GF_OMP_FOR_KIND_DISTRIBUTE
:
5668 nthreads
= builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS
);
5669 threadid
= builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM
);
5674 nthreads
= build_call_expr (nthreads
, 0);
5675 nthreads
= fold_convert (itype
, nthreads
);
5676 nthreads
= force_gimple_operand_gsi (&gsi
, nthreads
, true, NULL_TREE
,
5677 true, GSI_SAME_STMT
);
5678 threadid
= build_call_expr (threadid
, 0);
5679 threadid
= fold_convert (itype
, threadid
);
5680 threadid
= force_gimple_operand_gsi (&gsi
, threadid
, true, NULL_TREE
,
5681 true, GSI_SAME_STMT
);
5685 step
= fd
->loop
.step
;
5686 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
5688 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
5689 OMP_CLAUSE__LOOPTEMP_
);
5690 gcc_assert (innerc
);
5691 n1
= OMP_CLAUSE_DECL (innerc
);
5692 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5693 OMP_CLAUSE__LOOPTEMP_
);
5694 gcc_assert (innerc
);
5695 n2
= OMP_CLAUSE_DECL (innerc
);
5697 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
5698 true, NULL_TREE
, true, GSI_SAME_STMT
);
5699 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
5700 true, NULL_TREE
, true, GSI_SAME_STMT
);
5701 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
5702 true, NULL_TREE
, true, GSI_SAME_STMT
);
5703 tree chunk_size
= fold_convert (itype
, fd
->chunk_size
);
5704 chunk_size
= omp_adjust_chunk_size (chunk_size
, fd
->simd_schedule
);
5706 = force_gimple_operand_gsi (&gsi
, chunk_size
, true, NULL_TREE
, true,
5709 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
5710 t
= fold_build2 (PLUS_EXPR
, itype
, step
, t
);
5711 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2
);
5712 t
= fold_build2 (MINUS_EXPR
, itype
, t
, fold_convert (itype
, n1
));
5713 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
5714 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
5715 fold_build1 (NEGATE_EXPR
, itype
, t
),
5716 fold_build1 (NEGATE_EXPR
, itype
, step
));
5718 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
5719 t
= fold_convert (itype
, t
);
5720 n
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5721 true, GSI_SAME_STMT
);
5723 trip_var
= create_tmp_reg (itype
, ".trip");
5724 if (gimple_in_ssa_p (cfun
))
5726 trip_init
= make_ssa_name (trip_var
);
5727 trip_main
= make_ssa_name (trip_var
);
5728 trip_back
= make_ssa_name (trip_var
);
5732 trip_init
= trip_var
;
5733 trip_main
= trip_var
;
5734 trip_back
= trip_var
;
5737 gassign
*assign_stmt
5738 = gimple_build_assign (trip_init
, build_int_cst (itype
, 0));
5739 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5741 t
= fold_build2 (MULT_EXPR
, itype
, threadid
, chunk_size
);
5742 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
5743 if (POINTER_TYPE_P (type
))
5744 t
= fold_build_pointer_plus (n1
, t
);
5746 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
5747 vextra
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5748 true, GSI_SAME_STMT
);
5750 /* Remove the GIMPLE_OMP_FOR. */
5751 gsi_remove (&gsi
, true);
5753 gimple_stmt_iterator gsif
= gsi
;
5755 /* Iteration space partitioning goes in ITER_PART_BB. */
5756 gsi
= gsi_last_bb (iter_part_bb
);
5758 t
= fold_build2 (MULT_EXPR
, itype
, trip_main
, nthreads
);
5759 t
= fold_build2 (PLUS_EXPR
, itype
, t
, threadid
);
5760 t
= fold_build2 (MULT_EXPR
, itype
, t
, chunk_size
);
5761 s0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5762 false, GSI_CONTINUE_LINKING
);
5764 t
= fold_build2 (PLUS_EXPR
, itype
, s0
, chunk_size
);
5765 t
= fold_build2 (MIN_EXPR
, itype
, t
, n
);
5766 e0
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5767 false, GSI_CONTINUE_LINKING
);
5769 t
= build2 (LT_EXPR
, boolean_type_node
, s0
, n
);
5770 gsi_insert_after (&gsi
, gimple_build_cond_empty (t
), GSI_CONTINUE_LINKING
);
5772 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5773 gsi
= gsi_start_bb (seq_start_bb
);
5775 tree startvar
= fd
->loop
.v
;
5776 tree endvar
= NULL_TREE
;
5778 if (gimple_omp_for_combined_p (fd
->for_stmt
))
5780 tree clauses
= gimple_code (inner_stmt
) == GIMPLE_OMP_PARALLEL
5781 ? gimple_omp_parallel_clauses (inner_stmt
)
5782 : gimple_omp_for_clauses (inner_stmt
);
5783 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
5784 gcc_assert (innerc
);
5785 startvar
= OMP_CLAUSE_DECL (innerc
);
5786 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5787 OMP_CLAUSE__LOOPTEMP_
);
5788 gcc_assert (innerc
);
5789 endvar
= OMP_CLAUSE_DECL (innerc
);
5790 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
5791 && gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_DISTRIBUTE
)
5794 for (i
= 1; i
< fd
->collapse
; i
++)
5796 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5797 OMP_CLAUSE__LOOPTEMP_
);
5798 gcc_assert (innerc
);
5800 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
5801 OMP_CLAUSE__LOOPTEMP_
);
5804 /* If needed (distribute parallel for with lastprivate),
5805 propagate down the total number of iterations. */
5806 tree t
= fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc
)),
5808 t
= force_gimple_operand_gsi (&gsi
, t
, false, NULL_TREE
, false,
5809 GSI_CONTINUE_LINKING
);
5810 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
5811 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5816 t
= fold_convert (itype
, s0
);
5817 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
5818 if (POINTER_TYPE_P (type
))
5820 t
= fold_build_pointer_plus (n1
, t
);
5821 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
5822 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
5823 t
= fold_convert (signed_type_for (type
), t
);
5826 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
5827 t
= fold_convert (TREE_TYPE (startvar
), t
);
5828 t
= force_gimple_operand_gsi (&gsi
, t
,
5830 && TREE_ADDRESSABLE (startvar
),
5831 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5832 assign_stmt
= gimple_build_assign (startvar
, t
);
5833 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5836 tree itype
= TREE_TYPE (cond_var
);
5837 /* For lastprivate(conditional:) itervar, we need some iteration
5838 counter that starts at unsigned non-zero and increases.
5839 Prefer as few IVs as possible, so if we can use startvar
5840 itself, use that, or startvar + constant (those would be
5841 incremented with step), and as last resort use the s0 + 1
5842 incremented by 1. */
5843 if (POINTER_TYPE_P (type
)
5844 || TREE_CODE (n1
) != INTEGER_CST
5845 || fd
->loop
.cond_code
!= LT_EXPR
)
5846 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, s0
),
5847 build_int_cst (itype
, 1));
5848 else if (tree_int_cst_sgn (n1
) == 1)
5849 t
= fold_convert (itype
, t
);
5852 tree c
= fold_convert (itype
, n1
);
5853 c
= fold_build2 (MINUS_EXPR
, itype
, build_int_cst (itype
, 1), c
);
5854 t
= fold_build2 (PLUS_EXPR
, itype
, fold_convert (itype
, t
), c
);
5856 t
= force_gimple_operand_gsi (&gsi
, t
, false,
5857 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
5858 assign_stmt
= gimple_build_assign (cond_var
, t
);
5859 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5862 t
= fold_convert (itype
, e0
);
5863 t
= fold_build2 (MULT_EXPR
, itype
, t
, step
);
5864 if (POINTER_TYPE_P (type
))
5866 t
= fold_build_pointer_plus (n1
, t
);
5867 if (!POINTER_TYPE_P (TREE_TYPE (startvar
))
5868 && TYPE_PRECISION (TREE_TYPE (startvar
)) > TYPE_PRECISION (type
))
5869 t
= fold_convert (signed_type_for (type
), t
);
5872 t
= fold_build2 (PLUS_EXPR
, type
, t
, n1
);
5873 t
= fold_convert (TREE_TYPE (startvar
), t
);
5874 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5875 false, GSI_CONTINUE_LINKING
);
5878 assign_stmt
= gimple_build_assign (endvar
, e
);
5879 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5880 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
5881 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
5883 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
5884 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5886 /* Handle linear clause adjustments. */
5887 tree itercnt
= NULL_TREE
, itercntbias
= NULL_TREE
;
5888 if (gimple_omp_for_kind (fd
->for_stmt
) == GF_OMP_FOR_KIND_FOR
)
5889 for (tree c
= gimple_omp_for_clauses (fd
->for_stmt
);
5890 c
; c
= OMP_CLAUSE_CHAIN (c
))
5891 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LINEAR
5892 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c
))
5894 tree d
= OMP_CLAUSE_DECL (c
);
5895 bool is_ref
= omp_is_reference (d
);
5896 tree t
= d
, a
, dest
;
5898 t
= build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c
), t
);
5899 tree type
= TREE_TYPE (t
);
5900 if (POINTER_TYPE_P (type
))
5902 dest
= unshare_expr (t
);
5903 tree v
= create_tmp_var (TREE_TYPE (t
), NULL
);
5904 expand_omp_build_assign (&gsif
, v
, t
);
5905 if (itercnt
== NULL_TREE
)
5907 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
5910 = fold_build2 (MINUS_EXPR
, itype
, fold_convert (itype
, n1
),
5911 fold_convert (itype
, fd
->loop
.n1
));
5912 itercntbias
= fold_build2 (EXACT_DIV_EXPR
, itype
,
5915 = force_gimple_operand_gsi (&gsif
, itercntbias
, true,
5918 itercnt
= fold_build2 (PLUS_EXPR
, itype
, itercntbias
, s0
);
5919 itercnt
= force_gimple_operand_gsi (&gsi
, itercnt
, true,
5921 GSI_CONTINUE_LINKING
);
5926 a
= fold_build2 (MULT_EXPR
, type
,
5927 fold_convert (type
, itercnt
),
5928 fold_convert (type
, OMP_CLAUSE_LINEAR_STEP (c
)));
5929 t
= fold_build2 (type
== TREE_TYPE (t
) ? PLUS_EXPR
5930 : POINTER_PLUS_EXPR
, TREE_TYPE (t
), v
, a
);
5931 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5932 false, GSI_CONTINUE_LINKING
);
5933 assign_stmt
= gimple_build_assign (dest
, t
);
5934 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
5936 if (fd
->collapse
> 1)
5937 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, inner_stmt
, startvar
);
5941 /* The code controlling the sequential loop goes in CONT_BB,
5942 replacing the GIMPLE_OMP_CONTINUE. */
5943 gsi
= gsi_last_nondebug_bb (cont_bb
);
5944 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
5945 vmain
= gimple_omp_continue_control_use (cont_stmt
);
5946 vback
= gimple_omp_continue_control_def (cont_stmt
);
5950 tree itype
= TREE_TYPE (cond_var
);
5952 if (POINTER_TYPE_P (type
)
5953 || TREE_CODE (n1
) != INTEGER_CST
5954 || fd
->loop
.cond_code
!= LT_EXPR
)
5955 t2
= build_int_cst (itype
, 1);
5957 t2
= fold_convert (itype
, step
);
5958 t2
= fold_build2 (PLUS_EXPR
, itype
, cond_var
, t2
);
5959 t2
= force_gimple_operand_gsi (&gsi
, t2
, false,
5960 NULL_TREE
, true, GSI_SAME_STMT
);
5961 assign_stmt
= gimple_build_assign (cond_var
, t2
);
5962 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5965 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
5967 if (POINTER_TYPE_P (type
))
5968 t
= fold_build_pointer_plus (vmain
, step
);
5970 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
5971 if (DECL_P (vback
) && TREE_ADDRESSABLE (vback
))
5972 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
5973 true, GSI_SAME_STMT
);
5974 assign_stmt
= gimple_build_assign (vback
, t
);
5975 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
5977 if (tree_int_cst_equal (fd
->chunk_size
, integer_one_node
))
5978 t
= build2 (EQ_EXPR
, boolean_type_node
,
5979 build_int_cst (itype
, 0),
5980 build_int_cst (itype
, 1));
5982 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
5983 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
5985 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
5988 /* Remove GIMPLE_OMP_CONTINUE. */
5989 gsi_remove (&gsi
, true);
5991 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
5992 collapse_bb
= extract_omp_for_update_vars (fd
, NULL
, cont_bb
, body_bb
);
5994 /* Trip update code goes into TRIP_UPDATE_BB. */
5995 gsi
= gsi_start_bb (trip_update_bb
);
5997 t
= build_int_cst (itype
, 1);
5998 t
= build2 (PLUS_EXPR
, itype
, trip_main
, t
);
5999 assign_stmt
= gimple_build_assign (trip_back
, t
);
6000 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
6003 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6004 gsi
= gsi_last_nondebug_bb (exit_bb
);
6005 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi
)))
6007 t
= gimple_omp_return_lhs (gsi_stmt (gsi
));
6008 if (fd
->have_reductemp
|| fd
->have_pointer_condtemp
)
6012 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL
);
6014 fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END
);
6015 gcall
*g
= gimple_build_call (fn
, 0);
6018 gimple_call_set_lhs (g
, t
);
6019 if (fd
->have_reductemp
)
6020 gsi_insert_after (&gsi
, gimple_build_assign (reductions
,
6024 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
6027 gsi_insert_after (&gsi
, omp_build_barrier (t
), GSI_SAME_STMT
);
6029 else if (fd
->have_pointer_condtemp
)
6031 tree fn
= builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT
);
6032 gcall
*g
= gimple_build_call (fn
, 0);
6033 gsi_insert_after (&gsi
, g
, GSI_SAME_STMT
);
6035 gsi_remove (&gsi
, true);
6037 /* Connect the new blocks. */
6038 find_edge (iter_part_bb
, seq_start_bb
)->flags
= EDGE_TRUE_VALUE
;
6039 find_edge (iter_part_bb
, fin_bb
)->flags
= EDGE_FALSE_VALUE
;
6043 se
= find_edge (cont_bb
, body_bb
);
6046 se
= BRANCH_EDGE (cont_bb
);
6047 gcc_assert (single_succ (se
->dest
) == body_bb
);
6049 if (gimple_omp_for_combined_p (fd
->for_stmt
))
6054 else if (fd
->collapse
> 1)
6057 se
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
6060 se
->flags
= EDGE_TRUE_VALUE
;
6061 find_edge (cont_bb
, trip_update_bb
)->flags
6062 = se
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
6064 redirect_edge_and_branch (single_succ_edge (trip_update_bb
),
6068 if (gimple_in_ssa_p (cfun
))
6076 gcc_assert (fd
->collapse
== 1 && !broken_loop
);
6078 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6079 remove arguments of the phi nodes in fin_bb. We need to create
6080 appropriate phi nodes in iter_part_bb instead. */
6081 se
= find_edge (iter_part_bb
, fin_bb
);
6082 re
= single_succ_edge (trip_update_bb
);
6083 vec
<edge_var_map
> *head
= redirect_edge_var_map_vector (re
);
6084 ene
= single_succ_edge (entry_bb
);
6086 psi
= gsi_start_phis (fin_bb
);
6087 for (i
= 0; !gsi_end_p (psi
) && head
->iterate (i
, &vm
);
6088 gsi_next (&psi
), ++i
)
6094 if (operand_equal_p (gimple_phi_arg_def (phi
, 0),
6095 redirect_edge_var_map_def (vm
), 0))
6098 t
= gimple_phi_result (phi
);
6099 gcc_assert (t
== redirect_edge_var_map_result (vm
));
6101 if (!single_pred_p (fin_bb
))
6102 t
= copy_ssa_name (t
, phi
);
6104 nphi
= create_phi_node (t
, iter_part_bb
);
6106 t
= PHI_ARG_DEF_FROM_EDGE (phi
, se
);
6107 locus
= gimple_phi_arg_location_from_edge (phi
, se
);
6109 /* A special case -- fd->loop.v is not yet computed in
6110 iter_part_bb, we need to use vextra instead. */
6111 if (t
== fd
->loop
.v
)
6113 add_phi_arg (nphi
, t
, ene
, locus
);
6114 locus
= redirect_edge_var_map_location (vm
);
6115 tree back_arg
= redirect_edge_var_map_def (vm
);
6116 add_phi_arg (nphi
, back_arg
, re
, locus
);
6117 edge ce
= find_edge (cont_bb
, body_bb
);
6120 ce
= BRANCH_EDGE (cont_bb
);
6121 gcc_assert (single_succ (ce
->dest
) == body_bb
);
6122 ce
= single_succ_edge (ce
->dest
);
6124 gphi
*inner_loop_phi
= find_phi_with_arg_on_edge (back_arg
, ce
);
6125 gcc_assert (inner_loop_phi
!= NULL
);
6126 add_phi_arg (inner_loop_phi
, gimple_phi_result (nphi
),
6127 find_edge (seq_start_bb
, body_bb
), locus
);
6129 if (!single_pred_p (fin_bb
))
6130 add_phi_arg (phi
, gimple_phi_result (nphi
), se
, locus
);
6132 gcc_assert (gsi_end_p (psi
) && (head
== NULL
|| i
== head
->length ()));
6133 redirect_edge_var_map_clear (re
);
6134 if (single_pred_p (fin_bb
))
6137 psi
= gsi_start_phis (fin_bb
);
6138 if (gsi_end_p (psi
))
6140 remove_phi_node (&psi
, false);
6143 /* Make phi node for trip. */
6144 phi
= create_phi_node (trip_main
, iter_part_bb
);
6145 add_phi_arg (phi
, trip_back
, single_succ_edge (trip_update_bb
),
6147 add_phi_arg (phi
, trip_init
, single_succ_edge (entry_bb
),
6152 set_immediate_dominator (CDI_DOMINATORS
, trip_update_bb
, cont_bb
);
6153 set_immediate_dominator (CDI_DOMINATORS
, iter_part_bb
,
6154 recompute_dominator (CDI_DOMINATORS
, iter_part_bb
));
6155 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
6156 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
6157 set_immediate_dominator (CDI_DOMINATORS
, seq_start_bb
,
6158 recompute_dominator (CDI_DOMINATORS
, seq_start_bb
));
6159 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
6160 recompute_dominator (CDI_DOMINATORS
, body_bb
));
6164 class loop
*loop
= body_bb
->loop_father
;
6165 class loop
*trip_loop
= alloc_loop ();
6166 trip_loop
->header
= iter_part_bb
;
6167 trip_loop
->latch
= trip_update_bb
;
6168 add_loop (trip_loop
, iter_part_bb
->loop_father
);
6170 if (loop
!= entry_bb
->loop_father
)
6172 gcc_assert (loop
->header
== body_bb
);
6173 gcc_assert (loop
->latch
== region
->cont
6174 || single_pred (loop
->latch
) == region
->cont
);
6175 trip_loop
->inner
= loop
;
6179 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
6181 loop
= alloc_loop ();
6182 loop
->header
= body_bb
;
6183 if (collapse_bb
== NULL
)
6184 loop
->latch
= cont_bb
;
6185 add_loop (loop
, trip_loop
);
6190 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6191 loop. Given parameters:
6193 for (V = N1; V cond N2; V += STEP) BODY;
6195 where COND is "<" or ">", we generate pseudocode
6203 if (V cond N2) goto L0; else goto L2;
6206 For collapsed loops, emit the outer loops as scalar
6207 and only try to vectorize the innermost loop. */
6210 expand_omp_simd (struct omp_region
*region
, struct omp_for_data
*fd
)
6213 basic_block entry_bb
, cont_bb
, exit_bb
, l0_bb
, l1_bb
, l2_bb
, l2_dom_bb
;
6214 gimple_stmt_iterator gsi
;
6217 bool broken_loop
= region
->cont
== NULL
;
6219 tree
*counts
= NULL
;
6221 int safelen_int
= INT_MAX
;
6222 bool dont_vectorize
= false;
6223 tree safelen
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6224 OMP_CLAUSE_SAFELEN
);
6225 tree simduid
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6226 OMP_CLAUSE__SIMDUID_
);
6227 tree ifc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6229 tree simdlen
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6230 OMP_CLAUSE_SIMDLEN
);
6231 tree condtemp
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6232 OMP_CLAUSE__CONDTEMP_
);
6234 tree cond_var
= condtemp
? OMP_CLAUSE_DECL (condtemp
) : NULL_TREE
;
6239 safelen
= OMP_CLAUSE_SAFELEN_EXPR (safelen
);
6240 if (!poly_int_tree_p (safelen
, &val
))
6243 safelen_int
= MIN (constant_lower_bound (val
), INT_MAX
);
6244 if (safelen_int
== 1)
6247 if ((ifc
&& integer_zerop (OMP_CLAUSE_IF_EXPR (ifc
)))
6248 || (simdlen
&& integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen
))))
6251 dont_vectorize
= true;
6253 type
= TREE_TYPE (fd
->loop
.v
);
6254 entry_bb
= region
->entry
;
6255 cont_bb
= region
->cont
;
6256 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
6257 gcc_assert (broken_loop
6258 || BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
6259 l0_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
6262 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== l0_bb
);
6263 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
6264 l1_bb
= split_block (cont_bb
, last_stmt (cont_bb
))->dest
;
6265 l2_bb
= BRANCH_EDGE (entry_bb
)->dest
;
6269 BRANCH_EDGE (entry_bb
)->flags
&= ~EDGE_ABNORMAL
;
6270 l1_bb
= split_edge (BRANCH_EDGE (entry_bb
));
6271 l2_bb
= single_succ (l1_bb
);
6273 exit_bb
= region
->exit
;
6276 gsi
= gsi_last_nondebug_bb (entry_bb
);
6278 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
6279 /* Not needed in SSA form right now. */
6280 gcc_assert (!gimple_in_ssa_p (cfun
));
6281 if (fd
->collapse
> 1
6282 && (gimple_omp_for_combined_into_p (fd
->for_stmt
)
6285 int first_zero_iter
= -1, dummy
= -1;
6286 basic_block zero_iter_bb
= l2_bb
, dummy_bb
= NULL
;
6288 counts
= XALLOCAVEC (tree
, fd
->collapse
);
6289 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
6290 zero_iter_bb
, first_zero_iter
,
6291 dummy_bb
, dummy
, l2_dom_bb
);
6293 if (l2_dom_bb
== NULL
)
6298 if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
6300 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6301 OMP_CLAUSE__LOOPTEMP_
);
6302 gcc_assert (innerc
);
6303 n1
= OMP_CLAUSE_DECL (innerc
);
6304 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
6305 OMP_CLAUSE__LOOPTEMP_
);
6306 gcc_assert (innerc
);
6307 n2
= OMP_CLAUSE_DECL (innerc
);
6309 tree step
= fd
->loop
.step
;
6311 bool is_simt
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
6315 cfun
->curr_properties
&= ~PROP_gimple_lomp_dev
;
6316 is_simt
= safelen_int
> 1;
6318 tree simt_lane
= NULL_TREE
, simt_maxlane
= NULL_TREE
;
6321 simt_lane
= create_tmp_var (unsigned_type_node
);
6322 gimple
*g
= gimple_build_call_internal (IFN_GOMP_SIMT_LANE
, 0);
6323 gimple_call_set_lhs (g
, simt_lane
);
6324 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
6325 tree offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
,
6326 fold_convert (TREE_TYPE (step
), simt_lane
));
6327 n1
= fold_convert (type
, n1
);
6328 if (POINTER_TYPE_P (type
))
6329 n1
= fold_build_pointer_plus (n1
, offset
);
6331 n1
= fold_build2 (PLUS_EXPR
, type
, n1
, fold_convert (type
, offset
));
6333 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6334 if (fd
->collapse
> 1)
6335 simt_maxlane
= build_one_cst (unsigned_type_node
);
6336 else if (safelen_int
< omp_max_simt_vf ())
6337 simt_maxlane
= build_int_cst (unsigned_type_node
, safelen_int
);
6339 = build_call_expr_internal_loc (UNKNOWN_LOCATION
, IFN_GOMP_SIMT_VF
,
6340 unsigned_type_node
, 0);
6342 vf
= fold_build2 (MIN_EXPR
, unsigned_type_node
, vf
, simt_maxlane
);
6343 vf
= fold_convert (TREE_TYPE (step
), vf
);
6344 step
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), step
, vf
);
6347 tree n2var
= NULL_TREE
;
6348 tree n2v
= NULL_TREE
;
6349 tree
*nonrect_bounds
= NULL
;
6350 tree min_arg1
= NULL_TREE
, min_arg2
= NULL_TREE
;
6351 if (fd
->collapse
> 1)
6353 if (broken_loop
|| gimple_omp_for_combined_into_p (fd
->for_stmt
))
6357 nonrect_bounds
= XALLOCAVEC (tree
, fd
->last_nonrect
+ 1);
6358 memset (nonrect_bounds
, 0,
6359 sizeof (tree
) * (fd
->last_nonrect
+ 1));
6361 expand_omp_build_assign (&gsi
, fd
->loop
.v
, fold_convert (type
, n1
));
6362 gcc_assert (entry_bb
== gsi_bb (gsi
));
6363 gcc_assert (fd
->for_stmt
== gsi_stmt (gsi
));
6365 entry_bb
= split_block (entry_bb
, gsi_stmt (gsi
))->dest
;
6366 expand_omp_for_init_vars (fd
, &gsi
, counts
, nonrect_bounds
,
6368 gsi
= gsi_for_stmt (fd
->for_stmt
);
6372 else if (gimple_omp_for_combined_into_p (fd
->for_stmt
))
6374 /* Compute in n2var the limit for the first innermost loop,
6375 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6376 where cnt is how many iterations would the loop have if
6377 all further iterations were assigned to the current task. */
6378 n2var
= create_tmp_var (type
);
6379 i
= fd
->collapse
- 1;
6380 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
6381 if (POINTER_TYPE_P (itype
))
6382 itype
= signed_type_for (itype
);
6383 t
= build_int_cst (itype
, (fd
->loops
[i
].cond_code
== LT_EXPR
6385 t
= fold_build2 (PLUS_EXPR
, itype
,
6386 fold_convert (itype
, fd
->loops
[i
].step
), t
);
6387 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
6388 fold_convert (itype
, fd
->loops
[i
].n2
));
6389 if (fd
->loops
[i
].m2
)
6391 tree t2
= fold_convert (itype
,
6392 fd
->loops
[i
- fd
->loops
[i
].outer
].v
);
6393 tree t3
= fold_convert (itype
, fd
->loops
[i
].m2
);
6394 t2
= fold_build2 (MULT_EXPR
, TREE_TYPE (t
), t2
, t3
);
6395 t
= fold_build2 (PLUS_EXPR
, itype
, t
, t2
);
6397 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
6398 fold_convert (itype
, fd
->loops
[i
].v
));
6399 if (TYPE_UNSIGNED (itype
) && fd
->loops
[i
].cond_code
== GT_EXPR
)
6400 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
6401 fold_build1 (NEGATE_EXPR
, itype
, t
),
6402 fold_build1 (NEGATE_EXPR
, itype
,
6403 fold_convert (itype
,
6404 fd
->loops
[i
].step
)));
6406 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
6407 fold_convert (itype
, fd
->loops
[i
].step
));
6408 t
= fold_convert (type
, t
);
6409 tree t2
= fold_build2 (MINUS_EXPR
, type
, n2
, n1
);
6410 min_arg1
= create_tmp_var (type
);
6411 expand_omp_build_assign (&gsi
, min_arg1
, t2
);
6412 min_arg2
= create_tmp_var (type
);
6413 expand_omp_build_assign (&gsi
, min_arg2
, t
);
6417 if (TREE_CODE (n2
) == INTEGER_CST
)
6419 /* Indicate for lastprivate handling that at least one iteration
6420 has been performed, without wasting runtime. */
6421 if (integer_nonzerop (n2
))
6422 expand_omp_build_assign (&gsi
, fd
->loop
.v
,
6423 fold_convert (type
, n2
));
6425 /* Indicate that no iteration has been performed. */
6426 expand_omp_build_assign (&gsi
, fd
->loop
.v
,
6427 build_one_cst (type
));
6431 expand_omp_build_assign (&gsi
, fd
->loop
.v
,
6432 build_zero_cst (type
));
6433 expand_omp_build_assign (&gsi
, n2
, build_one_cst (type
));
6435 for (i
= 0; i
< fd
->collapse
; i
++)
6437 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
), fd
->loops
[i
].n1
);
6438 if (fd
->loops
[i
].m1
)
6441 = fold_convert (TREE_TYPE (t
),
6442 fd
->loops
[i
- fd
->loops
[i
].outer
].v
);
6443 tree t3
= fold_convert (TREE_TYPE (t
), fd
->loops
[i
].m1
);
6444 t2
= fold_build2 (MULT_EXPR
, TREE_TYPE (t
), t2
, t3
);
6445 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (t
), t
, t2
);
6447 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
6448 /* For normal non-combined collapsed loops just initialize
6449 the outermost iterator in the entry_bb. */
6456 expand_omp_build_assign (&gsi
, fd
->loop
.v
, fold_convert (type
, n1
));
6457 tree altv
= NULL_TREE
, altn2
= NULL_TREE
;
6458 if (fd
->collapse
== 1
6460 && TREE_CODE (fd
->loops
[0].step
) != INTEGER_CST
)
6462 /* The vectorizer currently punts on loops with non-constant steps
6463 for the main IV (can't compute number of iterations and gives up
6464 because of that). As for OpenMP loops it is always possible to
6465 compute the number of iterations upfront, use an alternate IV
6466 as the loop iterator:
6467 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6468 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6469 altv
= create_tmp_var (unsigned_type_for (TREE_TYPE (fd
->loops
[0].v
)));
6470 expand_omp_build_assign (&gsi
, altv
, build_zero_cst (TREE_TYPE (altv
)));
6471 tree itype
= TREE_TYPE (fd
->loop
.v
);
6472 if (POINTER_TYPE_P (itype
))
6473 itype
= signed_type_for (itype
);
6474 t
= build_int_cst (itype
, (fd
->loop
.cond_code
== LT_EXPR
? -1 : 1));
6475 t
= fold_build2 (PLUS_EXPR
, itype
,
6476 fold_convert (itype
, fd
->loop
.step
), t
);
6477 t
= fold_build2 (PLUS_EXPR
, itype
, t
, fold_convert (itype
, n2
));
6478 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
6479 fold_convert (itype
, fd
->loop
.v
));
6480 if (TYPE_UNSIGNED (itype
) && fd
->loop
.cond_code
== GT_EXPR
)
6481 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
6482 fold_build1 (NEGATE_EXPR
, itype
, t
),
6483 fold_build1 (NEGATE_EXPR
, itype
,
6484 fold_convert (itype
, fd
->loop
.step
)));
6486 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
,
6487 fold_convert (itype
, fd
->loop
.step
));
6488 t
= fold_convert (TREE_TYPE (altv
), t
);
6489 altn2
= create_tmp_var (TREE_TYPE (altv
));
6490 expand_omp_build_assign (&gsi
, altn2
, t
);
6491 tree t2
= fold_convert (TREE_TYPE (fd
->loop
.v
), n2
);
6492 t2
= force_gimple_operand_gsi (&gsi
, t2
, true, NULL_TREE
,
6493 true, GSI_SAME_STMT
);
6494 t2
= fold_build2 (fd
->loop
.cond_code
, boolean_type_node
, fd
->loop
.v
, t2
);
6495 gassign
*g
= gimple_build_assign (altn2
, COND_EXPR
, t2
, altn2
,
6496 build_zero_cst (TREE_TYPE (altv
)));
6497 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
6499 else if (fd
->collapse
> 1
6501 && !gimple_omp_for_combined_into_p (fd
->for_stmt
)
6502 && TREE_CODE (fd
->loops
[fd
->collapse
- 1].step
) != INTEGER_CST
)
6504 altv
= create_tmp_var (unsigned_type_for (TREE_TYPE (fd
->loops
[0].v
)));
6505 altn2
= create_tmp_var (TREE_TYPE (altv
));
6509 if (POINTER_TYPE_P (type
)
6510 || TREE_CODE (n1
) != INTEGER_CST
6511 || fd
->loop
.cond_code
!= LT_EXPR
6512 || tree_int_cst_sgn (n1
) != 1)
6513 expand_omp_build_assign (&gsi
, cond_var
,
6514 build_one_cst (TREE_TYPE (cond_var
)));
6516 expand_omp_build_assign (&gsi
, cond_var
,
6517 fold_convert (TREE_TYPE (cond_var
), n1
));
6520 /* Remove the GIMPLE_OMP_FOR statement. */
6521 gsi_remove (&gsi
, true);
6525 /* Code to control the increment goes in the CONT_BB. */
6526 gsi
= gsi_last_nondebug_bb (cont_bb
);
6527 stmt
= gsi_stmt (gsi
);
6528 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_CONTINUE
);
6530 if (fd
->collapse
== 1
6531 || gimple_omp_for_combined_into_p (fd
->for_stmt
))
6533 if (POINTER_TYPE_P (type
))
6534 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
6536 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
6537 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
6539 else if (TREE_CODE (n2
) != INTEGER_CST
)
6540 expand_omp_build_assign (&gsi
, fd
->loop
.v
, build_one_cst (type
));
6543 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (altv
), altv
,
6544 build_one_cst (TREE_TYPE (altv
)));
6545 expand_omp_build_assign (&gsi
, altv
, t
);
6548 if (fd
->collapse
> 1)
6550 i
= fd
->collapse
- 1;
6551 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
6553 t
= fold_convert (sizetype
, fd
->loops
[i
].step
);
6554 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, t
);
6558 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
),
6560 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
6563 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
6567 if (POINTER_TYPE_P (type
)
6568 || TREE_CODE (n1
) != INTEGER_CST
6569 || fd
->loop
.cond_code
!= LT_EXPR
6570 || tree_int_cst_sgn (n1
) != 1)
6571 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (cond_var
), cond_var
,
6572 build_one_cst (TREE_TYPE (cond_var
)));
6574 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (cond_var
), cond_var
,
6575 fold_convert (TREE_TYPE (cond_var
), step
));
6576 expand_omp_build_assign (&gsi
, cond_var
, t
);
6579 /* Remove GIMPLE_OMP_CONTINUE. */
6580 gsi_remove (&gsi
, true);
6583 /* Emit the condition in L1_BB. */
6584 gsi
= gsi_start_bb (l1_bb
);
6587 t
= build2 (LT_EXPR
, boolean_type_node
, altv
, altn2
);
6588 else if (fd
->collapse
> 1
6589 && !gimple_omp_for_combined_into_p (fd
->for_stmt
)
6592 i
= fd
->collapse
- 1;
6593 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
6594 if (fd
->loops
[i
].m2
)
6595 t
= n2v
= create_tmp_var (itype
);
6597 t
= fold_convert (itype
, fd
->loops
[i
].n2
);
6598 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
6599 false, GSI_CONTINUE_LINKING
);
6600 tree v
= fd
->loops
[i
].v
;
6601 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
6602 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
6603 false, GSI_CONTINUE_LINKING
);
6604 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
6608 if (fd
->collapse
> 1 && !broken_loop
)
6611 t
= fold_convert (type
, n2
);
6612 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
6613 false, GSI_CONTINUE_LINKING
);
6614 tree v
= fd
->loop
.v
;
6615 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
6616 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
6617 false, GSI_CONTINUE_LINKING
);
6618 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
, v
, t
);
6620 cond_stmt
= gimple_build_cond_empty (t
);
6621 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
6622 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
6624 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
), expand_omp_regimplify_p
,
6627 gsi
= gsi_for_stmt (cond_stmt
);
6628 gimple_regimplify_operands (cond_stmt
, &gsi
);
6631 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6634 gsi
= gsi_start_bb (l2_bb
);
6635 step
= fold_build2 (MINUS_EXPR
, TREE_TYPE (step
), fd
->loop
.step
, step
);
6636 if (POINTER_TYPE_P (type
))
6637 t
= fold_build_pointer_plus (fd
->loop
.v
, step
);
6639 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, step
);
6640 expand_omp_build_assign (&gsi
, fd
->loop
.v
, t
);
6643 /* Remove GIMPLE_OMP_RETURN. */
6644 gsi
= gsi_last_nondebug_bb (exit_bb
);
6645 gsi_remove (&gsi
, true);
6647 /* Connect the new blocks. */
6648 remove_edge (FALLTHRU_EDGE (entry_bb
));
6652 remove_edge (BRANCH_EDGE (entry_bb
));
6653 make_edge (entry_bb
, l1_bb
, EDGE_FALLTHRU
);
6655 e
= BRANCH_EDGE (l1_bb
);
6656 ne
= FALLTHRU_EDGE (l1_bb
);
6657 e
->flags
= EDGE_TRUE_VALUE
;
6661 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
6663 ne
= single_succ_edge (l1_bb
);
6664 e
= make_edge (l1_bb
, l0_bb
, EDGE_TRUE_VALUE
);
6667 ne
->flags
= EDGE_FALSE_VALUE
;
6668 e
->probability
= profile_probability::guessed_always ().apply_scale (7, 8);
6669 ne
->probability
= e
->probability
.invert ();
6671 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, entry_bb
);
6672 set_immediate_dominator (CDI_DOMINATORS
, l0_bb
, l1_bb
);
6676 cond_stmt
= gimple_build_cond (LT_EXPR
, simt_lane
, simt_maxlane
,
6677 NULL_TREE
, NULL_TREE
);
6678 gsi
= gsi_last_bb (entry_bb
);
6679 gsi_insert_after (&gsi
, cond_stmt
, GSI_NEW_STMT
);
6680 make_edge (entry_bb
, l2_bb
, EDGE_FALSE_VALUE
);
6681 FALLTHRU_EDGE (entry_bb
)->flags
= EDGE_TRUE_VALUE
;
6682 FALLTHRU_EDGE (entry_bb
)->probability
6683 = profile_probability::guessed_always ().apply_scale (7, 8);
6684 BRANCH_EDGE (entry_bb
)->probability
6685 = FALLTHRU_EDGE (entry_bb
)->probability
.invert ();
6686 l2_dom_bb
= entry_bb
;
6688 set_immediate_dominator (CDI_DOMINATORS
, l2_bb
, l2_dom_bb
);
6690 if (!broken_loop
&& fd
->collapse
> 1)
6692 basic_block last_bb
= l1_bb
;
6693 basic_block init_bb
= NULL
;
6694 for (i
= fd
->collapse
- 2; i
>= 0; i
--)
6696 tree nextn2v
= NULL_TREE
;
6697 if (EDGE_SUCC (last_bb
, 0)->flags
& EDGE_FALSE_VALUE
)
6698 e
= EDGE_SUCC (last_bb
, 0);
6700 e
= EDGE_SUCC (last_bb
, 1);
6701 basic_block bb
= split_edge (e
);
6702 if (POINTER_TYPE_P (TREE_TYPE (fd
->loops
[i
].v
)))
6704 t
= fold_convert (sizetype
, fd
->loops
[i
].step
);
6705 t
= fold_build_pointer_plus (fd
->loops
[i
].v
, t
);
6709 t
= fold_convert (TREE_TYPE (fd
->loops
[i
].v
),
6711 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (fd
->loops
[i
].v
),
6714 gsi
= gsi_after_labels (bb
);
6715 expand_omp_build_assign (&gsi
, fd
->loops
[i
].v
, t
);
6717 bb
= split_block (bb
, last_stmt (bb
))->dest
;
6718 gsi
= gsi_start_bb (bb
);
6719 tree itype
= TREE_TYPE (fd
->loops
[i
].v
);
6720 if (fd
->loops
[i
].m2
)
6721 t
= nextn2v
= create_tmp_var (itype
);
6723 t
= fold_convert (itype
, fd
->loops
[i
].n2
);
6724 t
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
6725 false, GSI_CONTINUE_LINKING
);
6726 tree v
= fd
->loops
[i
].v
;
6727 if (DECL_P (v
) && TREE_ADDRESSABLE (v
))
6728 v
= force_gimple_operand_gsi (&gsi
, v
, true, NULL_TREE
,
6729 false, GSI_CONTINUE_LINKING
);
6730 t
= build2 (fd
->loops
[i
].cond_code
, boolean_type_node
, v
, t
);
6731 cond_stmt
= gimple_build_cond_empty (t
);
6732 gsi_insert_after (&gsi
, cond_stmt
, GSI_CONTINUE_LINKING
);
6733 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt
),
6734 expand_omp_regimplify_p
, NULL
, NULL
)
6735 || walk_tree (gimple_cond_rhs_ptr (cond_stmt
),
6736 expand_omp_regimplify_p
, NULL
, NULL
))
6738 gsi
= gsi_for_stmt (cond_stmt
);
6739 gimple_regimplify_operands (cond_stmt
, &gsi
);
6741 ne
= single_succ_edge (bb
);
6742 ne
->flags
= EDGE_FALSE_VALUE
;
6744 init_bb
= create_empty_bb (bb
);
6745 set_immediate_dominator (CDI_DOMINATORS
, init_bb
, bb
);
6746 add_bb_to_loop (init_bb
, bb
->loop_father
);
6747 e
= make_edge (bb
, init_bb
, EDGE_TRUE_VALUE
);
6749 = profile_probability::guessed_always ().apply_scale (7, 8);
6750 ne
->probability
= e
->probability
.invert ();
6752 gsi
= gsi_after_labels (init_bb
);
6753 t
= fold_convert (TREE_TYPE (fd
->loops
[i
+ 1].v
),
6754 fd
->loops
[i
+ 1].n1
);
6755 if (fd
->loops
[i
+ 1].m1
)
6757 tree t2
= fold_convert (TREE_TYPE (t
),
6759 - fd
->loops
[i
+ 1].outer
].v
);
6760 tree t3
= fold_convert (TREE_TYPE (t
), fd
->loops
[i
+ 1].m1
);
6761 t2
= fold_build2 (MULT_EXPR
, TREE_TYPE (t
), t2
, t3
);
6762 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (t
), t
, t2
);
6764 expand_omp_build_assign (&gsi
, fd
->loops
[i
+ 1].v
, t
);
6765 if (fd
->loops
[i
+ 1].m2
)
6767 if (i
+ 2 == fd
->collapse
&& (n2var
|| altv
))
6769 gcc_assert (n2v
== NULL_TREE
);
6770 n2v
= create_tmp_var (TREE_TYPE (fd
->loops
[i
+ 1].v
));
6772 t
= fold_convert (TREE_TYPE (fd
->loops
[i
+ 1].v
),
6773 fd
->loops
[i
+ 1].n2
);
6774 tree t2
= fold_convert (TREE_TYPE (t
),
6776 - fd
->loops
[i
+ 1].outer
].v
);
6777 tree t3
= fold_convert (TREE_TYPE (t
), fd
->loops
[i
+ 1].m2
);
6778 t2
= fold_build2 (MULT_EXPR
, TREE_TYPE (t
), t2
, t3
);
6779 t
= fold_build2 (PLUS_EXPR
, TREE_TYPE (t
), t
, t2
);
6780 expand_omp_build_assign (&gsi
, n2v
, t
);
6782 if (i
+ 2 == fd
->collapse
&& n2var
)
6784 /* For composite simd, n2 is the first iteration the current
6785 task shouldn't already handle, so we effectively want to use
6786 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6787 as the vectorized loop. Except the vectorizer will not
6788 vectorize that, so instead compute N2VAR as
6789 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6790 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6791 as the loop to vectorize. */
6792 tree t2
= fold_build2 (MINUS_EXPR
, type
, n2
, fd
->loop
.v
);
6793 if (fd
->loops
[i
+ 1].m1
|| fd
->loops
[i
+ 1].m2
)
6795 t
= build_int_cst (itype
, (fd
->loops
[i
+ 1].cond_code
6796 == LT_EXPR
? -1 : 1));
6797 t
= fold_build2 (PLUS_EXPR
, itype
,
6798 fold_convert (itype
,
6799 fd
->loops
[i
+ 1].step
), t
);
6800 if (fd
->loops
[i
+ 1].m2
)
6801 t
= fold_build2 (PLUS_EXPR
, itype
, t
, n2v
);
6803 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
6804 fold_convert (itype
,
6805 fd
->loops
[i
+ 1].n2
));
6806 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
6807 fold_convert (itype
, fd
->loops
[i
+ 1].v
));
6808 tree step
= fold_convert (itype
, fd
->loops
[i
+ 1].step
);
6809 if (TYPE_UNSIGNED (itype
)
6810 && fd
->loops
[i
+ 1].cond_code
== GT_EXPR
)
6811 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
6812 fold_build1 (NEGATE_EXPR
, itype
, t
),
6813 fold_build1 (NEGATE_EXPR
, itype
, step
));
6815 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
6816 t
= fold_convert (type
, t
);
6820 expand_omp_build_assign (&gsi
, min_arg1
, t2
);
6821 expand_omp_build_assign (&gsi
, min_arg2
, t
);
6822 e
= split_block (init_bb
, last_stmt (init_bb
));
6823 gsi
= gsi_after_labels (e
->dest
);
6825 remove_edge (FALLTHRU_EDGE (entry_bb
));
6826 make_edge (entry_bb
, init_bb
, EDGE_FALLTHRU
);
6827 set_immediate_dominator (CDI_DOMINATORS
, init_bb
, entry_bb
);
6828 set_immediate_dominator (CDI_DOMINATORS
, l1_bb
, init_bb
);
6829 t
= fold_build2 (MIN_EXPR
, type
, min_arg1
, min_arg2
);
6830 t
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.v
, t
);
6831 expand_omp_build_assign (&gsi
, n2var
, t
);
6833 if (i
+ 2 == fd
->collapse
&& altv
)
6835 /* The vectorizer currently punts on loops with non-constant
6836 steps for the main IV (can't compute number of iterations
6837 and gives up because of that). As for OpenMP loops it is
6838 always possible to compute the number of iterations upfront,
6839 use an alternate IV as the loop iterator. */
6840 expand_omp_build_assign (&gsi
, altv
,
6841 build_zero_cst (TREE_TYPE (altv
)));
6842 tree itype
= TREE_TYPE (fd
->loops
[i
+ 1].v
);
6843 if (POINTER_TYPE_P (itype
))
6844 itype
= signed_type_for (itype
);
6845 t
= build_int_cst (itype
, (fd
->loops
[i
+ 1].cond_code
== LT_EXPR
6847 t
= fold_build2 (PLUS_EXPR
, itype
,
6848 fold_convert (itype
, fd
->loops
[i
+ 1].step
), t
);
6849 t
= fold_build2 (PLUS_EXPR
, itype
, t
,
6850 fold_convert (itype
,
6852 ? n2v
: fd
->loops
[i
+ 1].n2
));
6853 t
= fold_build2 (MINUS_EXPR
, itype
, t
,
6854 fold_convert (itype
, fd
->loops
[i
+ 1].v
));
6855 tree step
= fold_convert (itype
, fd
->loops
[i
+ 1].step
);
6856 if (TYPE_UNSIGNED (itype
)
6857 && fd
->loops
[i
+ 1].cond_code
== GT_EXPR
)
6858 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
,
6859 fold_build1 (NEGATE_EXPR
, itype
, t
),
6860 fold_build1 (NEGATE_EXPR
, itype
, step
));
6862 t
= fold_build2 (TRUNC_DIV_EXPR
, itype
, t
, step
);
6863 t
= fold_convert (TREE_TYPE (altv
), t
);
6864 expand_omp_build_assign (&gsi
, altn2
, t
);
6865 tree t2
= fold_convert (TREE_TYPE (fd
->loops
[i
+ 1].v
),
6867 ? n2v
: fd
->loops
[i
+ 1].n2
);
6868 t2
= force_gimple_operand_gsi (&gsi
, t2
, true, NULL_TREE
,
6869 true, GSI_SAME_STMT
);
6870 t2
= fold_build2 (fd
->loops
[i
+ 1].cond_code
, boolean_type_node
,
6871 fd
->loops
[i
+ 1].v
, t2
);
6873 = gimple_build_assign (altn2
, COND_EXPR
, t2
, altn2
,
6874 build_zero_cst (TREE_TYPE (altv
)));
6875 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
6879 make_edge (init_bb
, last_bb
, EDGE_FALLTHRU
);
6880 if (!gimple_omp_for_combined_into_p (fd
->for_stmt
))
6882 e
= find_edge (entry_bb
, last_bb
);
6883 redirect_edge_succ (e
, bb
);
6884 set_immediate_dominator (CDI_DOMINATORS
, bb
, entry_bb
);
6885 set_immediate_dominator (CDI_DOMINATORS
, last_bb
, init_bb
);
6893 class loop
*loop
= alloc_loop ();
6894 loop
->header
= l1_bb
;
6895 loop
->latch
= cont_bb
;
6896 add_loop (loop
, l1_bb
->loop_father
);
6897 loop
->safelen
= safelen_int
;
6900 loop
->simduid
= OMP_CLAUSE__SIMDUID__DECL (simduid
);
6901 cfun
->has_simduid_loops
= true;
6903 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6905 if ((flag_tree_loop_vectorize
6906 || !global_options_set
.x_flag_tree_loop_vectorize
)
6907 && flag_tree_loop_optimize
6908 && loop
->safelen
> 1)
6910 loop
->force_vectorize
= true;
6911 if (simdlen
&& tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen
)))
6913 unsigned HOST_WIDE_INT v
6914 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen
));
6915 if (v
< INT_MAX
&& v
<= (unsigned HOST_WIDE_INT
) loop
->safelen
)
6918 cfun
->has_force_vectorize_loops
= true;
6920 else if (dont_vectorize
)
6921 loop
->dont_vectorize
= true;
6924 cfun
->has_simduid_loops
= true;
6927 /* Taskloop construct is represented after gimplification with
6928 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6929 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6930 which should just compute all the needed loop temporaries
6931 for GIMPLE_OMP_TASK. */
6934 expand_omp_taskloop_for_outer (struct omp_region
*region
,
6935 struct omp_for_data
*fd
,
6938 tree type
, bias
= NULL_TREE
;
6939 basic_block entry_bb
, cont_bb
, exit_bb
;
6940 gimple_stmt_iterator gsi
;
6941 gassign
*assign_stmt
;
6942 tree
*counts
= NULL
;
6945 gcc_assert (inner_stmt
);
6946 gcc_assert (region
->cont
);
6947 gcc_assert (gimple_code (inner_stmt
) == GIMPLE_OMP_TASK
6948 && gimple_omp_task_taskloop_p (inner_stmt
));
6949 type
= TREE_TYPE (fd
->loop
.v
);
6951 /* See if we need to bias by LLONG_MIN. */
6952 if (fd
->iter_type
== long_long_unsigned_type_node
6953 && TREE_CODE (type
) == INTEGER_TYPE
6954 && !TYPE_UNSIGNED (type
))
6958 if (fd
->loop
.cond_code
== LT_EXPR
)
6961 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
6965 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
6968 if (TREE_CODE (n1
) != INTEGER_CST
6969 || TREE_CODE (n2
) != INTEGER_CST
6970 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
6971 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
6974 entry_bb
= region
->entry
;
6975 cont_bb
= region
->cont
;
6976 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
6977 gcc_assert (BRANCH_EDGE (entry_bb
)->dest
== FALLTHRU_EDGE (cont_bb
)->dest
);
6978 exit_bb
= region
->exit
;
6980 gsi
= gsi_last_nondebug_bb (entry_bb
);
6981 gimple
*for_stmt
= gsi_stmt (gsi
);
6982 gcc_assert (gimple_code (for_stmt
) == GIMPLE_OMP_FOR
);
6983 if (fd
->collapse
> 1)
6985 int first_zero_iter
= -1, dummy
= -1;
6986 basic_block zero_iter_bb
= NULL
, dummy_bb
= NULL
, l2_dom_bb
= NULL
;
6988 counts
= XALLOCAVEC (tree
, fd
->collapse
);
6989 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
6990 zero_iter_bb
, first_zero_iter
,
6991 dummy_bb
, dummy
, l2_dom_bb
);
6995 /* Some counts[i] vars might be uninitialized if
6996 some loop has zero iterations. But the body shouldn't
6997 be executed in that case, so just avoid uninit warnings. */
6998 for (i
= first_zero_iter
; i
< fd
->collapse
; i
++)
6999 if (SSA_VAR_P (counts
[i
]))
7000 TREE_NO_WARNING (counts
[i
]) = 1;
7002 edge e
= split_block (entry_bb
, gsi_stmt (gsi
));
7004 make_edge (zero_iter_bb
, entry_bb
, EDGE_FALLTHRU
);
7005 gsi
= gsi_last_bb (entry_bb
);
7006 set_immediate_dominator (CDI_DOMINATORS
, entry_bb
,
7007 get_immediate_dominator (CDI_DOMINATORS
,
7015 if (POINTER_TYPE_P (TREE_TYPE (t0
))
7016 && TYPE_PRECISION (TREE_TYPE (t0
))
7017 != TYPE_PRECISION (fd
->iter_type
))
7019 /* Avoid casting pointers to integer of a different size. */
7020 tree itype
= signed_type_for (type
);
7021 t1
= fold_convert (fd
->iter_type
, fold_convert (itype
, t1
));
7022 t0
= fold_convert (fd
->iter_type
, fold_convert (itype
, t0
));
7026 t1
= fold_convert (fd
->iter_type
, t1
);
7027 t0
= fold_convert (fd
->iter_type
, t0
);
7031 t1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t1
, bias
);
7032 t0
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, t0
, bias
);
7035 tree innerc
= omp_find_clause (gimple_omp_task_clauses (inner_stmt
),
7036 OMP_CLAUSE__LOOPTEMP_
);
7037 gcc_assert (innerc
);
7038 tree startvar
= OMP_CLAUSE_DECL (innerc
);
7039 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
7040 gcc_assert (innerc
);
7041 tree endvar
= OMP_CLAUSE_DECL (innerc
);
7042 if (fd
->collapse
> 1 && TREE_CODE (fd
->loop
.n2
) != INTEGER_CST
)
7044 gcc_assert (innerc
);
7045 for (i
= 1; i
< fd
->collapse
; i
++)
7047 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
7048 OMP_CLAUSE__LOOPTEMP_
);
7049 gcc_assert (innerc
);
7051 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
7052 OMP_CLAUSE__LOOPTEMP_
);
7055 /* If needed (inner taskloop has lastprivate clause), propagate
7056 down the total number of iterations. */
7057 tree t
= force_gimple_operand_gsi (&gsi
, fd
->loop
.n2
, false,
7059 GSI_CONTINUE_LINKING
);
7060 assign_stmt
= gimple_build_assign (OMP_CLAUSE_DECL (innerc
), t
);
7061 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7065 t0
= force_gimple_operand_gsi (&gsi
, t0
, false, NULL_TREE
, false,
7066 GSI_CONTINUE_LINKING
);
7067 assign_stmt
= gimple_build_assign (startvar
, t0
);
7068 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7070 t1
= force_gimple_operand_gsi (&gsi
, t1
, false, NULL_TREE
, false,
7071 GSI_CONTINUE_LINKING
);
7072 assign_stmt
= gimple_build_assign (endvar
, t1
);
7073 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7074 if (fd
->collapse
> 1)
7075 expand_omp_for_init_vars (fd
, &gsi
, counts
, NULL
, inner_stmt
, startvar
);
7077 /* Remove the GIMPLE_OMP_FOR statement. */
7078 gsi
= gsi_for_stmt (for_stmt
);
7079 gsi_remove (&gsi
, true);
7081 gsi
= gsi_last_nondebug_bb (cont_bb
);
7082 gsi_remove (&gsi
, true);
7084 gsi
= gsi_last_nondebug_bb (exit_bb
);
7085 gsi_remove (&gsi
, true);
7087 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
7088 remove_edge (BRANCH_EDGE (entry_bb
));
7089 FALLTHRU_EDGE (cont_bb
)->probability
= profile_probability::always ();
7090 remove_edge (BRANCH_EDGE (cont_bb
));
7091 set_immediate_dominator (CDI_DOMINATORS
, exit_bb
, cont_bb
);
7092 set_immediate_dominator (CDI_DOMINATORS
, region
->entry
,
7093 recompute_dominator (CDI_DOMINATORS
, region
->entry
));
7096 /* Taskloop construct is represented after gimplification with
7097 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7098 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7099 GOMP_taskloop{,_ull} function arranges for each task to be given just
7100 a single range of iterations. */
7103 expand_omp_taskloop_for_inner (struct omp_region
*region
,
7104 struct omp_for_data
*fd
,
7107 tree e
, t
, type
, itype
, vmain
, vback
, bias
= NULL_TREE
;
7108 basic_block entry_bb
, exit_bb
, body_bb
, cont_bb
, collapse_bb
= NULL
;
7110 gimple_stmt_iterator gsi
;
7112 bool broken_loop
= region
->cont
== NULL
;
7113 tree
*counts
= NULL
;
7116 itype
= type
= TREE_TYPE (fd
->loop
.v
);
7117 if (POINTER_TYPE_P (type
))
7118 itype
= signed_type_for (type
);
7120 /* See if we need to bias by LLONG_MIN. */
7121 if (fd
->iter_type
== long_long_unsigned_type_node
7122 && TREE_CODE (type
) == INTEGER_TYPE
7123 && !TYPE_UNSIGNED (type
))
7127 if (fd
->loop
.cond_code
== LT_EXPR
)
7130 n2
= fold_build2 (PLUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
7134 n1
= fold_build2 (MINUS_EXPR
, type
, fd
->loop
.n2
, fd
->loop
.step
);
7137 if (TREE_CODE (n1
) != INTEGER_CST
7138 || TREE_CODE (n2
) != INTEGER_CST
7139 || ((tree_int_cst_sgn (n1
) < 0) ^ (tree_int_cst_sgn (n2
) < 0)))
7140 bias
= fold_convert (fd
->iter_type
, TYPE_MIN_VALUE (type
));
7143 entry_bb
= region
->entry
;
7144 cont_bb
= region
->cont
;
7145 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2);
7146 fin_bb
= BRANCH_EDGE (entry_bb
)->dest
;
7147 gcc_assert (broken_loop
7148 || (fin_bb
== FALLTHRU_EDGE (cont_bb
)->dest
));
7149 body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
7152 gcc_assert (BRANCH_EDGE (cont_bb
)->dest
== body_bb
);
7153 gcc_assert (EDGE_COUNT (cont_bb
->succs
) == 2);
7155 exit_bb
= region
->exit
;
7157 /* Iteration space partitioning goes in ENTRY_BB. */
7158 gsi
= gsi_last_nondebug_bb (entry_bb
);
7159 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_FOR
);
7161 if (fd
->collapse
> 1)
7163 int first_zero_iter
= -1, dummy
= -1;
7164 basic_block l2_dom_bb
= NULL
, dummy_bb
= NULL
;
7166 counts
= XALLOCAVEC (tree
, fd
->collapse
);
7167 expand_omp_for_init_counts (fd
, &gsi
, entry_bb
, counts
,
7168 fin_bb
, first_zero_iter
,
7169 dummy_bb
, dummy
, l2_dom_bb
);
7173 t
= integer_one_node
;
7175 step
= fd
->loop
.step
;
7176 tree innerc
= omp_find_clause (gimple_omp_for_clauses (fd
->for_stmt
),
7177 OMP_CLAUSE__LOOPTEMP_
);
7178 gcc_assert (innerc
);
7179 n1
= OMP_CLAUSE_DECL (innerc
);
7180 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
), OMP_CLAUSE__LOOPTEMP_
);
7181 gcc_assert (innerc
);
7182 n2
= OMP_CLAUSE_DECL (innerc
);
7185 n1
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n1
, bias
);
7186 n2
= fold_build2 (PLUS_EXPR
, fd
->iter_type
, n2
, bias
);
7188 n1
= force_gimple_operand_gsi (&gsi
, fold_convert (type
, n1
),
7189 true, NULL_TREE
, true, GSI_SAME_STMT
);
7190 n2
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, n2
),
7191 true, NULL_TREE
, true, GSI_SAME_STMT
);
7192 step
= force_gimple_operand_gsi (&gsi
, fold_convert (itype
, step
),
7193 true, NULL_TREE
, true, GSI_SAME_STMT
);
7195 tree startvar
= fd
->loop
.v
;
7196 tree endvar
= NULL_TREE
;
7198 if (gimple_omp_for_combined_p (fd
->for_stmt
))
7200 tree clauses
= gimple_omp_for_clauses (inner_stmt
);
7201 tree innerc
= omp_find_clause (clauses
, OMP_CLAUSE__LOOPTEMP_
);
7202 gcc_assert (innerc
);
7203 startvar
= OMP_CLAUSE_DECL (innerc
);
7204 innerc
= omp_find_clause (OMP_CLAUSE_CHAIN (innerc
),
7205 OMP_CLAUSE__LOOPTEMP_
);
7206 gcc_assert (innerc
);
7207 endvar
= OMP_CLAUSE_DECL (innerc
);
7209 t
= fold_convert (TREE_TYPE (startvar
), n1
);
7210 t
= force_gimple_operand_gsi (&gsi
, t
,
7212 && TREE_ADDRESSABLE (startvar
),
7213 NULL_TREE
, false, GSI_CONTINUE_LINKING
);
7214 gimple
*assign_stmt
= gimple_build_assign (startvar
, t
);
7215 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7217 t
= fold_convert (TREE_TYPE (startvar
), n2
);
7218 e
= force_gimple_operand_gsi (&gsi
, t
, true, NULL_TREE
,
7219 false, GSI_CONTINUE_LINKING
);
7222 assign_stmt
= gimple_build_assign (endvar
, e
);
7223 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7224 if (useless_type_conversion_p (TREE_TYPE (fd
->loop
.v
), TREE_TYPE (e
)))
7225 assign_stmt
= gimple_build_assign (fd
->loop
.v
, e
);
7227 assign_stmt
= gimple_build_assign (fd
->loop
.v
, NOP_EXPR
, e
);
7228 gsi_insert_after (&gsi
, assign_stmt
, GSI_CONTINUE_LINKING
);
7231 tree
*nonrect_bounds
= NULL
;
7232 if (fd
->collapse
> 1)
7236 nonrect_bounds
= XALLOCAVEC (tree
, fd
->last_nonrect
+ 1);
7237 memset (nonrect_bounds
, 0, sizeof (tree
) * (fd
->last_nonrect
+ 1));
7239 gcc_assert (gsi_bb (gsi
) == entry_bb
);
7240 expand_omp_for_init_vars (fd
, &gsi
, counts
, nonrect_bounds
, inner_stmt
,
7242 entry_bb
= gsi_bb (gsi
);
7247 /* The code controlling the sequential loop replaces the
7248 GIMPLE_OMP_CONTINUE. */
7249 gsi
= gsi_last_nondebug_bb (cont_bb
);
7250 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
7251 gcc_assert (gimple_code (cont_stmt
) == GIMPLE_OMP_CONTINUE
);
7252 vmain
= gimple_omp_continue_control_use (cont_stmt
);
7253 vback
= gimple_omp_continue_control_def (cont_stmt
);
7255 if (!gimple_omp_for_combined_p (fd
->for_stmt
))
7257 if (POINTER_TYPE_P (type
))
7258 t
= fold_build_pointer_plus (vmain
, step
);
7260 t
= fold_build2 (PLUS_EXPR
, type
, vmain
, step
);
7261 t
= force_gimple_operand_gsi (&gsi
, t
,
7263 && TREE_ADDRESSABLE (vback
),
7264 NULL_TREE
, true, GSI_SAME_STMT
);
7265 assign_stmt
= gimple_build_assign (vback
, t
);
7266 gsi_insert_before (&gsi
, assign_stmt
, GSI_SAME_STMT
);
7268 t
= build2 (fd
->loop
.cond_code
, boolean_type_node
,
7269 DECL_P (vback
) && TREE_ADDRESSABLE (vback
)
7271 gsi_insert_before (&gsi
, gimple_build_cond_empty (t
), GSI_SAME_STMT
);
7274 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7275 gsi_remove (&gsi
, true);
7277 if (fd
->collapse
> 1 && !gimple_omp_for_combined_p (fd
->for_stmt
))
7278 collapse_bb
= extract_omp_for_update_vars (fd
, nonrect_bounds
,
7282 /* Remove the GIMPLE_OMP_FOR statement. */
7283 gsi
= gsi_for_stmt (fd
->for_stmt
);
7284 gsi_remove (&gsi
, true);
7286 /* Remove the GIMPLE_OMP_RETURN statement. */
7287 gsi
= gsi_last_nondebug_bb (exit_bb
);
7288 gsi_remove (&gsi
, true);
7290 FALLTHRU_EDGE (entry_bb
)->probability
= profile_probability::always ();
7292 remove_edge (BRANCH_EDGE (entry_bb
));
7295 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb
));
7296 region
->outer
->cont
= NULL
;
7299 /* Connect all the blocks. */
7302 ep
= find_edge (cont_bb
, body_bb
);
7303 if (gimple_omp_for_combined_p (fd
->for_stmt
))
7308 else if (fd
->collapse
> 1)
7311 ep
= make_edge (cont_bb
, collapse_bb
, EDGE_TRUE_VALUE
);
7314 ep
->flags
= EDGE_TRUE_VALUE
;
7315 find_edge (cont_bb
, fin_bb
)->flags
7316 = ep
? EDGE_FALSE_VALUE
: EDGE_FALLTHRU
;
7319 set_immediate_dominator (CDI_DOMINATORS
, body_bb
,
7320 recompute_dominator (CDI_DOMINATORS
, body_bb
));
7322 set_immediate_dominator (CDI_DOMINATORS
, fin_bb
,
7323 recompute_dominator (CDI_DOMINATORS
, fin_bb
));
7325 if (!broken_loop
&& !gimple_omp_for_combined_p (fd
->for_stmt
))
7327 class loop
*loop
= alloc_loop ();
7328 loop
->header
= body_bb
;
7329 if (collapse_bb
== NULL
)
7330 loop
->latch
= cont_bb
;
7331 add_loop (loop
, body_bb
->loop_father
);
7335 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7336 partitioned loop. The lowering here is abstracted, in that the
7337 loop parameters are passed through internal functions, which are
7338 further lowered by oacc_device_lower, once we get to the target
7339 compiler. The loop is of the form:
7341 for (V = B; V LTGT E; V += S) {BODY}
7343 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7344 (constant 0 for no chunking) and we will have a GWV partitioning
7345 mask, specifying dimensions over which the loop is to be
7346 partitioned (see note below). We generate code that looks like
7347 (this ignores tiling):
7349 <entry_bb> [incoming FALL->body, BRANCH->exit]
7350 typedef signedintify (typeof (V)) T; // underlying signed integral type
7353 T DIR = LTGT == '<' ? +1 : -1;
7354 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7355 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7357 <head_bb> [created by splitting end of entry_bb]
7358 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7359 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7360 if (!(offset LTGT bound)) goto bottom_bb;
7362 <body_bb> [incoming]
7366 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7368 if (offset LTGT bound) goto body_bb; [*]
7370 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7372 if (chunk < chunk_max) goto head_bb;
7374 <exit_bb> [incoming]
7375 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7377 [*] Needed if V live at end of loop. */
7380 expand_oacc_for (struct omp_region
*region
, struct omp_for_data
*fd
)
7382 tree v
= fd
->loop
.v
;
7383 enum tree_code cond_code
= fd
->loop
.cond_code
;
7384 enum tree_code plus_code
= PLUS_EXPR
;
7386 tree chunk_size
= integer_minus_one_node
;
7387 tree gwv
= integer_zero_node
;
7388 tree iter_type
= TREE_TYPE (v
);
7389 tree diff_type
= iter_type
;
7390 tree plus_type
= iter_type
;
7391 struct oacc_collapse
*counts
= NULL
;
7393 gcc_checking_assert (gimple_omp_for_kind (fd
->for_stmt
)
7394 == GF_OMP_FOR_KIND_OACC_LOOP
);
7395 gcc_assert (!gimple_omp_for_combined_into_p (fd
->for_stmt
));
7396 gcc_assert (cond_code
== LT_EXPR
|| cond_code
== GT_EXPR
);
7398 if (POINTER_TYPE_P (iter_type
))
7400 plus_code
= POINTER_PLUS_EXPR
;
7401 plus_type
= sizetype
;
7403 if (POINTER_TYPE_P (diff_type
) || TYPE_UNSIGNED (diff_type
))
7404 diff_type
= signed_type_for (diff_type
);
7405 if (TYPE_PRECISION (diff_type
) < TYPE_PRECISION (integer_type_node
))
7406 diff_type
= integer_type_node
;
7408 basic_block entry_bb
= region
->entry
; /* BB ending in OMP_FOR */
7409 basic_block exit_bb
= region
->exit
; /* BB ending in OMP_RETURN */
7410 basic_block cont_bb
= region
->cont
; /* BB ending in OMP_CONTINUE */
7411 basic_block bottom_bb
= NULL
;
7413 /* entry_bb has two successors; the branch edge is to the exit
7414 block, fallthrough edge to body. */
7415 gcc_assert (EDGE_COUNT (entry_bb
->succs
) == 2
7416 && BRANCH_EDGE (entry_bb
)->dest
== exit_bb
);
7418 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7419 body_bb, or to a block whose only successor is the body_bb. Its
7420 fallthrough successor is the final block (same as the branch
7421 successor of the entry_bb). */
7424 basic_block body_bb
= FALLTHRU_EDGE (entry_bb
)->dest
;
7425 basic_block bed
= BRANCH_EDGE (cont_bb
)->dest
;
7427 gcc_assert (FALLTHRU_EDGE (cont_bb
)->dest
== exit_bb
);
7428 gcc_assert (bed
== body_bb
|| single_succ_edge (bed
)->dest
== body_bb
);
7431 gcc_assert (!gimple_in_ssa_p (cfun
));
7433 /* The exit block only has entry_bb and cont_bb as predecessors. */
7434 gcc_assert (EDGE_COUNT (exit_bb
->preds
) == 1 + (cont_bb
!= NULL
));
7437 tree chunk_max
= NULL_TREE
;
7439 tree step
= create_tmp_var (diff_type
, ".step");
7440 bool up
= cond_code
== LT_EXPR
;
7441 tree dir
= build_int_cst (diff_type
, up
? +1 : -1);
7442 bool chunking
= !gimple_in_ssa_p (cfun
);
7446 tree tile_size
= NULL_TREE
;
7447 tree element_s
= NULL_TREE
;
7448 tree e_bound
= NULL_TREE
, e_offset
= NULL_TREE
, e_step
= NULL_TREE
;
7449 basic_block elem_body_bb
= NULL
;
7450 basic_block elem_cont_bb
= NULL
;
7452 /* SSA instances. */
7453 tree offset_incr
= NULL_TREE
;
7454 tree offset_init
= NULL_TREE
;
7456 gimple_stmt_iterator gsi
;
7462 edge split
, be
, fte
;
7464 /* Split the end of entry_bb to create head_bb. */
7465 split
= split_block (entry_bb
, last_stmt (entry_bb
));
7466 basic_block head_bb
= split
->dest
;
7467 entry_bb
= split
->src
;
7469 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7470 gsi
= gsi_last_nondebug_bb (entry_bb
);
7471 gomp_for
*for_stmt
= as_a
<gomp_for
*> (gsi_stmt (gsi
));
7472 loc
= gimple_location (for_stmt
);
7474 if (gimple_in_ssa_p (cfun
))
7476 offset_init
= gimple_omp_for_index (for_stmt
, 0);
7477 gcc_assert (integer_zerop (fd
->loop
.n1
));
7478 /* The SSA parallelizer does gang parallelism. */
7479 gwv
= build_int_cst (integer_type_node
, GOMP_DIM_MASK (GOMP_DIM_GANG
));
7482 if (fd
->collapse
> 1 || fd
->tiling
)
7484 gcc_assert (!gimple_in_ssa_p (cfun
) && up
);
7485 counts
= XALLOCAVEC (struct oacc_collapse
, fd
->collapse
);
7486 tree total
= expand_oacc_collapse_init (fd
, &gsi
, counts
,
7487 TREE_TYPE (fd
->loop
.n2
), loc
);
7489 if (SSA_VAR_P (fd
->loop
.n2
))
7491 total
= force_gimple_operand_gsi (&gsi
, total
, false, NULL_TREE
,
7492 true, GSI_SAME_STMT
);
7493 ass
= gimple_build_assign (fd
->loop
.n2
, total
);
7494 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7498 tree b
= fd
->loop
.n1
;
7499 tree e
= fd
->loop
.n2
;
7500 tree s
= fd
->loop
.step
;
7502 b
= force_gimple_operand_gsi (&gsi
, b
, true, NULL_TREE
, true, GSI_SAME_STMT
);
7503 e
= force_gimple_operand_gsi (&gsi
, e
, true, NULL_TREE
, true, GSI_SAME_STMT
);
7505 /* Convert the step, avoiding possible unsigned->signed overflow. */
7506 negating
= !up
&& TYPE_UNSIGNED (TREE_TYPE (s
));
7508 s
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (s
), s
);
7509 s
= fold_convert (diff_type
, s
);
7511 s
= fold_build1 (NEGATE_EXPR
, diff_type
, s
);
7512 s
= force_gimple_operand_gsi (&gsi
, s
, true, NULL_TREE
, true, GSI_SAME_STMT
);
7515 chunk_size
= integer_zero_node
;
7516 expr
= fold_convert (diff_type
, chunk_size
);
7517 chunk_size
= force_gimple_operand_gsi (&gsi
, expr
, true,
7518 NULL_TREE
, true, GSI_SAME_STMT
);
7522 /* Determine the tile size and element step,
7523 modify the outer loop step size. */
7524 tile_size
= create_tmp_var (diff_type
, ".tile_size");
7525 expr
= build_int_cst (diff_type
, 1);
7526 for (int ix
= 0; ix
< fd
->collapse
; ix
++)
7527 expr
= fold_build2 (MULT_EXPR
, diff_type
, counts
[ix
].tile
, expr
);
7528 expr
= force_gimple_operand_gsi (&gsi
, expr
, true,
7529 NULL_TREE
, true, GSI_SAME_STMT
);
7530 ass
= gimple_build_assign (tile_size
, expr
);
7531 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7533 element_s
= create_tmp_var (diff_type
, ".element_s");
7534 ass
= gimple_build_assign (element_s
, s
);
7535 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7537 expr
= fold_build2 (MULT_EXPR
, diff_type
, s
, tile_size
);
7538 s
= force_gimple_operand_gsi (&gsi
, expr
, true,
7539 NULL_TREE
, true, GSI_SAME_STMT
);
7542 /* Determine the range, avoiding possible unsigned->signed overflow. */
7543 negating
= !up
&& TYPE_UNSIGNED (iter_type
);
7544 expr
= fold_build2 (MINUS_EXPR
, plus_type
,
7545 fold_convert (plus_type
, negating
? b
: e
),
7546 fold_convert (plus_type
, negating
? e
: b
));
7547 expr
= fold_convert (diff_type
, expr
);
7549 expr
= fold_build1 (NEGATE_EXPR
, diff_type
, expr
);
7550 tree range
= force_gimple_operand_gsi (&gsi
, expr
, true,
7551 NULL_TREE
, true, GSI_SAME_STMT
);
7553 chunk_no
= build_int_cst (diff_type
, 0);
7556 gcc_assert (!gimple_in_ssa_p (cfun
));
7559 chunk_max
= create_tmp_var (diff_type
, ".chunk_max");
7560 chunk_no
= create_tmp_var (diff_type
, ".chunk_no");
7562 ass
= gimple_build_assign (chunk_no
, expr
);
7563 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7565 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
7566 build_int_cst (integer_type_node
,
7567 IFN_GOACC_LOOP_CHUNKS
),
7568 dir
, range
, s
, chunk_size
, gwv
);
7569 gimple_call_set_lhs (call
, chunk_max
);
7570 gimple_set_location (call
, loc
);
7571 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
7574 chunk_size
= chunk_no
;
7576 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6,
7577 build_int_cst (integer_type_node
,
7578 IFN_GOACC_LOOP_STEP
),
7579 dir
, range
, s
, chunk_size
, gwv
);
7580 gimple_call_set_lhs (call
, step
);
7581 gimple_set_location (call
, loc
);
7582 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
7584 /* Remove the GIMPLE_OMP_FOR. */
7585 gsi_remove (&gsi
, true);
7587 /* Fixup edges from head_bb. */
7588 be
= BRANCH_EDGE (head_bb
);
7589 fte
= FALLTHRU_EDGE (head_bb
);
7590 be
->flags
|= EDGE_FALSE_VALUE
;
7591 fte
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
7593 basic_block body_bb
= fte
->dest
;
7595 if (gimple_in_ssa_p (cfun
))
7597 gsi
= gsi_last_nondebug_bb (cont_bb
);
7598 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
7600 offset
= gimple_omp_continue_control_use (cont_stmt
);
7601 offset_incr
= gimple_omp_continue_control_def (cont_stmt
);
7605 offset
= create_tmp_var (diff_type
, ".offset");
7606 offset_init
= offset_incr
= offset
;
7608 bound
= create_tmp_var (TREE_TYPE (offset
), ".bound");
7610 /* Loop offset & bound go into head_bb. */
7611 gsi
= gsi_start_bb (head_bb
);
7613 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
7614 build_int_cst (integer_type_node
,
7615 IFN_GOACC_LOOP_OFFSET
),
7617 chunk_size
, gwv
, chunk_no
);
7618 gimple_call_set_lhs (call
, offset_init
);
7619 gimple_set_location (call
, loc
);
7620 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
7622 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7,
7623 build_int_cst (integer_type_node
,
7624 IFN_GOACC_LOOP_BOUND
),
7626 chunk_size
, gwv
, offset_init
);
7627 gimple_call_set_lhs (call
, bound
);
7628 gimple_set_location (call
, loc
);
7629 gsi_insert_after (&gsi
, call
, GSI_CONTINUE_LINKING
);
7631 expr
= build2 (cond_code
, boolean_type_node
, offset_init
, bound
);
7632 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
7633 GSI_CONTINUE_LINKING
);
7635 /* V assignment goes into body_bb. */
7636 if (!gimple_in_ssa_p (cfun
))
7638 gsi
= gsi_start_bb (body_bb
);
7640 expr
= build2 (plus_code
, iter_type
, b
,
7641 fold_convert (plus_type
, offset
));
7642 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
7643 true, GSI_SAME_STMT
);
7644 ass
= gimple_build_assign (v
, expr
);
7645 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7647 if (fd
->collapse
> 1 || fd
->tiling
)
7648 expand_oacc_collapse_vars (fd
, false, &gsi
, counts
, v
);
7652 /* Determine the range of the element loop -- usually simply
7653 the tile_size, but could be smaller if the final
7654 iteration of the outer loop is a partial tile. */
7655 tree e_range
= create_tmp_var (diff_type
, ".e_range");
7657 expr
= build2 (MIN_EXPR
, diff_type
,
7658 build2 (MINUS_EXPR
, diff_type
, bound
, offset
),
7659 build2 (MULT_EXPR
, diff_type
, tile_size
,
7661 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
7662 true, GSI_SAME_STMT
);
7663 ass
= gimple_build_assign (e_range
, expr
);
7664 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7666 /* Determine bound, offset & step of inner loop. */
7667 e_bound
= create_tmp_var (diff_type
, ".e_bound");
7668 e_offset
= create_tmp_var (diff_type
, ".e_offset");
7669 e_step
= create_tmp_var (diff_type
, ".e_step");
7671 /* Mark these as element loops. */
7672 tree t
, e_gwv
= integer_minus_one_node
;
7673 tree chunk
= build_int_cst (diff_type
, 0); /* Never chunked. */
7675 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_OFFSET
);
7676 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
7677 element_s
, chunk
, e_gwv
, chunk
);
7678 gimple_call_set_lhs (call
, e_offset
);
7679 gimple_set_location (call
, loc
);
7680 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
7682 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_BOUND
);
7683 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 7, t
, dir
, e_range
,
7684 element_s
, chunk
, e_gwv
, e_offset
);
7685 gimple_call_set_lhs (call
, e_bound
);
7686 gimple_set_location (call
, loc
);
7687 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
7689 t
= build_int_cst (integer_type_node
, IFN_GOACC_LOOP_STEP
);
7690 call
= gimple_build_call_internal (IFN_GOACC_LOOP
, 6, t
, dir
, e_range
,
7691 element_s
, chunk
, e_gwv
);
7692 gimple_call_set_lhs (call
, e_step
);
7693 gimple_set_location (call
, loc
);
7694 gsi_insert_before (&gsi
, call
, GSI_SAME_STMT
);
7696 /* Add test and split block. */
7697 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
7698 stmt
= gimple_build_cond_empty (expr
);
7699 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
7700 split
= split_block (body_bb
, stmt
);
7701 elem_body_bb
= split
->dest
;
7702 if (cont_bb
== body_bb
)
7703 cont_bb
= elem_body_bb
;
7704 body_bb
= split
->src
;
7706 split
->flags
^= EDGE_FALLTHRU
| EDGE_TRUE_VALUE
;
7708 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7709 if (cont_bb
== NULL
)
7711 edge e
= make_edge (body_bb
, exit_bb
, EDGE_FALSE_VALUE
);
7712 e
->probability
= profile_probability::even ();
7713 split
->probability
= profile_probability::even ();
7716 /* Initialize the user's loop vars. */
7717 gsi
= gsi_start_bb (elem_body_bb
);
7718 expand_oacc_collapse_vars (fd
, true, &gsi
, counts
, e_offset
);
7722 /* Loop increment goes into cont_bb. If this is not a loop, we
7723 will have spawned threads as if it was, and each one will
7724 execute one iteration. The specification is not explicit about
7725 whether such constructs are ill-formed or not, and they can
7726 occur, especially when noreturn routines are involved. */
7729 gsi
= gsi_last_nondebug_bb (cont_bb
);
7730 gomp_continue
*cont_stmt
= as_a
<gomp_continue
*> (gsi_stmt (gsi
));
7731 loc
= gimple_location (cont_stmt
);
7735 /* Insert element loop increment and test. */
7736 expr
= build2 (PLUS_EXPR
, diff_type
, e_offset
, e_step
);
7737 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
7738 true, GSI_SAME_STMT
);
7739 ass
= gimple_build_assign (e_offset
, expr
);
7740 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7741 expr
= build2 (cond_code
, boolean_type_node
, e_offset
, e_bound
);
7743 stmt
= gimple_build_cond_empty (expr
);
7744 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
7745 split
= split_block (cont_bb
, stmt
);
7746 elem_cont_bb
= split
->src
;
7747 cont_bb
= split
->dest
;
7749 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
7750 split
->probability
= profile_probability::unlikely ().guessed ();
7752 = make_edge (elem_cont_bb
, elem_body_bb
, EDGE_TRUE_VALUE
);
7753 latch_edge
->probability
= profile_probability::likely ().guessed ();
7755 edge skip_edge
= make_edge (body_bb
, cont_bb
, EDGE_FALSE_VALUE
);
7756 skip_edge
->probability
= profile_probability::unlikely ().guessed ();
7757 edge loop_entry_edge
= EDGE_SUCC (body_bb
, 1 - skip_edge
->dest_idx
);
7758 loop_entry_edge
->probability
7759 = profile_probability::likely ().guessed ();
7761 gsi
= gsi_for_stmt (cont_stmt
);
7764 /* Increment offset. */
7765 if (gimple_in_ssa_p (cfun
))
7766 expr
= build2 (plus_code
, iter_type
, offset
,
7767 fold_convert (plus_type
, step
));
7769 expr
= build2 (PLUS_EXPR
, diff_type
, offset
, step
);
7770 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
7771 true, GSI_SAME_STMT
);
7772 ass
= gimple_build_assign (offset_incr
, expr
);
7773 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7774 expr
= build2 (cond_code
, boolean_type_node
, offset_incr
, bound
);
7775 gsi_insert_before (&gsi
, gimple_build_cond_empty (expr
), GSI_SAME_STMT
);
7777 /* Remove the GIMPLE_OMP_CONTINUE. */
7778 gsi_remove (&gsi
, true);
7780 /* Fixup edges from cont_bb. */
7781 be
= BRANCH_EDGE (cont_bb
);
7782 fte
= FALLTHRU_EDGE (cont_bb
);
7783 be
->flags
|= EDGE_TRUE_VALUE
;
7784 fte
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
7788 /* Split the beginning of exit_bb to make bottom_bb. We
7789 need to insert a nop at the start, because splitting is
7790 after a stmt, not before. */
7791 gsi
= gsi_start_bb (exit_bb
);
7792 stmt
= gimple_build_nop ();
7793 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
7794 split
= split_block (exit_bb
, stmt
);
7795 bottom_bb
= split
->src
;
7796 exit_bb
= split
->dest
;
7797 gsi
= gsi_last_bb (bottom_bb
);
7799 /* Chunk increment and test goes into bottom_bb. */
7800 expr
= build2 (PLUS_EXPR
, diff_type
, chunk_no
,
7801 build_int_cst (diff_type
, 1));
7802 ass
= gimple_build_assign (chunk_no
, expr
);
7803 gsi_insert_after (&gsi
, ass
, GSI_CONTINUE_LINKING
);
7805 /* Chunk test at end of bottom_bb. */
7806 expr
= build2 (LT_EXPR
, boolean_type_node
, chunk_no
, chunk_max
);
7807 gsi_insert_after (&gsi
, gimple_build_cond_empty (expr
),
7808 GSI_CONTINUE_LINKING
);
7810 /* Fixup edges from bottom_bb. */
7811 split
->flags
^= EDGE_FALLTHRU
| EDGE_FALSE_VALUE
;
7812 split
->probability
= profile_probability::unlikely ().guessed ();
7813 edge latch_edge
= make_edge (bottom_bb
, head_bb
, EDGE_TRUE_VALUE
);
7814 latch_edge
->probability
= profile_probability::likely ().guessed ();
7818 gsi
= gsi_last_nondebug_bb (exit_bb
);
7819 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
7820 loc
= gimple_location (gsi_stmt (gsi
));
7822 if (!gimple_in_ssa_p (cfun
))
7824 /* Insert the final value of V, in case it is live. This is the
7825 value for the only thread that survives past the join. */
7826 expr
= fold_build2 (MINUS_EXPR
, diff_type
, range
, dir
);
7827 expr
= fold_build2 (PLUS_EXPR
, diff_type
, expr
, s
);
7828 expr
= fold_build2 (TRUNC_DIV_EXPR
, diff_type
, expr
, s
);
7829 expr
= fold_build2 (MULT_EXPR
, diff_type
, expr
, s
);
7830 expr
= build2 (plus_code
, iter_type
, b
, fold_convert (plus_type
, expr
));
7831 expr
= force_gimple_operand_gsi (&gsi
, expr
, false, NULL_TREE
,
7832 true, GSI_SAME_STMT
);
7833 ass
= gimple_build_assign (v
, expr
);
7834 gsi_insert_before (&gsi
, ass
, GSI_SAME_STMT
);
7837 /* Remove the OMP_RETURN. */
7838 gsi_remove (&gsi
, true);
7842 /* We now have one, two or three nested loops. Update the loop
7844 class loop
*parent
= entry_bb
->loop_father
;
7845 class loop
*body
= body_bb
->loop_father
;
7849 class loop
*chunk_loop
= alloc_loop ();
7850 chunk_loop
->header
= head_bb
;
7851 chunk_loop
->latch
= bottom_bb
;
7852 add_loop (chunk_loop
, parent
);
7853 parent
= chunk_loop
;
7855 else if (parent
!= body
)
7857 gcc_assert (body
->header
== body_bb
);
7858 gcc_assert (body
->latch
== cont_bb
7859 || single_pred (body
->latch
) == cont_bb
);
7865 class loop
*body_loop
= alloc_loop ();
7866 body_loop
->header
= body_bb
;
7867 body_loop
->latch
= cont_bb
;
7868 add_loop (body_loop
, parent
);
7872 /* Insert tiling's element loop. */
7873 class loop
*inner_loop
= alloc_loop ();
7874 inner_loop
->header
= elem_body_bb
;
7875 inner_loop
->latch
= elem_cont_bb
;
7876 add_loop (inner_loop
, body_loop
);
7882 /* Expand the OMP loop defined by REGION. */
7885 expand_omp_for (struct omp_region
*region
, gimple
*inner_stmt
)
7887 struct omp_for_data fd
;
7888 struct omp_for_data_loop
*loops
;
7890 loops
= XALLOCAVEC (struct omp_for_data_loop
,
7891 gimple_omp_for_collapse (last_stmt (region
->entry
)));
7892 omp_extract_for_data (as_a
<gomp_for
*> (last_stmt (region
->entry
)),
7894 region
->sched_kind
= fd
.sched_kind
;
7895 region
->sched_modifiers
= fd
.sched_modifiers
;
7896 region
->has_lastprivate_conditional
= fd
.lastprivate_conditional
!= 0;
7897 if (fd
.non_rect
&& !gimple_omp_for_combined_into_p (fd
.for_stmt
))
7899 for (int i
= fd
.first_nonrect
; i
<= fd
.last_nonrect
; i
++)
7900 if ((loops
[i
].m1
|| loops
[i
].m2
)
7901 && (loops
[i
].m1
== NULL_TREE
7902 || TREE_CODE (loops
[i
].m1
) == INTEGER_CST
)
7903 && (loops
[i
].m2
== NULL_TREE
7904 || TREE_CODE (loops
[i
].m2
) == INTEGER_CST
)
7905 && TREE_CODE (loops
[i
].step
) == INTEGER_CST
7906 && TREE_CODE (loops
[i
- loops
[i
].outer
].step
) == INTEGER_CST
)
7909 tree itype
= TREE_TYPE (loops
[i
].v
);
7910 if (loops
[i
].m1
&& loops
[i
].m2
)
7911 t
= fold_build2 (MINUS_EXPR
, itype
, loops
[i
].m2
, loops
[i
].m1
);
7912 else if (loops
[i
].m1
)
7913 t
= fold_build1 (NEGATE_EXPR
, itype
, loops
[i
].m1
);
7916 t
= fold_build2 (MULT_EXPR
, itype
, t
,
7917 fold_convert (itype
,
7918 loops
[i
- loops
[i
].outer
].step
));
7919 if (TYPE_UNSIGNED (itype
) && loops
[i
].cond_code
== GT_EXPR
)
7920 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
,
7921 fold_build1 (NEGATE_EXPR
, itype
, t
),
7922 fold_build1 (NEGATE_EXPR
, itype
,
7923 fold_convert (itype
,
7926 t
= fold_build2 (TRUNC_MOD_EXPR
, itype
, t
,
7927 fold_convert (itype
, loops
[i
].step
));
7928 if (integer_nonzerop (t
))
7929 error_at (gimple_location (fd
.for_stmt
),
7930 "invalid OpenMP non-rectangular loop step; "
7931 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7933 loops
[i
].m2
? loops
[i
].m2
: integer_zero_node
,
7934 loops
[i
].m1
? loops
[i
].m1
: integer_zero_node
,
7935 loops
[i
- loops
[i
].outer
].step
, i
+ 1,
7940 gcc_assert (EDGE_COUNT (region
->entry
->succs
) == 2);
7941 BRANCH_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
7942 FALLTHRU_EDGE (region
->entry
)->flags
&= ~EDGE_ABNORMAL
;
7945 gcc_assert (EDGE_COUNT (region
->cont
->succs
) == 2);
7946 BRANCH_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
7947 FALLTHRU_EDGE (region
->cont
)->flags
&= ~EDGE_ABNORMAL
;
7950 /* If there isn't a continue then this is a degerate case where
7951 the introduction of abnormal edges during lowering will prevent
7952 original loops from being detected. Fix that up. */
7953 loops_state_set (LOOPS_NEED_FIXUP
);
7955 if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_SIMD
)
7956 expand_omp_simd (region
, &fd
);
7957 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_OACC_LOOP
)
7959 gcc_assert (!inner_stmt
&& !fd
.non_rect
);
7960 expand_oacc_for (region
, &fd
);
7962 else if (gimple_omp_for_kind (fd
.for_stmt
) == GF_OMP_FOR_KIND_TASKLOOP
)
7964 if (gimple_omp_for_combined_into_p (fd
.for_stmt
))
7965 expand_omp_taskloop_for_inner (region
, &fd
, inner_stmt
);
7967 expand_omp_taskloop_for_outer (region
, &fd
, inner_stmt
);
7969 else if (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
7970 && !fd
.have_ordered
)
7972 if (fd
.chunk_size
== NULL
)
7973 expand_omp_for_static_nochunk (region
, &fd
, inner_stmt
);
7975 expand_omp_for_static_chunk (region
, &fd
, inner_stmt
);
7979 int fn_index
, start_ix
, next_ix
;
7980 unsigned HOST_WIDE_INT sched
= 0;
7981 tree sched_arg
= NULL_TREE
;
7983 gcc_assert (gimple_omp_for_kind (fd
.for_stmt
)
7984 == GF_OMP_FOR_KIND_FOR
&& !fd
.non_rect
);
7985 if (fd
.chunk_size
== NULL
7986 && fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_STATIC
)
7987 fd
.chunk_size
= integer_zero_node
;
7988 switch (fd
.sched_kind
)
7990 case OMP_CLAUSE_SCHEDULE_RUNTIME
:
7991 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_NONMONOTONIC
) != 0
7992 && fd
.lastprivate_conditional
== 0)
7994 gcc_assert (!fd
.have_ordered
);
7998 else if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0
8000 && fd
.lastprivate_conditional
== 0)
8005 sched
= (HOST_WIDE_INT_1U
<< 31);
8008 case OMP_CLAUSE_SCHEDULE_DYNAMIC
:
8009 case OMP_CLAUSE_SCHEDULE_GUIDED
:
8010 if ((fd
.sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC
) == 0
8012 && fd
.lastprivate_conditional
== 0)
8014 fn_index
= 3 + fd
.sched_kind
;
8015 sched
= (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_GUIDED
) + 2;
8018 fn_index
= fd
.sched_kind
;
8019 sched
= (fd
.sched_kind
== OMP_CLAUSE_SCHEDULE_GUIDED
) + 2;
8020 sched
+= (HOST_WIDE_INT_1U
<< 31);
8022 case OMP_CLAUSE_SCHEDULE_STATIC
:
8023 gcc_assert (fd
.have_ordered
);
8025 sched
= (HOST_WIDE_INT_1U
<< 31) + 1;
8031 fn_index
+= fd
.have_ordered
* 8;
8033 start_ix
= ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START
) + fn_index
;
8035 start_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_START
) + fn_index
;
8036 next_ix
= ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
) + fn_index
;
8037 if (fd
.have_reductemp
|| fd
.have_pointer_condtemp
)
8040 start_ix
= (int)BUILT_IN_GOMP_LOOP_DOACROSS_START
;
8041 else if (fd
.have_ordered
)
8042 start_ix
= (int)BUILT_IN_GOMP_LOOP_ORDERED_START
;
8044 start_ix
= (int)BUILT_IN_GOMP_LOOP_START
;
8045 sched_arg
= build_int_cstu (long_integer_type_node
, sched
);
8047 fd
.chunk_size
= integer_zero_node
;
8049 if (fd
.iter_type
== long_long_unsigned_type_node
)
8051 start_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8052 - (int)BUILT_IN_GOMP_LOOP_STATIC_START
);
8053 next_ix
+= ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8054 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT
);
8056 expand_omp_for_generic (region
, &fd
, (enum built_in_function
) start_ix
,
8057 (enum built_in_function
) next_ix
, sched_arg
,
8061 if (gimple_in_ssa_p (cfun
))
8062 update_ssa (TODO_update_ssa_only_virtuals
);
8065 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8067 v = GOMP_sections_start (n);
8084 v = GOMP_sections_next ();
8089 If this is a combined parallel sections, replace the call to
8090 GOMP_sections_start with call to GOMP_sections_next. */
8093 expand_omp_sections (struct omp_region
*region
)
8095 tree t
, u
, vin
= NULL
, vmain
, vnext
, l2
;
8097 basic_block entry_bb
, l0_bb
, l1_bb
, l2_bb
, default_bb
;
8098 gimple_stmt_iterator si
, switch_si
;
8099 gomp_sections
*sections_stmt
;
8101 gomp_continue
*cont
;
8104 struct omp_region
*inner
;
8106 bool exit_reachable
= region
->cont
!= NULL
;
8108 gcc_assert (region
->exit
!= NULL
);
8109 entry_bb
= region
->entry
;
8110 l0_bb
= single_succ (entry_bb
);
8111 l1_bb
= region
->cont
;
8112 l2_bb
= region
->exit
;
8113 if (single_pred_p (l2_bb
) && single_pred (l2_bb
) == l0_bb
)
8114 l2
= gimple_block_label (l2_bb
);
8117 /* This can happen if there are reductions. */
8118 len
= EDGE_COUNT (l0_bb
->succs
);
8119 gcc_assert (len
> 0);
8120 e
= EDGE_SUCC (l0_bb
, len
- 1);
8121 si
= gsi_last_nondebug_bb (e
->dest
);
8124 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
8125 l2
= gimple_block_label (e
->dest
);
8127 FOR_EACH_EDGE (e
, ei
, l0_bb
->succs
)
8129 si
= gsi_last_nondebug_bb (e
->dest
);
8131 || gimple_code (gsi_stmt (si
)) != GIMPLE_OMP_SECTION
)
8133 l2
= gimple_block_label (e
->dest
);
8139 default_bb
= create_empty_bb (l1_bb
->prev_bb
);
8141 default_bb
= create_empty_bb (l0_bb
);
8143 /* We will build a switch() with enough cases for all the
8144 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8145 and a default case to abort if something goes wrong. */
8146 len
= EDGE_COUNT (l0_bb
->succs
);
8148 /* Use vec::quick_push on label_vec throughout, since we know the size
8150 auto_vec
<tree
> label_vec (len
);
8152 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8153 GIMPLE_OMP_SECTIONS statement. */
8154 si
= gsi_last_nondebug_bb (entry_bb
);
8155 sections_stmt
= as_a
<gomp_sections
*> (gsi_stmt (si
));
8156 gcc_assert (gimple_code (sections_stmt
) == GIMPLE_OMP_SECTIONS
);
8157 vin
= gimple_omp_sections_control (sections_stmt
);
8158 tree clauses
= gimple_omp_sections_clauses (sections_stmt
);
8159 tree reductmp
= omp_find_clause (clauses
, OMP_CLAUSE__REDUCTEMP_
);
8160 tree condtmp
= omp_find_clause (clauses
, OMP_CLAUSE__CONDTEMP_
);
8161 tree cond_var
= NULL_TREE
;
8162 if (reductmp
|| condtmp
)
8164 tree reductions
= null_pointer_node
, mem
= null_pointer_node
;
8165 tree memv
= NULL_TREE
, condtemp
= NULL_TREE
;
8166 gimple_stmt_iterator gsi
= gsi_none ();
8170 reductions
= OMP_CLAUSE_DECL (reductmp
);
8171 gcc_assert (TREE_CODE (reductions
) == SSA_NAME
);
8172 g
= SSA_NAME_DEF_STMT (reductions
);
8173 reductions
= gimple_assign_rhs1 (g
);
8174 OMP_CLAUSE_DECL (reductmp
) = reductions
;
8175 gsi
= gsi_for_stmt (g
);
8181 condtemp
= OMP_CLAUSE_DECL (condtmp
);
8182 tree c
= omp_find_clause (OMP_CLAUSE_CHAIN (condtmp
),
8183 OMP_CLAUSE__CONDTEMP_
);
8184 cond_var
= OMP_CLAUSE_DECL (c
);
8185 tree type
= TREE_TYPE (condtemp
);
8186 memv
= create_tmp_var (type
);
8187 TREE_ADDRESSABLE (memv
) = 1;
8189 for (c
= clauses
; c
; c
= OMP_CLAUSE_CHAIN (c
))
8190 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_LASTPRIVATE
8191 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c
))
8193 unsigned HOST_WIDE_INT sz
8194 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type
))) * cnt
;
8195 expand_omp_build_assign (&gsi
, memv
, build_int_cst (type
, sz
),
8197 mem
= build_fold_addr_expr (memv
);
8199 t
= build_int_cst (unsigned_type_node
, len
- 1);
8200 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START
);
8201 stmt
= gimple_build_call (u
, 3, t
, reductions
, mem
);
8202 gimple_call_set_lhs (stmt
, vin
);
8203 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
8206 expand_omp_build_assign (&gsi
, condtemp
, memv
, false);
8207 tree t
= build2 (PLUS_EXPR
, TREE_TYPE (cond_var
),
8208 vin
, build_one_cst (TREE_TYPE (cond_var
)));
8209 expand_omp_build_assign (&gsi
, cond_var
, t
, false);
8213 gsi_remove (&gsi
, true);
8214 release_ssa_name (gimple_assign_lhs (g
));
8217 else if (!is_combined_parallel (region
))
8219 /* If we are not inside a combined parallel+sections region,
8220 call GOMP_sections_start. */
8221 t
= build_int_cst (unsigned_type_node
, len
- 1);
8222 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START
);
8223 stmt
= gimple_build_call (u
, 1, t
);
8227 /* Otherwise, call GOMP_sections_next. */
8228 u
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
8229 stmt
= gimple_build_call (u
, 0);
8231 if (!reductmp
&& !condtmp
)
8233 gimple_call_set_lhs (stmt
, vin
);
8234 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
8236 gsi_remove (&si
, true);
8238 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8240 switch_si
= gsi_last_nondebug_bb (l0_bb
);
8241 gcc_assert (gimple_code (gsi_stmt (switch_si
)) == GIMPLE_OMP_SECTIONS_SWITCH
);
8244 cont
= as_a
<gomp_continue
*> (last_stmt (l1_bb
));
8245 gcc_assert (gimple_code (cont
) == GIMPLE_OMP_CONTINUE
);
8246 vmain
= gimple_omp_continue_control_use (cont
);
8247 vnext
= gimple_omp_continue_control_def (cont
);
8255 t
= build_case_label (build_int_cst (unsigned_type_node
, 0), NULL
, l2
);
8256 label_vec
.quick_push (t
);
8259 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8260 for (inner
= region
->inner
, casei
= 1;
8262 inner
= inner
->next
, i
++, casei
++)
8264 basic_block s_entry_bb
, s_exit_bb
;
8266 /* Skip optional reduction region. */
8267 if (inner
->type
== GIMPLE_OMP_ATOMIC_LOAD
)
8274 s_entry_bb
= inner
->entry
;
8275 s_exit_bb
= inner
->exit
;
8277 t
= gimple_block_label (s_entry_bb
);
8278 u
= build_int_cst (unsigned_type_node
, casei
);
8279 u
= build_case_label (u
, NULL
, t
);
8280 label_vec
.quick_push (u
);
8282 si
= gsi_last_nondebug_bb (s_entry_bb
);
8283 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SECTION
);
8284 gcc_assert (i
< len
|| gimple_omp_section_last_p (gsi_stmt (si
)));
8285 gsi_remove (&si
, true);
8286 single_succ_edge (s_entry_bb
)->flags
= EDGE_FALLTHRU
;
8288 if (s_exit_bb
== NULL
)
8291 si
= gsi_last_nondebug_bb (s_exit_bb
);
8292 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
8293 gsi_remove (&si
, true);
8295 single_succ_edge (s_exit_bb
)->flags
= EDGE_FALLTHRU
;
8298 /* Error handling code goes in DEFAULT_BB. */
8299 t
= gimple_block_label (default_bb
);
8300 u
= build_case_label (NULL
, NULL
, t
);
8301 make_edge (l0_bb
, default_bb
, 0);
8302 add_bb_to_loop (default_bb
, current_loops
->tree_root
);
8304 stmt
= gimple_build_switch (vmain
, u
, label_vec
);
8305 gsi_insert_after (&switch_si
, stmt
, GSI_SAME_STMT
);
8306 gsi_remove (&switch_si
, true);
8308 si
= gsi_start_bb (default_bb
);
8309 stmt
= gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP
), 0);
8310 gsi_insert_after (&si
, stmt
, GSI_CONTINUE_LINKING
);
8316 /* Code to get the next section goes in L1_BB. */
8317 si
= gsi_last_nondebug_bb (l1_bb
);
8318 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CONTINUE
);
8320 bfn_decl
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT
);
8321 stmt
= gimple_build_call (bfn_decl
, 0);
8322 gimple_call_set_lhs (stmt
, vnext
);
8323 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8326 tree t
= build2 (PLUS_EXPR
, TREE_TYPE (cond_var
),
8327 vnext
, build_one_cst (TREE_TYPE (cond_var
)));
8328 expand_omp_build_assign (&si
, cond_var
, t
, false);
8330 gsi_remove (&si
, true);
8332 single_succ_edge (l1_bb
)->flags
= EDGE_FALLTHRU
;
8335 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8336 si
= gsi_last_nondebug_bb (l2_bb
);
8337 if (gimple_omp_return_nowait_p (gsi_stmt (si
)))
8338 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT
);
8339 else if (gimple_omp_return_lhs (gsi_stmt (si
)))
8340 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL
);
8342 t
= builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END
);
8343 stmt
= gimple_build_call (t
, 0);
8344 if (gimple_omp_return_lhs (gsi_stmt (si
)))
8345 gimple_call_set_lhs (stmt
, gimple_omp_return_lhs (gsi_stmt (si
)));
8346 gsi_insert_after (&si
, stmt
, GSI_SAME_STMT
);
8347 gsi_remove (&si
, true);
8349 set_immediate_dominator (CDI_DOMINATORS
, default_bb
, l0_bb
);
8352 /* Expand code for an OpenMP single directive. We've already expanded
8353 much of the code, here we simply place the GOMP_barrier call. */
8356 expand_omp_single (struct omp_region
*region
)
8358 basic_block entry_bb
, exit_bb
;
8359 gimple_stmt_iterator si
;
8361 entry_bb
= region
->entry
;
8362 exit_bb
= region
->exit
;
8364 si
= gsi_last_nondebug_bb (entry_bb
);
8365 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
);
8366 gsi_remove (&si
, true);
8367 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
8369 si
= gsi_last_nondebug_bb (exit_bb
);
8370 if (!gimple_omp_return_nowait_p (gsi_stmt (si
)))
8372 tree t
= gimple_omp_return_lhs (gsi_stmt (si
));
8373 gsi_insert_after (&si
, omp_build_barrier (t
), GSI_SAME_STMT
);
8375 gsi_remove (&si
, true);
8376 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
8379 /* Generic expansion for OpenMP synchronization directives: master,
8380 ordered and critical. All we need to do here is remove the entry
8381 and exit markers for REGION. */
8384 expand_omp_synch (struct omp_region
*region
)
8386 basic_block entry_bb
, exit_bb
;
8387 gimple_stmt_iterator si
;
8389 entry_bb
= region
->entry
;
8390 exit_bb
= region
->exit
;
8392 si
= gsi_last_nondebug_bb (entry_bb
);
8393 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_SINGLE
8394 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_MASTER
8395 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TASKGROUP
8396 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ORDERED
8397 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_CRITICAL
8398 || gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
);
8399 if (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_TEAMS
8400 && gimple_omp_teams_host (as_a
<gomp_teams
*> (gsi_stmt (si
))))
8402 expand_omp_taskreg (region
);
8405 gsi_remove (&si
, true);
8406 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
8410 si
= gsi_last_nondebug_bb (exit_bb
);
8411 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_RETURN
);
8412 gsi_remove (&si
, true);
8413 single_succ_edge (exit_bb
)->flags
= EDGE_FALLTHRU
;
8417 /* Translate enum omp_memory_order to enum memmodel. The two enums
8418 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8421 static enum memmodel
8422 omp_memory_order_to_memmodel (enum omp_memory_order mo
)
8426 case OMP_MEMORY_ORDER_RELAXED
: return MEMMODEL_RELAXED
;
8427 case OMP_MEMORY_ORDER_ACQUIRE
: return MEMMODEL_ACQUIRE
;
8428 case OMP_MEMORY_ORDER_RELEASE
: return MEMMODEL_RELEASE
;
8429 case OMP_MEMORY_ORDER_ACQ_REL
: return MEMMODEL_ACQ_REL
;
8430 case OMP_MEMORY_ORDER_SEQ_CST
: return MEMMODEL_SEQ_CST
;
8431 default: gcc_unreachable ();
8435 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8436 operation as a normal volatile load. */
8439 expand_omp_atomic_load (basic_block load_bb
, tree addr
,
8440 tree loaded_val
, int index
)
8442 enum built_in_function tmpbase
;
8443 gimple_stmt_iterator gsi
;
8444 basic_block store_bb
;
8447 tree decl
, call
, type
, itype
;
8449 gsi
= gsi_last_nondebug_bb (load_bb
);
8450 stmt
= gsi_stmt (gsi
);
8451 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
8452 loc
= gimple_location (stmt
);
8454 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8455 is smaller than word size, then expand_atomic_load assumes that the load
8456 is atomic. We could avoid the builtin entirely in this case. */
8458 tmpbase
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
8459 decl
= builtin_decl_explicit (tmpbase
);
8460 if (decl
== NULL_TREE
)
8463 type
= TREE_TYPE (loaded_val
);
8464 itype
= TREE_TYPE (TREE_TYPE (decl
));
8466 enum omp_memory_order omo
= gimple_omp_atomic_memory_order (stmt
);
8467 tree mo
= build_int_cst (NULL
, omp_memory_order_to_memmodel (omo
));
8468 call
= build_call_expr_loc (loc
, decl
, 2, addr
, mo
);
8469 if (!useless_type_conversion_p (type
, itype
))
8470 call
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
8471 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
8473 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
8474 gsi_remove (&gsi
, true);
8476 store_bb
= single_succ (load_bb
);
8477 gsi
= gsi_last_nondebug_bb (store_bb
);
8478 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
8479 gsi_remove (&gsi
, true);
8481 if (gimple_in_ssa_p (cfun
))
8482 update_ssa (TODO_update_ssa_no_phi
);
8487 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8488 operation as a normal volatile store. */
8491 expand_omp_atomic_store (basic_block load_bb
, tree addr
,
8492 tree loaded_val
, tree stored_val
, int index
)
8494 enum built_in_function tmpbase
;
8495 gimple_stmt_iterator gsi
;
8496 basic_block store_bb
= single_succ (load_bb
);
8499 tree decl
, call
, type
, itype
;
8503 gsi
= gsi_last_nondebug_bb (load_bb
);
8504 stmt
= gsi_stmt (gsi
);
8505 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_LOAD
);
8507 /* If the load value is needed, then this isn't a store but an exchange. */
8508 exchange
= gimple_omp_atomic_need_value_p (stmt
);
8510 gsi
= gsi_last_nondebug_bb (store_bb
);
8511 stmt
= gsi_stmt (gsi
);
8512 gcc_assert (gimple_code (stmt
) == GIMPLE_OMP_ATOMIC_STORE
);
8513 loc
= gimple_location (stmt
);
8515 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8516 is smaller than word size, then expand_atomic_store assumes that the store
8517 is atomic. We could avoid the builtin entirely in this case. */
8519 tmpbase
= (exchange
? BUILT_IN_ATOMIC_EXCHANGE_N
: BUILT_IN_ATOMIC_STORE_N
);
8520 tmpbase
= (enum built_in_function
) ((int) tmpbase
+ index
+ 1);
8521 decl
= builtin_decl_explicit (tmpbase
);
8522 if (decl
== NULL_TREE
)
8525 type
= TREE_TYPE (stored_val
);
8527 /* Dig out the type of the function's second argument. */
8528 itype
= TREE_TYPE (decl
);
8529 itype
= TYPE_ARG_TYPES (itype
);
8530 itype
= TREE_CHAIN (itype
);
8531 itype
= TREE_VALUE (itype
);
8532 imode
= TYPE_MODE (itype
);
8534 if (exchange
&& !can_atomic_exchange_p (imode
, true))
8537 if (!useless_type_conversion_p (itype
, type
))
8538 stored_val
= fold_build1_loc (loc
, VIEW_CONVERT_EXPR
, itype
, stored_val
);
8539 enum omp_memory_order omo
= gimple_omp_atomic_memory_order (stmt
);
8540 tree mo
= build_int_cst (NULL
, omp_memory_order_to_memmodel (omo
));
8541 call
= build_call_expr_loc (loc
, decl
, 3, addr
, stored_val
, mo
);
8544 if (!useless_type_conversion_p (type
, itype
))
8545 call
= build1_loc (loc
, VIEW_CONVERT_EXPR
, type
, call
);
8546 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, loaded_val
, call
);
8549 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
8550 gsi_remove (&gsi
, true);
8552 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8553 gsi
= gsi_last_nondebug_bb (load_bb
);
8554 gsi_remove (&gsi
, true);
8556 if (gimple_in_ssa_p (cfun
))
8557 update_ssa (TODO_update_ssa_no_phi
);
8562 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8563 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8564 size of the data type, and thus usable to find the index of the builtin
8565 decl. Returns false if the expression is not of the proper form. */
8568 expand_omp_atomic_fetch_op (basic_block load_bb
,
8569 tree addr
, tree loaded_val
,
8570 tree stored_val
, int index
)
8572 enum built_in_function oldbase
, newbase
, tmpbase
;
8573 tree decl
, itype
, call
;
8575 basic_block store_bb
= single_succ (load_bb
);
8576 gimple_stmt_iterator gsi
;
8579 enum tree_code code
;
8580 bool need_old
, need_new
;
8583 /* We expect to find the following sequences:
8586 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8589 val = tmp OP something; (or: something OP tmp)
8590 GIMPLE_OMP_STORE (val)
8592 ???FIXME: Allow a more flexible sequence.
8593 Perhaps use data flow to pick the statements.
8597 gsi
= gsi_after_labels (store_bb
);
8598 stmt
= gsi_stmt (gsi
);
8599 if (is_gimple_debug (stmt
))
8601 gsi_next_nondebug (&gsi
);
8602 if (gsi_end_p (gsi
))
8604 stmt
= gsi_stmt (gsi
);
8606 loc
= gimple_location (stmt
);
8607 if (!is_gimple_assign (stmt
))
8609 gsi_next_nondebug (&gsi
);
8610 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_OMP_ATOMIC_STORE
)
8612 need_new
= gimple_omp_atomic_need_value_p (gsi_stmt (gsi
));
8613 need_old
= gimple_omp_atomic_need_value_p (last_stmt (load_bb
));
8614 enum omp_memory_order omo
8615 = gimple_omp_atomic_memory_order (last_stmt (load_bb
));
8616 enum memmodel mo
= omp_memory_order_to_memmodel (omo
);
8617 gcc_checking_assert (!need_old
|| !need_new
);
8619 if (!operand_equal_p (gimple_assign_lhs (stmt
), stored_val
, 0))
8622 /* Check for one of the supported fetch-op operations. */
8623 code
= gimple_assign_rhs_code (stmt
);
8627 case POINTER_PLUS_EXPR
:
8628 oldbase
= BUILT_IN_ATOMIC_FETCH_ADD_N
;
8629 newbase
= BUILT_IN_ATOMIC_ADD_FETCH_N
;
8632 oldbase
= BUILT_IN_ATOMIC_FETCH_SUB_N
;
8633 newbase
= BUILT_IN_ATOMIC_SUB_FETCH_N
;
8636 oldbase
= BUILT_IN_ATOMIC_FETCH_AND_N
;
8637 newbase
= BUILT_IN_ATOMIC_AND_FETCH_N
;
8640 oldbase
= BUILT_IN_ATOMIC_FETCH_OR_N
;
8641 newbase
= BUILT_IN_ATOMIC_OR_FETCH_N
;
8644 oldbase
= BUILT_IN_ATOMIC_FETCH_XOR_N
;
8645 newbase
= BUILT_IN_ATOMIC_XOR_FETCH_N
;
8651 /* Make sure the expression is of the proper form. */
8652 if (operand_equal_p (gimple_assign_rhs1 (stmt
), loaded_val
, 0))
8653 rhs
= gimple_assign_rhs2 (stmt
);
8654 else if (commutative_tree_code (gimple_assign_rhs_code (stmt
))
8655 && operand_equal_p (gimple_assign_rhs2 (stmt
), loaded_val
, 0))
8656 rhs
= gimple_assign_rhs1 (stmt
);
8660 tmpbase
= ((enum built_in_function
)
8661 ((need_new
? newbase
: oldbase
) + index
+ 1));
8662 decl
= builtin_decl_explicit (tmpbase
);
8663 if (decl
== NULL_TREE
)
8665 itype
= TREE_TYPE (TREE_TYPE (decl
));
8666 imode
= TYPE_MODE (itype
);
8668 /* We could test all of the various optabs involved, but the fact of the
8669 matter is that (with the exception of i486 vs i586 and xadd) all targets
8670 that support any atomic operaton optab also implements compare-and-swap.
8671 Let optabs.c take care of expanding any compare-and-swap loop. */
8672 if (!can_compare_and_swap_p (imode
, true) || !can_atomic_load_p (imode
))
8675 gsi
= gsi_last_nondebug_bb (load_bb
);
8676 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_LOAD
);
8678 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8679 It only requires that the operation happen atomically. Thus we can
8680 use the RELAXED memory model. */
8681 call
= build_call_expr_loc (loc
, decl
, 3, addr
,
8682 fold_convert_loc (loc
, itype
, rhs
),
8683 build_int_cst (NULL
, mo
));
8685 if (need_old
|| need_new
)
8687 lhs
= need_old
? loaded_val
: stored_val
;
8688 call
= fold_convert_loc (loc
, TREE_TYPE (lhs
), call
);
8689 call
= build2_loc (loc
, MODIFY_EXPR
, void_type_node
, lhs
, call
);
8692 call
= fold_convert_loc (loc
, void_type_node
, call
);
8693 force_gimple_operand_gsi (&gsi
, call
, true, NULL_TREE
, true, GSI_SAME_STMT
);
8694 gsi_remove (&gsi
, true);
8696 gsi
= gsi_last_nondebug_bb (store_bb
);
8697 gcc_assert (gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_ATOMIC_STORE
);
8698 gsi_remove (&gsi
, true);
8699 gsi
= gsi_last_nondebug_bb (store_bb
);
8700 stmt
= gsi_stmt (gsi
);
8701 gsi_remove (&gsi
, true);
8703 if (gimple_in_ssa_p (cfun
))
8705 release_defs (stmt
);
8706 update_ssa (TODO_update_ssa_no_phi
);
8712 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8716 newval = rhs; // with oldval replacing *addr in rhs
8717 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8718 if (oldval != newval)
8721 INDEX is log2 of the size of the data type, and thus usable to find the
8722 index of the builtin decl. */
8725 expand_omp_atomic_pipeline (basic_block load_bb
, basic_block store_bb
,
8726 tree addr
, tree loaded_val
, tree stored_val
,
8729 tree loadedi
, storedi
, initial
, new_storedi
, old_vali
;
8730 tree type
, itype
, cmpxchg
, iaddr
, atype
;
8731 gimple_stmt_iterator si
;
8732 basic_block loop_header
= single_succ (load_bb
);
8735 enum built_in_function fncode
;
8737 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8738 order to use the RELAXED memory model effectively. */
8739 fncode
= (enum built_in_function
)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8741 cmpxchg
= builtin_decl_explicit (fncode
);
8742 if (cmpxchg
== NULL_TREE
)
8744 type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
8746 itype
= TREE_TYPE (TREE_TYPE (cmpxchg
));
8748 if (!can_compare_and_swap_p (TYPE_MODE (itype
), true)
8749 || !can_atomic_load_p (TYPE_MODE (itype
)))
8752 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8753 si
= gsi_last_nondebug_bb (load_bb
);
8754 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
8756 /* For floating-point values, we'll need to view-convert them to integers
8757 so that we can perform the atomic compare and swap. Simplify the
8758 following code by always setting up the "i"ntegral variables. */
8759 if (!INTEGRAL_TYPE_P (type
) && !POINTER_TYPE_P (type
))
8763 iaddr
= create_tmp_reg (build_pointer_type_for_mode (itype
, ptr_mode
,
8767 = force_gimple_operand_gsi (&si
,
8768 fold_convert (TREE_TYPE (iaddr
), addr
),
8769 false, NULL_TREE
, true, GSI_SAME_STMT
);
8770 stmt
= gimple_build_assign (iaddr
, iaddr_val
);
8771 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8772 loadedi
= create_tmp_var (itype
);
8773 if (gimple_in_ssa_p (cfun
))
8774 loadedi
= make_ssa_name (loadedi
);
8779 loadedi
= loaded_val
;
8782 fncode
= (enum built_in_function
) (BUILT_IN_ATOMIC_LOAD_N
+ index
+ 1);
8783 tree loaddecl
= builtin_decl_explicit (fncode
);
8786 = fold_convert (atype
,
8787 build_call_expr (loaddecl
, 2, iaddr
,
8788 build_int_cst (NULL_TREE
,
8789 MEMMODEL_RELAXED
)));
8793 = build_int_cst (build_pointer_type_for_mode (atype
, ptr_mode
,
8795 initial
= build2 (MEM_REF
, atype
, iaddr
, off
);
8799 = force_gimple_operand_gsi (&si
, initial
, true, NULL_TREE
, true,
8802 /* Move the value to the LOADEDI temporary. */
8803 if (gimple_in_ssa_p (cfun
))
8805 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header
)));
8806 phi
= create_phi_node (loadedi
, loop_header
);
8807 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, single_succ_edge (load_bb
)),
8811 gsi_insert_before (&si
,
8812 gimple_build_assign (loadedi
, initial
),
8814 if (loadedi
!= loaded_val
)
8816 gimple_stmt_iterator gsi2
;
8819 x
= build1 (VIEW_CONVERT_EXPR
, type
, loadedi
);
8820 gsi2
= gsi_start_bb (loop_header
);
8821 if (gimple_in_ssa_p (cfun
))
8824 x
= force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
8825 true, GSI_SAME_STMT
);
8826 stmt
= gimple_build_assign (loaded_val
, x
);
8827 gsi_insert_before (&gsi2
, stmt
, GSI_SAME_STMT
);
8831 x
= build2 (MODIFY_EXPR
, TREE_TYPE (loaded_val
), loaded_val
, x
);
8832 force_gimple_operand_gsi (&gsi2
, x
, true, NULL_TREE
,
8833 true, GSI_SAME_STMT
);
8836 gsi_remove (&si
, true);
8838 si
= gsi_last_nondebug_bb (store_bb
);
8839 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
8842 storedi
= stored_val
;
8845 = force_gimple_operand_gsi (&si
,
8846 build1 (VIEW_CONVERT_EXPR
, itype
,
8847 stored_val
), true, NULL_TREE
, true,
8850 /* Build the compare&swap statement. */
8851 new_storedi
= build_call_expr (cmpxchg
, 3, iaddr
, loadedi
, storedi
);
8852 new_storedi
= force_gimple_operand_gsi (&si
,
8853 fold_convert (TREE_TYPE (loadedi
),
8856 true, GSI_SAME_STMT
);
8858 if (gimple_in_ssa_p (cfun
))
8862 old_vali
= create_tmp_var (TREE_TYPE (loadedi
));
8863 stmt
= gimple_build_assign (old_vali
, loadedi
);
8864 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8866 stmt
= gimple_build_assign (loadedi
, new_storedi
);
8867 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8870 /* Note that we always perform the comparison as an integer, even for
8871 floating point. This allows the atomic operation to properly
8872 succeed even with NaNs and -0.0. */
8873 tree ne
= build2 (NE_EXPR
, boolean_type_node
, new_storedi
, old_vali
);
8874 stmt
= gimple_build_cond_empty (ne
);
8875 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8878 e
= single_succ_edge (store_bb
);
8879 e
->flags
&= ~EDGE_FALLTHRU
;
8880 e
->flags
|= EDGE_FALSE_VALUE
;
8881 /* Expect no looping. */
8882 e
->probability
= profile_probability::guessed_always ();
8884 e
= make_edge (store_bb
, loop_header
, EDGE_TRUE_VALUE
);
8885 e
->probability
= profile_probability::guessed_never ();
8887 /* Copy the new value to loadedi (we already did that before the condition
8888 if we are not in SSA). */
8889 if (gimple_in_ssa_p (cfun
))
8891 phi
= gimple_seq_first_stmt (phi_nodes (loop_header
));
8892 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi
, e
), new_storedi
);
8895 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8896 gsi_remove (&si
, true);
8898 class loop
*loop
= alloc_loop ();
8899 loop
->header
= loop_header
;
8900 loop
->latch
= store_bb
;
8901 add_loop (loop
, loop_header
->loop_father
);
8903 if (gimple_in_ssa_p (cfun
))
8904 update_ssa (TODO_update_ssa_no_phi
);
8909 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8911 GOMP_atomic_start ();
8915 The result is not globally atomic, but works so long as all parallel
8916 references are within #pragma omp atomic directives. According to
8917 responses received from omp@openmp.org, appears to be within spec.
8918 Which makes sense, since that's how several other compilers handle
8919 this situation as well.
8920 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8921 expanding. STORED_VAL is the operand of the matching
8922 GIMPLE_OMP_ATOMIC_STORE.
8925 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8929 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8934 expand_omp_atomic_mutex (basic_block load_bb
, basic_block store_bb
,
8935 tree addr
, tree loaded_val
, tree stored_val
)
8937 gimple_stmt_iterator si
;
8941 si
= gsi_last_nondebug_bb (load_bb
);
8942 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_LOAD
);
8944 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START
);
8945 t
= build_call_expr (t
, 0);
8946 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
8948 tree mem
= build_simple_mem_ref (addr
);
8949 TREE_TYPE (mem
) = TREE_TYPE (loaded_val
);
8950 TREE_OPERAND (mem
, 1)
8951 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem
), ptr_mode
,
8953 TREE_OPERAND (mem
, 1));
8954 stmt
= gimple_build_assign (loaded_val
, mem
);
8955 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8956 gsi_remove (&si
, true);
8958 si
= gsi_last_nondebug_bb (store_bb
);
8959 gcc_assert (gimple_code (gsi_stmt (si
)) == GIMPLE_OMP_ATOMIC_STORE
);
8961 stmt
= gimple_build_assign (unshare_expr (mem
), stored_val
);
8962 gsi_insert_before (&si
, stmt
, GSI_SAME_STMT
);
8964 t
= builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END
);
8965 t
= build_call_expr (t
, 0);
8966 force_gimple_operand_gsi (&si
, t
, true, NULL_TREE
, true, GSI_SAME_STMT
);
8967 gsi_remove (&si
, true);
8969 if (gimple_in_ssa_p (cfun
))
8970 update_ssa (TODO_update_ssa_no_phi
);
8974 /* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
8975 using expand_omp_atomic_fetch_op. If it failed, we try to
8976 call expand_omp_atomic_pipeline, and if it fails too, the
8977 ultimate fallback is wrapping the operation in a mutex
8978 (expand_omp_atomic_mutex). REGION is the atomic region built
8979 by build_omp_regions_1(). */
8982 expand_omp_atomic (struct omp_region
*region
)
8984 basic_block load_bb
= region
->entry
, store_bb
= region
->exit
;
8985 gomp_atomic_load
*load
= as_a
<gomp_atomic_load
*> (last_stmt (load_bb
));
8986 gomp_atomic_store
*store
= as_a
<gomp_atomic_store
*> (last_stmt (store_bb
));
8987 tree loaded_val
= gimple_omp_atomic_load_lhs (load
);
8988 tree addr
= gimple_omp_atomic_load_rhs (load
);
8989 tree stored_val
= gimple_omp_atomic_store_val (store
);
8990 tree type
= TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val
));
8991 HOST_WIDE_INT index
;
8993 /* Make sure the type is one of the supported sizes. */
8994 index
= tree_to_uhwi (TYPE_SIZE_UNIT (type
));
8995 index
= exact_log2 (index
);
8996 if (index
>= 0 && index
<= 4)
8998 unsigned int align
= TYPE_ALIGN_UNIT (type
);
9000 /* __sync builtins require strict data alignment. */
9001 if (exact_log2 (align
) >= index
)
9005 if (loaded_val
== stored_val
9006 && (is_int_mode (TYPE_MODE (type
), &smode
)
9007 || is_float_mode (TYPE_MODE (type
), &smode
))
9008 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
9009 && expand_omp_atomic_load (load_bb
, addr
, loaded_val
, index
))
9013 if ((is_int_mode (TYPE_MODE (type
), &smode
)
9014 || is_float_mode (TYPE_MODE (type
), &smode
))
9015 && GET_MODE_BITSIZE (smode
) <= BITS_PER_WORD
9016 && store_bb
== single_succ (load_bb
)
9017 && first_stmt (store_bb
) == store
9018 && expand_omp_atomic_store (load_bb
, addr
, loaded_val
,
9022 /* When possible, use specialized atomic update functions. */
9023 if ((INTEGRAL_TYPE_P (type
) || POINTER_TYPE_P (type
))
9024 && store_bb
== single_succ (load_bb
)
9025 && expand_omp_atomic_fetch_op (load_bb
, addr
,
9026 loaded_val
, stored_val
, index
))
9029 /* If we don't have specialized __sync builtins, try and implement
9030 as a compare and swap loop. */
9031 if (expand_omp_atomic_pipeline (load_bb
, store_bb
, addr
,
9032 loaded_val
, stored_val
, index
))
9037 /* The ultimate fallback is wrapping the operation in a mutex. */
9038 expand_omp_atomic_mutex (load_bb
, store_bb
, addr
, loaded_val
, stored_val
);
9041 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9045 mark_loops_in_oacc_kernels_region (basic_block region_entry
,
9046 basic_block region_exit
)
9048 class loop
*outer
= region_entry
->loop_father
;
9049 gcc_assert (region_exit
== NULL
|| outer
== region_exit
->loop_father
);
9051 /* Don't parallelize the kernels region if it contains more than one outer
9053 unsigned int nr_outer_loops
= 0;
9054 class loop
*single_outer
= NULL
;
9055 for (class loop
*loop
= outer
->inner
; loop
!= NULL
; loop
= loop
->next
)
9057 gcc_assert (loop_outer (loop
) == outer
);
9059 if (!dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_entry
))
9062 if (region_exit
!= NULL
9063 && dominated_by_p (CDI_DOMINATORS
, loop
->header
, region_exit
))
9067 single_outer
= loop
;
9069 if (nr_outer_loops
!= 1)
9072 for (class loop
*loop
= single_outer
->inner
;
9078 /* Mark the loops in the region. */
9079 for (class loop
*loop
= single_outer
; loop
!= NULL
; loop
= loop
->inner
)
9080 loop
->in_oacc_kernels_region
= true;
9083 /* Build target argument identifier from the DEVICE identifier, value
9084 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9087 get_target_argument_identifier_1 (int device
, bool subseqent_param
, int id
)
9089 tree t
= build_int_cst (integer_type_node
, device
);
9090 if (subseqent_param
)
9091 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
9092 build_int_cst (integer_type_node
,
9093 GOMP_TARGET_ARG_SUBSEQUENT_PARAM
));
9094 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
9095 build_int_cst (integer_type_node
, id
));
9099 /* Like above but return it in type that can be directly stored as an element
9100 of the argument array. */
9103 get_target_argument_identifier (int device
, bool subseqent_param
, int id
)
9105 tree t
= get_target_argument_identifier_1 (device
, subseqent_param
, id
);
9106 return fold_convert (ptr_type_node
, t
);
9109 /* Return a target argument consisting of DEVICE identifier, value identifier
9110 ID, and the actual VALUE. */
9113 get_target_argument_value (gimple_stmt_iterator
*gsi
, int device
, int id
,
9116 tree t
= fold_build2 (LSHIFT_EXPR
, integer_type_node
,
9117 fold_convert (integer_type_node
, value
),
9118 build_int_cst (unsigned_type_node
,
9119 GOMP_TARGET_ARG_VALUE_SHIFT
));
9120 t
= fold_build2 (BIT_IOR_EXPR
, integer_type_node
, t
,
9121 get_target_argument_identifier_1 (device
, false, id
));
9122 t
= fold_convert (ptr_type_node
, t
);
9123 return force_gimple_operand_gsi (gsi
, t
, true, NULL
, true, GSI_SAME_STMT
);
9126 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9127 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9128 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9132 push_target_argument_according_to_value (gimple_stmt_iterator
*gsi
, int device
,
9133 int id
, tree value
, vec
<tree
> *args
)
9135 if (tree_fits_shwi_p (value
)
9136 && tree_to_shwi (value
) > -(1 << 15)
9137 && tree_to_shwi (value
) < (1 << 15))
9138 args
->quick_push (get_target_argument_value (gsi
, device
, id
, value
));
9141 args
->quick_push (get_target_argument_identifier (device
, true, id
));
9142 value
= fold_convert (ptr_type_node
, value
);
9143 value
= force_gimple_operand_gsi (gsi
, value
, true, NULL
, true,
9145 args
->quick_push (value
);
9149 /* Create an array of arguments that is then passed to GOMP_target. */
9152 get_target_arguments (gimple_stmt_iterator
*gsi
, gomp_target
*tgt_stmt
)
9154 auto_vec
<tree
, 6> args
;
9155 tree clauses
= gimple_omp_target_clauses (tgt_stmt
);
9156 tree t
, c
= omp_find_clause (clauses
, OMP_CLAUSE_NUM_TEAMS
);
9158 t
= OMP_CLAUSE_NUM_TEAMS_EXPR (c
);
9160 t
= integer_minus_one_node
;
9161 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
9162 GOMP_TARGET_ARG_NUM_TEAMS
, t
, &args
);
9164 c
= omp_find_clause (clauses
, OMP_CLAUSE_THREAD_LIMIT
);
9166 t
= OMP_CLAUSE_THREAD_LIMIT_EXPR (c
);
9168 t
= integer_minus_one_node
;
9169 push_target_argument_according_to_value (gsi
, GOMP_TARGET_ARG_DEVICE_ALL
,
9170 GOMP_TARGET_ARG_THREAD_LIMIT
, t
,
9173 /* Produce more, perhaps device specific, arguments here. */
9175 tree argarray
= create_tmp_var (build_array_type_nelts (ptr_type_node
,
9176 args
.length () + 1),
9177 ".omp_target_args");
9178 for (unsigned i
= 0; i
< args
.length (); i
++)
9180 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
9181 build_int_cst (integer_type_node
, i
),
9182 NULL_TREE
, NULL_TREE
);
9183 gsi_insert_before (gsi
, gimple_build_assign (ref
, args
[i
]),
9186 tree ref
= build4 (ARRAY_REF
, ptr_type_node
, argarray
,
9187 build_int_cst (integer_type_node
, args
.length ()),
9188 NULL_TREE
, NULL_TREE
);
9189 gsi_insert_before (gsi
, gimple_build_assign (ref
, null_pointer_node
),
9191 TREE_ADDRESSABLE (argarray
) = 1;
9192 return build_fold_addr_expr (argarray
);
9195 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9198 expand_omp_target (struct omp_region
*region
)
9200 basic_block entry_bb
, exit_bb
, new_bb
;
9201 struct function
*child_cfun
;
9202 tree child_fn
, block
, t
;
9203 gimple_stmt_iterator gsi
;
9204 gomp_target
*entry_stmt
;
9207 bool offloaded
, data_region
;
9210 entry_stmt
= as_a
<gomp_target
*> (last_stmt (region
->entry
));
9211 target_kind
= gimple_omp_target_kind (entry_stmt
);
9212 new_bb
= region
->entry
;
9214 offloaded
= is_gimple_omp_offloaded (entry_stmt
);
9215 switch (target_kind
)
9217 case GF_OMP_TARGET_KIND_REGION
:
9218 case GF_OMP_TARGET_KIND_UPDATE
:
9219 case GF_OMP_TARGET_KIND_ENTER_DATA
:
9220 case GF_OMP_TARGET_KIND_EXIT_DATA
:
9221 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
9222 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
9223 case GF_OMP_TARGET_KIND_OACC_SERIAL
:
9224 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
9225 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
9226 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
9227 data_region
= false;
9229 case GF_OMP_TARGET_KIND_DATA
:
9230 case GF_OMP_TARGET_KIND_OACC_DATA
:
9231 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
9238 child_fn
= NULL_TREE
;
9242 child_fn
= gimple_omp_target_child_fn (entry_stmt
);
9243 child_cfun
= DECL_STRUCT_FUNCTION (child_fn
);
9246 /* Supported by expand_omp_taskreg, but not here. */
9247 if (child_cfun
!= NULL
)
9248 gcc_checking_assert (!child_cfun
->cfg
);
9249 gcc_checking_assert (!gimple_in_ssa_p (cfun
));
9251 entry_bb
= region
->entry
;
9252 exit_bb
= region
->exit
;
9254 switch (target_kind
)
9256 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
9257 mark_loops_in_oacc_kernels_region (region
->entry
, region
->exit
);
9259 /* Further down, all OpenACC compute constructs will be mapped to
9260 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9261 is an "oacc kernels" attribute set for OpenACC kernels. */
9262 DECL_ATTRIBUTES (child_fn
)
9263 = tree_cons (get_identifier ("oacc kernels"),
9264 NULL_TREE
, DECL_ATTRIBUTES (child_fn
));
9266 case GF_OMP_TARGET_KIND_OACC_SERIAL
:
9267 /* Further down, all OpenACC compute constructs will be mapped to
9268 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9269 is an "oacc serial" attribute set for OpenACC serial. */
9270 DECL_ATTRIBUTES (child_fn
)
9271 = tree_cons (get_identifier ("oacc serial"),
9272 NULL_TREE
, DECL_ATTRIBUTES (child_fn
));
9280 unsigned srcidx
, dstidx
, num
;
9282 /* If the offloading region needs data sent from the parent
9283 function, then the very first statement (except possible
9284 tree profile counter updates) of the offloading body
9285 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9286 &.OMP_DATA_O is passed as an argument to the child function,
9287 we need to replace it with the argument as seen by the child
9290 In most cases, this will end up being the identity assignment
9291 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9292 a function call that has been inlined, the original PARM_DECL
9293 .OMP_DATA_I may have been converted into a different local
9294 variable. In which case, we need to keep the assignment. */
9295 tree data_arg
= gimple_omp_target_data_arg (entry_stmt
);
9298 basic_block entry_succ_bb
= single_succ (entry_bb
);
9299 gimple_stmt_iterator gsi
;
9301 gimple
*tgtcopy_stmt
= NULL
;
9302 tree sender
= TREE_VEC_ELT (data_arg
, 0);
9304 for (gsi
= gsi_start_bb (entry_succ_bb
); ; gsi_next (&gsi
))
9306 gcc_assert (!gsi_end_p (gsi
));
9307 stmt
= gsi_stmt (gsi
);
9308 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
9311 if (gimple_num_ops (stmt
) == 2)
9313 tree arg
= gimple_assign_rhs1 (stmt
);
9315 /* We're ignoring the subcode because we're
9316 effectively doing a STRIP_NOPS. */
9318 if (TREE_CODE (arg
) == ADDR_EXPR
9319 && TREE_OPERAND (arg
, 0) == sender
)
9321 tgtcopy_stmt
= stmt
;
9327 gcc_assert (tgtcopy_stmt
!= NULL
);
9328 arg
= DECL_ARGUMENTS (child_fn
);
9330 gcc_assert (gimple_assign_lhs (tgtcopy_stmt
) == arg
);
9331 gsi_remove (&gsi
, true);
9334 /* Declare local variables needed in CHILD_CFUN. */
9335 block
= DECL_INITIAL (child_fn
);
9336 BLOCK_VARS (block
) = vec2chain (child_cfun
->local_decls
);
9337 /* The gimplifier could record temporaries in the offloading block
9338 rather than in containing function's local_decls chain,
9339 which would mean cgraph missed finalizing them. Do it now. */
9340 for (t
= BLOCK_VARS (block
); t
; t
= DECL_CHAIN (t
))
9341 if (VAR_P (t
) && TREE_STATIC (t
) && !DECL_EXTERNAL (t
))
9342 varpool_node::finalize_decl (t
);
9343 DECL_SAVED_TREE (child_fn
) = NULL
;
9344 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9345 gimple_set_body (child_fn
, NULL
);
9346 TREE_USED (block
) = 1;
9348 /* Reset DECL_CONTEXT on function arguments. */
9349 for (t
= DECL_ARGUMENTS (child_fn
); t
; t
= DECL_CHAIN (t
))
9350 DECL_CONTEXT (t
) = child_fn
;
9352 /* Split ENTRY_BB at GIMPLE_*,
9353 so that it can be moved to the child function. */
9354 gsi
= gsi_last_nondebug_bb (entry_bb
);
9355 stmt
= gsi_stmt (gsi
);
9357 && gimple_code (stmt
) == gimple_code (entry_stmt
));
9358 e
= split_block (entry_bb
, stmt
);
9359 gsi_remove (&gsi
, true);
9361 single_succ_edge (entry_bb
)->flags
= EDGE_FALLTHRU
;
9363 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9366 gsi
= gsi_last_nondebug_bb (exit_bb
);
9367 gcc_assert (!gsi_end_p (gsi
)
9368 && gimple_code (gsi_stmt (gsi
)) == GIMPLE_OMP_RETURN
);
9369 stmt
= gimple_build_return (NULL
);
9370 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
9371 gsi_remove (&gsi
, true);
9374 /* Move the offloading region into CHILD_CFUN. */
9376 block
= gimple_block (entry_stmt
);
9378 new_bb
= move_sese_region_to_fn (child_cfun
, entry_bb
, exit_bb
, block
);
9380 single_succ_edge (new_bb
)->flags
= EDGE_FALLTHRU
;
9381 /* When the OMP expansion process cannot guarantee an up-to-date
9382 loop tree arrange for the child function to fixup loops. */
9383 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
9384 child_cfun
->x_current_loops
->state
|= LOOPS_NEED_FIXUP
;
9386 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9387 num
= vec_safe_length (child_cfun
->local_decls
);
9388 for (srcidx
= 0, dstidx
= 0; srcidx
< num
; srcidx
++)
9390 t
= (*child_cfun
->local_decls
)[srcidx
];
9391 if (DECL_CONTEXT (t
) == cfun
->decl
)
9393 if (srcidx
!= dstidx
)
9394 (*child_cfun
->local_decls
)[dstidx
] = t
;
9398 vec_safe_truncate (child_cfun
->local_decls
, dstidx
);
9400 /* Inform the callgraph about the new function. */
9401 child_cfun
->curr_properties
= cfun
->curr_properties
;
9402 child_cfun
->has_simduid_loops
|= cfun
->has_simduid_loops
;
9403 child_cfun
->has_force_vectorize_loops
|= cfun
->has_force_vectorize_loops
;
9404 cgraph_node
*node
= cgraph_node::get_create (child_fn
);
9405 node
->parallelized_function
= 1;
9406 cgraph_node::add_new_function (child_fn
, true);
9408 /* Add the new function to the offload table. */
9409 if (ENABLE_OFFLOADING
)
9412 DECL_PRESERVE_P (child_fn
) = 1;
9413 vec_safe_push (offload_funcs
, child_fn
);
9416 bool need_asm
= DECL_ASSEMBLER_NAME_SET_P (current_function_decl
)
9417 && !DECL_ASSEMBLER_NAME_SET_P (child_fn
);
9419 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9420 fixed in a following pass. */
9421 push_cfun (child_cfun
);
9423 assign_assembler_name_if_needed (child_fn
);
9424 cgraph_edge::rebuild_edges ();
9426 /* Some EH regions might become dead, see PR34608. If
9427 pass_cleanup_cfg isn't the first pass to happen with the
9428 new child, these dead EH edges might cause problems.
9429 Clean them up now. */
9430 if (flag_exceptions
)
9433 bool changed
= false;
9435 FOR_EACH_BB_FN (bb
, cfun
)
9436 changed
|= gimple_purge_dead_eh_edges (bb
);
9438 cleanup_tree_cfg ();
9440 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
9441 verify_loop_structure ();
9444 if (dump_file
&& !gimple_in_ssa_p (cfun
))
9446 omp_any_child_fn_dumped
= true;
9447 dump_function_header (dump_file
, child_fn
, dump_flags
);
9448 dump_function_to_file (child_fn
, dump_file
, dump_flags
);
9451 adjust_context_and_scope (region
, gimple_block (entry_stmt
), child_fn
);
9454 /* Emit a library call to launch the offloading region, or do data
9456 tree t1
, t2
, t3
, t4
, depend
, c
, clauses
;
9457 enum built_in_function start_ix
;
9458 unsigned int flags_i
= 0;
9460 switch (gimple_omp_target_kind (entry_stmt
))
9462 case GF_OMP_TARGET_KIND_REGION
:
9463 start_ix
= BUILT_IN_GOMP_TARGET
;
9465 case GF_OMP_TARGET_KIND_DATA
:
9466 start_ix
= BUILT_IN_GOMP_TARGET_DATA
;
9468 case GF_OMP_TARGET_KIND_UPDATE
:
9469 start_ix
= BUILT_IN_GOMP_TARGET_UPDATE
;
9471 case GF_OMP_TARGET_KIND_ENTER_DATA
:
9472 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
9474 case GF_OMP_TARGET_KIND_EXIT_DATA
:
9475 start_ix
= BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
;
9476 flags_i
|= GOMP_TARGET_FLAG_EXIT_DATA
;
9478 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
9479 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
9480 case GF_OMP_TARGET_KIND_OACC_SERIAL
:
9481 start_ix
= BUILT_IN_GOACC_PARALLEL
;
9483 case GF_OMP_TARGET_KIND_OACC_DATA
:
9484 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
9485 start_ix
= BUILT_IN_GOACC_DATA_START
;
9487 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
9488 start_ix
= BUILT_IN_GOACC_UPDATE
;
9490 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
9491 start_ix
= BUILT_IN_GOACC_ENTER_EXIT_DATA
;
9493 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
9494 start_ix
= BUILT_IN_GOACC_DECLARE
;
9500 clauses
= gimple_omp_target_clauses (entry_stmt
);
9502 tree device
= NULL_TREE
;
9503 location_t device_loc
= UNKNOWN_LOCATION
;
9504 tree goacc_flags
= NULL_TREE
;
9505 if (is_gimple_omp_oacc (entry_stmt
))
9507 /* By default, no GOACC_FLAGs are set. */
9508 goacc_flags
= integer_zero_node
;
9512 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEVICE
);
9515 device
= OMP_CLAUSE_DEVICE_ID (c
);
9516 device_loc
= OMP_CLAUSE_LOCATION (c
);
9520 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9522 device
= build_int_cst (integer_type_node
, GOMP_DEVICE_ICV
);
9523 device_loc
= gimple_location (entry_stmt
);
9526 c
= omp_find_clause (clauses
, OMP_CLAUSE_NOWAIT
);
9528 flags_i
|= GOMP_TARGET_FLAG_NOWAIT
;
9531 /* By default, there is no conditional. */
9532 tree cond
= NULL_TREE
;
9533 c
= omp_find_clause (clauses
, OMP_CLAUSE_IF
);
9535 cond
= OMP_CLAUSE_IF_EXPR (c
);
9536 /* If we found the clause 'if (cond)', build:
9537 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9538 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9542 if (is_gimple_omp_oacc (entry_stmt
))
9546 /* Ensure 'device' is of the correct type. */
9547 device
= fold_convert_loc (device_loc
, integer_type_node
, device
);
9552 cond
= gimple_boolify (cond
);
9554 basic_block cond_bb
, then_bb
, else_bb
;
9558 tmp_var
= create_tmp_var (TREE_TYPE (*tp
));
9560 e
= split_block_after_labels (new_bb
);
9563 gsi
= gsi_last_nondebug_bb (new_bb
);
9565 e
= split_block (new_bb
, gsi_stmt (gsi
));
9571 then_bb
= create_empty_bb (cond_bb
);
9572 else_bb
= create_empty_bb (then_bb
);
9573 set_immediate_dominator (CDI_DOMINATORS
, then_bb
, cond_bb
);
9574 set_immediate_dominator (CDI_DOMINATORS
, else_bb
, cond_bb
);
9576 stmt
= gimple_build_cond_empty (cond
);
9577 gsi
= gsi_last_bb (cond_bb
);
9578 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
9580 gsi
= gsi_start_bb (then_bb
);
9581 stmt
= gimple_build_assign (tmp_var
, *tp
);
9582 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
9584 gsi
= gsi_start_bb (else_bb
);
9585 if (is_gimple_omp_oacc (entry_stmt
))
9586 stmt
= gimple_build_assign (tmp_var
,
9589 build_int_cst (integer_type_node
,
9590 GOACC_FLAG_HOST_FALLBACK
));
9592 stmt
= gimple_build_assign (tmp_var
,
9593 build_int_cst (integer_type_node
,
9594 GOMP_DEVICE_HOST_FALLBACK
));
9595 gsi_insert_after (&gsi
, stmt
, GSI_CONTINUE_LINKING
);
9597 make_edge (cond_bb
, then_bb
, EDGE_TRUE_VALUE
);
9598 make_edge (cond_bb
, else_bb
, EDGE_FALSE_VALUE
);
9599 add_bb_to_loop (then_bb
, cond_bb
->loop_father
);
9600 add_bb_to_loop (else_bb
, cond_bb
->loop_father
);
9601 make_edge (then_bb
, new_bb
, EDGE_FALLTHRU
);
9602 make_edge (else_bb
, new_bb
, EDGE_FALLTHRU
);
9606 gsi
= gsi_last_nondebug_bb (new_bb
);
9610 gsi
= gsi_last_nondebug_bb (new_bb
);
9612 if (device
!= NULL_TREE
)
9613 device
= force_gimple_operand_gsi (&gsi
, device
, true, NULL_TREE
,
9614 true, GSI_SAME_STMT
);
9617 t
= gimple_omp_target_data_arg (entry_stmt
);
9620 t1
= size_zero_node
;
9621 t2
= build_zero_cst (ptr_type_node
);
9627 t1
= TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t
, 1))));
9628 t1
= size_binop (PLUS_EXPR
, t1
, size_int (1));
9629 t2
= build_fold_addr_expr (TREE_VEC_ELT (t
, 0));
9630 t3
= build_fold_addr_expr (TREE_VEC_ELT (t
, 1));
9631 t4
= build_fold_addr_expr (TREE_VEC_ELT (t
, 2));
9635 bool tagging
= false;
9636 /* The maximum number used by any start_ix, without varargs. */
9637 auto_vec
<tree
, 11> args
;
9638 if (is_gimple_omp_oacc (entry_stmt
))
9640 tree goacc_flags_m
= fold_build1 (GOACC_FLAGS_MARSHAL_OP
,
9641 TREE_TYPE (goacc_flags
), goacc_flags
);
9642 goacc_flags_m
= force_gimple_operand_gsi (&gsi
, goacc_flags_m
, true,
9645 args
.quick_push (goacc_flags_m
);
9648 args
.quick_push (device
);
9650 args
.quick_push (build_fold_addr_expr (child_fn
));
9651 args
.quick_push (t1
);
9652 args
.quick_push (t2
);
9653 args
.quick_push (t3
);
9654 args
.quick_push (t4
);
9657 case BUILT_IN_GOACC_DATA_START
:
9658 case BUILT_IN_GOACC_DECLARE
:
9659 case BUILT_IN_GOMP_TARGET_DATA
:
9661 case BUILT_IN_GOMP_TARGET
:
9662 case BUILT_IN_GOMP_TARGET_UPDATE
:
9663 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA
:
9664 args
.quick_push (build_int_cst (unsigned_type_node
, flags_i
));
9665 c
= omp_find_clause (clauses
, OMP_CLAUSE_DEPEND
);
9667 depend
= OMP_CLAUSE_DECL (c
);
9669 depend
= build_int_cst (ptr_type_node
, 0);
9670 args
.quick_push (depend
);
9671 if (start_ix
== BUILT_IN_GOMP_TARGET
)
9672 args
.quick_push (get_target_arguments (&gsi
, entry_stmt
));
9674 case BUILT_IN_GOACC_PARALLEL
:
9675 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn
)) != NULL
)
9677 tree dims
= NULL_TREE
;
9680 /* For serial constructs we set all dimensions to 1. */
9681 for (ix
= GOMP_DIM_MAX
; ix
--;)
9682 dims
= tree_cons (NULL_TREE
, integer_one_node
, dims
);
9683 oacc_replace_fn_attrib (child_fn
, dims
);
9686 oacc_set_fn_attrib (child_fn
, clauses
, &args
);
9689 case BUILT_IN_GOACC_ENTER_EXIT_DATA
:
9690 case BUILT_IN_GOACC_UPDATE
:
9692 tree t_async
= NULL_TREE
;
9694 /* If present, use the value specified by the respective
9695 clause, making sure that is of the correct type. */
9696 c
= omp_find_clause (clauses
, OMP_CLAUSE_ASYNC
);
9698 t_async
= fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
9700 OMP_CLAUSE_ASYNC_EXPR (c
));
9702 /* Default values for t_async. */
9703 t_async
= fold_convert_loc (gimple_location (entry_stmt
),
9705 build_int_cst (integer_type_node
,
9707 if (tagging
&& t_async
)
9709 unsigned HOST_WIDE_INT i_async
= GOMP_LAUNCH_OP_MAX
;
9711 if (TREE_CODE (t_async
) == INTEGER_CST
)
9713 /* See if we can pack the async arg in to the tag's
9715 i_async
= TREE_INT_CST_LOW (t_async
);
9716 if (i_async
< GOMP_LAUNCH_OP_MAX
)
9717 t_async
= NULL_TREE
;
9719 i_async
= GOMP_LAUNCH_OP_MAX
;
9721 args
.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC
, NULL_TREE
,
9725 args
.safe_push (force_gimple_operand_gsi (&gsi
, t_async
, true,
9729 /* Save the argument index, and ... */
9730 unsigned t_wait_idx
= args
.length ();
9731 unsigned num_waits
= 0;
9732 c
= omp_find_clause (clauses
, OMP_CLAUSE_WAIT
);
9734 /* ... push a placeholder. */
9735 args
.safe_push (integer_zero_node
);
9737 for (; c
; c
= OMP_CLAUSE_CHAIN (c
))
9738 if (OMP_CLAUSE_CODE (c
) == OMP_CLAUSE_WAIT
)
9740 tree arg
= fold_convert_loc (OMP_CLAUSE_LOCATION (c
),
9742 OMP_CLAUSE_WAIT_EXPR (c
));
9743 arg
= force_gimple_operand_gsi (&gsi
, arg
, true, NULL_TREE
, true,
9745 args
.safe_push (arg
);
9749 if (!tagging
|| num_waits
)
9753 /* Now that we know the number, update the placeholder. */
9755 len
= oacc_launch_pack (GOMP_LAUNCH_WAIT
, NULL_TREE
, num_waits
);
9757 len
= build_int_cst (integer_type_node
, num_waits
);
9758 len
= fold_convert_loc (gimple_location (entry_stmt
),
9759 unsigned_type_node
, len
);
9760 args
[t_wait_idx
] = len
;
9768 /* Push terminal marker - zero. */
9769 args
.safe_push (oacc_launch_pack (0, NULL_TREE
, 0));
9771 g
= gimple_build_call_vec (builtin_decl_explicit (start_ix
), args
);
9772 gimple_set_location (g
, gimple_location (entry_stmt
));
9773 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
9777 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_TARGET
);
9778 gsi_remove (&gsi
, true);
9780 if (data_region
&& region
->exit
)
9782 gsi
= gsi_last_nondebug_bb (region
->exit
);
9784 gcc_assert (g
&& gimple_code (g
) == GIMPLE_OMP_RETURN
);
9785 gsi_remove (&gsi
, true);
9789 /* Expand the parallel region tree rooted at REGION. Expansion
9790 proceeds in depth-first order. Innermost regions are expanded
9791 first. This way, parallel regions that require a new function to
9792 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9793 internal dependencies in their body. */
9796 expand_omp (struct omp_region
*region
)
9798 omp_any_child_fn_dumped
= false;
9801 location_t saved_location
;
9802 gimple
*inner_stmt
= NULL
;
9804 /* First, determine whether this is a combined parallel+workshare
9806 if (region
->type
== GIMPLE_OMP_PARALLEL
)
9807 determine_parallel_type (region
);
9809 if (region
->type
== GIMPLE_OMP_FOR
9810 && gimple_omp_for_combined_p (last_stmt (region
->entry
)))
9811 inner_stmt
= last_stmt (region
->inner
->entry
);
9814 expand_omp (region
->inner
);
9816 saved_location
= input_location
;
9817 if (gimple_has_location (last_stmt (region
->entry
)))
9818 input_location
= gimple_location (last_stmt (region
->entry
));
9820 switch (region
->type
)
9822 case GIMPLE_OMP_PARALLEL
:
9823 case GIMPLE_OMP_TASK
:
9824 expand_omp_taskreg (region
);
9827 case GIMPLE_OMP_FOR
:
9828 expand_omp_for (region
, inner_stmt
);
9831 case GIMPLE_OMP_SECTIONS
:
9832 expand_omp_sections (region
);
9835 case GIMPLE_OMP_SECTION
:
9836 /* Individual omp sections are handled together with their
9837 parent GIMPLE_OMP_SECTIONS region. */
9840 case GIMPLE_OMP_SINGLE
:
9841 expand_omp_single (region
);
9844 case GIMPLE_OMP_ORDERED
:
9846 gomp_ordered
*ord_stmt
9847 = as_a
<gomp_ordered
*> (last_stmt (region
->entry
));
9848 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt
),
9851 /* We'll expand these when expanding corresponding
9852 worksharing region with ordered(n) clause. */
9853 gcc_assert (region
->outer
9854 && region
->outer
->type
== GIMPLE_OMP_FOR
);
9855 region
->ord_stmt
= ord_stmt
;
9860 case GIMPLE_OMP_MASTER
:
9861 case GIMPLE_OMP_TASKGROUP
:
9862 case GIMPLE_OMP_CRITICAL
:
9863 case GIMPLE_OMP_TEAMS
:
9864 expand_omp_synch (region
);
9867 case GIMPLE_OMP_ATOMIC_LOAD
:
9868 expand_omp_atomic (region
);
9871 case GIMPLE_OMP_TARGET
:
9872 expand_omp_target (region
);
9879 input_location
= saved_location
;
9880 region
= region
->next
;
9882 if (omp_any_child_fn_dumped
)
9885 dump_function_header (dump_file
, current_function_decl
, dump_flags
);
9886 omp_any_child_fn_dumped
= false;
9890 /* Helper for build_omp_regions. Scan the dominator tree starting at
9891 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9892 true, the function ends once a single tree is built (otherwise, whole
9893 forest of OMP constructs may be built). */
9896 build_omp_regions_1 (basic_block bb
, struct omp_region
*parent
,
9899 gimple_stmt_iterator gsi
;
9903 gsi
= gsi_last_nondebug_bb (bb
);
9904 if (!gsi_end_p (gsi
) && is_gimple_omp (gsi_stmt (gsi
)))
9906 struct omp_region
*region
;
9907 enum gimple_code code
;
9909 stmt
= gsi_stmt (gsi
);
9910 code
= gimple_code (stmt
);
9911 if (code
== GIMPLE_OMP_RETURN
)
9913 /* STMT is the return point out of region PARENT. Mark it
9914 as the exit point and make PARENT the immediately
9915 enclosing region. */
9916 gcc_assert (parent
);
9919 parent
= parent
->outer
;
9921 else if (code
== GIMPLE_OMP_ATOMIC_STORE
)
9923 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9924 GIMPLE_OMP_RETURN, but matches with
9925 GIMPLE_OMP_ATOMIC_LOAD. */
9926 gcc_assert (parent
);
9927 gcc_assert (parent
->type
== GIMPLE_OMP_ATOMIC_LOAD
);
9930 parent
= parent
->outer
;
9932 else if (code
== GIMPLE_OMP_CONTINUE
)
9934 gcc_assert (parent
);
9937 else if (code
== GIMPLE_OMP_SECTIONS_SWITCH
)
9939 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9940 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9944 region
= new_omp_region (bb
, code
, parent
);
9946 if (code
== GIMPLE_OMP_TARGET
)
9948 switch (gimple_omp_target_kind (stmt
))
9950 case GF_OMP_TARGET_KIND_REGION
:
9951 case GF_OMP_TARGET_KIND_DATA
:
9952 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
9953 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
9954 case GF_OMP_TARGET_KIND_OACC_SERIAL
:
9955 case GF_OMP_TARGET_KIND_OACC_DATA
:
9956 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
9958 case GF_OMP_TARGET_KIND_UPDATE
:
9959 case GF_OMP_TARGET_KIND_ENTER_DATA
:
9960 case GF_OMP_TARGET_KIND_EXIT_DATA
:
9961 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
9962 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
9963 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
9964 /* ..., other than for those stand-alone directives... */
9971 else if (code
== GIMPLE_OMP_ORDERED
9972 && omp_find_clause (gimple_omp_ordered_clauses
9973 (as_a
<gomp_ordered
*> (stmt
)),
9975 /* #pragma omp ordered depend is also just a stand-alone
9978 else if (code
== GIMPLE_OMP_TASK
9979 && gimple_omp_task_taskwait_p (stmt
))
9980 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
9982 /* ..., this directive becomes the parent for a new region. */
9988 if (single_tree
&& !parent
)
9991 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
9993 son
= next_dom_son (CDI_DOMINATORS
, son
))
9994 build_omp_regions_1 (son
, parent
, single_tree
);
9997 /* Builds the tree of OMP regions rooted at ROOT, storing it to
10001 build_omp_regions_root (basic_block root
)
10003 gcc_assert (root_omp_region
== NULL
);
10004 build_omp_regions_1 (root
, NULL
, true);
10005 gcc_assert (root_omp_region
!= NULL
);
10008 /* Expands omp construct (and its subconstructs) starting in HEAD. */
10011 omp_expand_local (basic_block head
)
10013 build_omp_regions_root (head
);
10014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10016 fprintf (dump_file
, "\nOMP region tree\n\n");
10017 dump_omp_region (dump_file
, root_omp_region
, 0);
10018 fprintf (dump_file
, "\n");
10021 remove_exit_barriers (root_omp_region
);
10022 expand_omp (root_omp_region
);
10024 omp_free_regions ();
10027 /* Scan the CFG and build a tree of OMP regions. Return the root of
10028 the OMP region tree. */
10031 build_omp_regions (void)
10033 gcc_assert (root_omp_region
== NULL
);
10034 calculate_dominance_info (CDI_DOMINATORS
);
10035 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun
), NULL
, false);
10038 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10040 static unsigned int
10041 execute_expand_omp (void)
10043 build_omp_regions ();
10045 if (!root_omp_region
)
10050 fprintf (dump_file
, "\nOMP region tree\n\n");
10051 dump_omp_region (dump_file
, root_omp_region
, 0);
10052 fprintf (dump_file
, "\n");
10055 remove_exit_barriers (root_omp_region
);
10057 expand_omp (root_omp_region
);
10059 if (flag_checking
&& !loops_state_satisfies_p (LOOPS_NEED_FIXUP
))
10060 verify_loop_structure ();
10061 cleanup_tree_cfg ();
10063 omp_free_regions ();
10068 /* OMP expansion -- the default pass, run before creation of SSA form. */
10072 const pass_data pass_data_expand_omp
=
10074 GIMPLE_PASS
, /* type */
10075 "ompexp", /* name */
10076 OPTGROUP_OMP
, /* optinfo_flags */
10077 TV_NONE
, /* tv_id */
10078 PROP_gimple_any
, /* properties_required */
10079 PROP_gimple_eomp
, /* properties_provided */
10080 0, /* properties_destroyed */
10081 0, /* todo_flags_start */
10082 0, /* todo_flags_finish */
10085 class pass_expand_omp
: public gimple_opt_pass
10088 pass_expand_omp (gcc::context
*ctxt
)
10089 : gimple_opt_pass (pass_data_expand_omp
, ctxt
)
10092 /* opt_pass methods: */
10093 virtual unsigned int execute (function
*)
10095 bool gate
= ((flag_openacc
!= 0 || flag_openmp
!= 0
10096 || flag_openmp_simd
!= 0)
10097 && !seen_error ());
10099 /* This pass always runs, to provide PROP_gimple_eomp.
10100 But often, there is nothing to do. */
10104 return execute_expand_omp ();
10107 }; // class pass_expand_omp
10109 } // anon namespace
10112 make_pass_expand_omp (gcc::context
*ctxt
)
10114 return new pass_expand_omp (ctxt
);
10119 const pass_data pass_data_expand_omp_ssa
=
10121 GIMPLE_PASS
, /* type */
10122 "ompexpssa", /* name */
10123 OPTGROUP_OMP
, /* optinfo_flags */
10124 TV_NONE
, /* tv_id */
10125 PROP_cfg
| PROP_ssa
, /* properties_required */
10126 PROP_gimple_eomp
, /* properties_provided */
10127 0, /* properties_destroyed */
10128 0, /* todo_flags_start */
10129 TODO_cleanup_cfg
| TODO_rebuild_alias
, /* todo_flags_finish */
10132 class pass_expand_omp_ssa
: public gimple_opt_pass
10135 pass_expand_omp_ssa (gcc::context
*ctxt
)
10136 : gimple_opt_pass (pass_data_expand_omp_ssa
, ctxt
)
10139 /* opt_pass methods: */
10140 virtual bool gate (function
*fun
)
10142 return !(fun
->curr_properties
& PROP_gimple_eomp
);
10144 virtual unsigned int execute (function
*) { return execute_expand_omp (); }
10145 opt_pass
* clone () { return new pass_expand_omp_ssa (m_ctxt
); }
10147 }; // class pass_expand_omp_ssa
10149 } // anon namespace
10152 make_pass_expand_omp_ssa (gcc::context
*ctxt
)
10154 return new pass_expand_omp_ssa (ctxt
);
10157 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10161 omp_make_gimple_edges (basic_block bb
, struct omp_region
**region
,
10164 gimple
*last
= last_stmt (bb
);
10165 enum gimple_code code
= gimple_code (last
);
10166 struct omp_region
*cur_region
= *region
;
10167 bool fallthru
= false;
10171 case GIMPLE_OMP_PARALLEL
:
10172 case GIMPLE_OMP_FOR
:
10173 case GIMPLE_OMP_SINGLE
:
10174 case GIMPLE_OMP_TEAMS
:
10175 case GIMPLE_OMP_MASTER
:
10176 case GIMPLE_OMP_TASKGROUP
:
10177 case GIMPLE_OMP_CRITICAL
:
10178 case GIMPLE_OMP_SECTION
:
10179 cur_region
= new_omp_region (bb
, code
, cur_region
);
10183 case GIMPLE_OMP_TASK
:
10184 cur_region
= new_omp_region (bb
, code
, cur_region
);
10186 if (gimple_omp_task_taskwait_p (last
))
10187 cur_region
= cur_region
->outer
;
10190 case GIMPLE_OMP_ORDERED
:
10191 cur_region
= new_omp_region (bb
, code
, cur_region
);
10193 if (omp_find_clause (gimple_omp_ordered_clauses
10194 (as_a
<gomp_ordered
*> (last
)),
10195 OMP_CLAUSE_DEPEND
))
10196 cur_region
= cur_region
->outer
;
10199 case GIMPLE_OMP_TARGET
:
10200 cur_region
= new_omp_region (bb
, code
, cur_region
);
10202 switch (gimple_omp_target_kind (last
))
10204 case GF_OMP_TARGET_KIND_REGION
:
10205 case GF_OMP_TARGET_KIND_DATA
:
10206 case GF_OMP_TARGET_KIND_OACC_PARALLEL
:
10207 case GF_OMP_TARGET_KIND_OACC_KERNELS
:
10208 case GF_OMP_TARGET_KIND_OACC_SERIAL
:
10209 case GF_OMP_TARGET_KIND_OACC_DATA
:
10210 case GF_OMP_TARGET_KIND_OACC_HOST_DATA
:
10212 case GF_OMP_TARGET_KIND_UPDATE
:
10213 case GF_OMP_TARGET_KIND_ENTER_DATA
:
10214 case GF_OMP_TARGET_KIND_EXIT_DATA
:
10215 case GF_OMP_TARGET_KIND_OACC_UPDATE
:
10216 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA
:
10217 case GF_OMP_TARGET_KIND_OACC_DECLARE
:
10218 cur_region
= cur_region
->outer
;
10221 gcc_unreachable ();
10225 case GIMPLE_OMP_SECTIONS
:
10226 cur_region
= new_omp_region (bb
, code
, cur_region
);
10230 case GIMPLE_OMP_SECTIONS_SWITCH
:
10234 case GIMPLE_OMP_ATOMIC_LOAD
:
10235 case GIMPLE_OMP_ATOMIC_STORE
:
10239 case GIMPLE_OMP_RETURN
:
10240 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10241 somewhere other than the next block. This will be
10243 cur_region
->exit
= bb
;
10244 if (cur_region
->type
== GIMPLE_OMP_TASK
)
10245 /* Add an edge corresponding to not scheduling the task
10247 make_edge (cur_region
->entry
, bb
, EDGE_ABNORMAL
);
10248 fallthru
= cur_region
->type
!= GIMPLE_OMP_SECTION
;
10249 cur_region
= cur_region
->outer
;
10252 case GIMPLE_OMP_CONTINUE
:
10253 cur_region
->cont
= bb
;
10254 switch (cur_region
->type
)
10256 case GIMPLE_OMP_FOR
:
10257 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10258 succs edges as abnormal to prevent splitting
10260 single_succ_edge (cur_region
->entry
)->flags
|= EDGE_ABNORMAL
;
10261 /* Make the loopback edge. */
10262 make_edge (bb
, single_succ (cur_region
->entry
),
10265 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10266 corresponds to the case that the body of the loop
10267 is not executed at all. */
10268 make_edge (cur_region
->entry
, bb
->next_bb
, EDGE_ABNORMAL
);
10269 make_edge (bb
, bb
->next_bb
, EDGE_FALLTHRU
| EDGE_ABNORMAL
);
10273 case GIMPLE_OMP_SECTIONS
:
10274 /* Wire up the edges into and out of the nested sections. */
10276 basic_block switch_bb
= single_succ (cur_region
->entry
);
10278 struct omp_region
*i
;
10279 for (i
= cur_region
->inner
; i
; i
= i
->next
)
10281 gcc_assert (i
->type
== GIMPLE_OMP_SECTION
);
10282 make_edge (switch_bb
, i
->entry
, 0);
10283 make_edge (i
->exit
, bb
, EDGE_FALLTHRU
);
10286 /* Make the loopback edge to the block with
10287 GIMPLE_OMP_SECTIONS_SWITCH. */
10288 make_edge (bb
, switch_bb
, 0);
10290 /* Make the edge from the switch to exit. */
10291 make_edge (switch_bb
, bb
->next_bb
, 0);
10296 case GIMPLE_OMP_TASK
:
10301 gcc_unreachable ();
10306 gcc_unreachable ();
10309 if (*region
!= cur_region
)
10311 *region
= cur_region
;
10313 *region_idx
= cur_region
->entry
->index
;