/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */
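
/* As a worked example of the rounding done below: with a vectorization
   factor of 4, a chunk size of 10 becomes (10 + 3) & -4 == 12, i.e. it
   is rounded up to the next multiple of the vectorization factor.  */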

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

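      /* The combined GOMP_parallel_loop_* entry points take the loop
	 bounds, the step and an optional chunk size as additional
	 trailing arguments, in that order, which is what gets pushed
	 here.  */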
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
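	  /* The BUILT_IN_GOMP_PARALLEL_LOOP_* builtins are declared in
	     the same order as the indices selected above, so the chosen
	     index can simply be added to the first of them.  */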
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

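  /* Assemble the argument list for the GOMP_parallel* call: the
     outlined child function, the shared-data block, the thread count,
     any combined-workshare arguments, and finally the flags.  */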
  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  BB is the block where to insert
   the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

1353 block = DECL_INITIAL (child_fn);
1354 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1355 /* The gimplifier could record temporaries in parallel/task block
1356 rather than in containing function's local_decls chain,
1357 which would mean cgraph missed finalizing them. Do it now. */
1358 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1359 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1360 varpool_node::finalize_decl (t);
1361 DECL_SAVED_TREE (child_fn) = NULL;
1362 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1363 gimple_set_body (child_fn, NULL);
1364 TREE_USED (block) = 1;
1365
1366 /* Reset DECL_CONTEXT on function arguments. */
1367 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1368 DECL_CONTEXT (t) = child_fn;
1369
1370 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1371 so that it can be moved to the child function. */
65f4b875 1372 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
1373 stmt = gsi_stmt (gsi);
1374 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
28567c40
JJ
1375 || gimple_code (stmt) == GIMPLE_OMP_TASK
1376 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
629b3d75
MJ
1377 e = split_block (entry_bb, stmt);
1378 gsi_remove (&gsi, true);
1379 entry_bb = e->dest;
1380 edge e2 = NULL;
28567c40 1381 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
629b3d75
MJ
1382 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1383 else
1384 {
1385 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1386 gcc_assert (e2->dest == region->exit);
1387 remove_edge (BRANCH_EDGE (entry_bb));
1388 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
65f4b875 1389 gsi = gsi_last_nondebug_bb (region->exit);
629b3d75
MJ
1390 gcc_assert (!gsi_end_p (gsi)
1391 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1392 gsi_remove (&gsi, true);
1393 }
1394
1395 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1396 if (exit_bb)
1397 {
65f4b875 1398 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
1399 gcc_assert (!gsi_end_p (gsi)
1400 && (gimple_code (gsi_stmt (gsi))
1401 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1402 stmt = gimple_build_return (NULL);
1403 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1404 gsi_remove (&gsi, true);
1405 }
1406
1407 /* Move the parallel region into CHILD_CFUN. */
1408
1409 if (gimple_in_ssa_p (cfun))
1410 {
1411 init_tree_ssa (child_cfun);
1412 init_ssa_operands (child_cfun);
1413 child_cfun->gimple_df->in_ssa_p = true;
1414 block = NULL_TREE;
1415 }
1416 else
1417 block = gimple_block (entry_stmt);
1418
1419 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1420 if (exit_bb)
1421 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1422 if (e2)
1423 {
1424 basic_block dest_bb = e2->dest;
1425 if (!exit_bb)
1426 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1427 remove_edge (e2);
1428 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1429 }
1430 /* When the OMP expansion process cannot guarantee an up-to-date
01914336 1431 loop tree arrange for the child function to fixup loops. */
629b3d75
MJ
1432 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1433 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1434
1435 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1436 num = vec_safe_length (child_cfun->local_decls);
1437 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1438 {
1439 t = (*child_cfun->local_decls)[srcidx];
1440 if (DECL_CONTEXT (t) == cfun->decl)
1441 continue;
1442 if (srcidx != dstidx)
1443 (*child_cfun->local_decls)[dstidx] = t;
1444 dstidx++;
1445 }
1446 if (dstidx != num)
1447 vec_safe_truncate (child_cfun->local_decls, dstidx);
1448
1449 /* Inform the callgraph about the new function. */
1450 child_cfun->curr_properties = cfun->curr_properties;
1451 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1452 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1453 cgraph_node *node = cgraph_node::get_create (child_fn);
1454 node->parallelized_function = 1;
1455 cgraph_node::add_new_function (child_fn, true);
1456
1457 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1458 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1459
1460 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1461 fixed in a following pass. */
1462 push_cfun (child_cfun);
1463 if (need_asm)
9579db35 1464 assign_assembler_name_if_needed (child_fn);
629b3d75
MJ
1465
1466 if (optimize)
1467 optimize_omp_library_calls (entry_stmt);
fc06ae0d 1468 update_max_bb_count ();
629b3d75
MJ
1469 cgraph_edge::rebuild_edges ();
1470
1471 /* Some EH regions might become dead, see PR34608. If
1472 pass_cleanup_cfg isn't the first pass to happen with the
1473 new child, these dead EH edges might cause problems.
1474 Clean them up now. */
1475 if (flag_exceptions)
1476 {
1477 basic_block bb;
1478 bool changed = false;
1479
1480 FOR_EACH_BB_FN (bb, cfun)
1481 changed |= gimple_purge_dead_eh_edges (bb);
1482 if (changed)
1483 cleanup_tree_cfg ();
1484 }
1485 if (gimple_in_ssa_p (cfun))
1486 update_ssa (TODO_update_ssa);
1487 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1488 verify_loop_structure ();
1489 pop_cfun ();
1490
1491 if (dump_file && !gimple_in_ssa_p (cfun))
1492 {
1493 omp_any_child_fn_dumped = true;
1494 dump_function_header (dump_file, child_fn, dump_flags);
1495 dump_function_to_file (child_fn, dump_file, dump_flags);
1496 }
1497 }
1498
4ccc4e30
JJ
1499 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1500
5e9d6aa4 1501 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
629b3d75
MJ
1502 expand_parallel_call (region, new_bb,
1503 as_a <gomp_parallel *> (entry_stmt), ws_args);
28567c40
JJ
1504 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1505 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
629b3d75
MJ
1506 else
1507 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1508 if (gimple_in_ssa_p (cfun))
1509 update_ssa (TODO_update_ssa_only_virtuals);
1510}
1511
/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
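      /* That is, iters = (range - dir + step) / step, which for both
	 loop directions computes the iteration count as the rounded-up
	 quotient ceil (|range| / |step|).  */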
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
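
/* For example, with two collapsed loops of ITERS1 (outer) and ITERS0
   (inner) iterations, the linear index IVAR decomposes below as
   inner index = IVAR % ITERS0 and outer index = IVAR / ITERS0.  */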

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

1686/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1687 of the combined collapse > 1 loop constructs, generate code like:
1688 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1689 if (cond3 is <)
1690 adj = STEP3 - 1;
1691 else
1692 adj = STEP3 + 1;
1693 count3 = (adj + N32 - N31) / STEP3;
1694 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1695 if (cond2 is <)
1696 adj = STEP2 - 1;
1697 else
1698 adj = STEP2 + 1;
1699 count2 = (adj + N22 - N21) / STEP2;
1700 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1701 if (cond1 is <)
1702 adj = STEP1 - 1;
1703 else
1704 adj = STEP1 + 1;
1705 count1 = (adj + N12 - N11) / STEP1;
1706 count = count1 * count2 * count3;
1707 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1708 count = 0;
1709 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1710 of the combined loop constructs, just initialize COUNTS array
1711 from the _looptemp_ clauses.  For loop nests with non-rectangular
1712 loops, do this only for the rectangular loops.  Then pick
1713 the loops which reference outer vars in their bound expressions
1714 and the loops which they refer to, and for this sub-nest compute
1715 the number of iterations.  For triangular loops use Faulhaber's formula;
1716 otherwise, as a fallback, compute by iterating the loops.
1717 If e.g. the sub-nest is
1718 for (I = N11; I COND1 N12; I += STEP1)
1719 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1720 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1721 do:
1722 COUNT = 0;
1723 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1724 for (tmpj = M21 * tmpi + N21;
1725 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1726 {
1727 int tmpk1 = M31 * tmpj + N31;
1728 int tmpk2 = M32 * tmpj + N32;
1729 if (tmpk1 COND3 tmpk2)
1730 {
1731 if (COND3 is <)
1732 adj = STEP3 - 1;
1733 else
1734 adj = STEP3 + 1;
1735 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1736 }
1737 }
1738 and finally multiply the counts of the rectangular loops not
1739 in the sub-nest with COUNT.  Also, store in counts[fd->last_nonrect]
1740 the number of iterations of the loops from fd->first_nonrect
1741 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1742 by the counts of rectangular loops not referenced in any non-rectangular
1743 loops sandwiched in between those. */
1744
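/* Concrete instance of the triangular case above (illustrative only):
   for (I = 0; I < 10; I++)
     for (J = 0; J < I; J++)
   has 10 outer iterations, first_inner_iterations = 0 (the inner
   iteration count when I == 0) and factor = 1, so Faulhaber's formula
   yields COUNT = 10 * 0 + 1 * (10 * 9 / 2) = 45, i.e. the sum
   0 + 1 + ... + 9, without iterating the nest at runtime.  */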
1745/* NOTE: It *could* be better to moosh all of the BBs together,
1746 creating one larger BB with all the computation and the unexpected
1747 jump at the end. I.e.
1748
1749 bool zero3, zero2, zero1, zero;
1750
1751 zero3 = N32 c3 N31;
1752 count3 = (N32 - N31) /[cl] STEP3;
1753 zero2 = N22 c2 N21;
1754 count2 = (N22 - N21) /[cl] STEP2;
1755 zero1 = N12 c1 N11;
1756 count1 = (N12 - N11) /[cl] STEP1;
1757 zero = zero3 || zero2 || zero1;
1758 count = count1 * count2 * count3;
1759 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1760
1761 After all, we expect zero to be false, and thus we expect to have to
1762 evaluate all of the comparison expressions, so short-circuiting
1763 oughtn't be a win. Since the condition isn't protecting a
1764 denominator, we're not concerned about divide-by-zero, so we can
1765 fully evaluate count even if a numerator turned out to be wrong.
1766
1767 It seems like putting this all together would create much better
1768 scheduling opportunities, and less pressure on the chip's branch
1769 predictor. */
1770
1771static void
1772expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1773 basic_block &entry_bb, tree *counts,
1774 basic_block &zero_iter1_bb, int &first_zero_iter1,
1775 basic_block &zero_iter2_bb, int &first_zero_iter2,
1776 basic_block &l2_dom_bb)
1777{
1778 tree t, type = TREE_TYPE (fd->loop.v);
1779 edge e, ne;
1780 int i;
1781
1782 /* Collapsed loops need work for expansion into SSA form. */
1783 gcc_assert (!gimple_in_ssa_p (cfun));
1784
1785 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1786 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1787 {
1788 gcc_assert (fd->ordered == 0);
1789 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1790 isn't supposed to be handled, as the inner loop doesn't
1791 use it. */
1792 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1793 OMP_CLAUSE__LOOPTEMP_);
1794 gcc_assert (innerc);
1795 for (i = 0; i < fd->collapse; i++)
1796 {
1797 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1798 OMP_CLAUSE__LOOPTEMP_);
1799 gcc_assert (innerc);
1800 if (i)
1801 counts[i] = OMP_CLAUSE_DECL (innerc);
1802 else
1803 counts[0] = NULL_TREE;
1804 }
1805 if (fd->non_rect
1806 && fd->last_nonrect == fd->first_nonrect + 1
1807 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1808 {
1809 tree c[4];
1810 for (i = 0; i < 4; i++)
1811 {
1812 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1813 OMP_CLAUSE__LOOPTEMP_);
1814 gcc_assert (innerc);
1815 c[i] = OMP_CLAUSE_DECL (innerc);
1816 }
1817 counts[0] = c[0];
1818 fd->first_inner_iterations = c[1];
1819 fd->factor = c[2];
1820 fd->adjn1 = c[3];
1821 }
1822 return;
1823 }
1824
1825 for (i = fd->collapse; i < fd->ordered; i++)
1826 {
1827 tree itype = TREE_TYPE (fd->loops[i].v);
1828 counts[i] = NULL_TREE;
1829 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1830 fold_convert (itype, fd->loops[i].n1),
1831 fold_convert (itype, fd->loops[i].n2));
1832 if (t && integer_zerop (t))
1833 {
1834 for (i = fd->collapse; i < fd->ordered; i++)
1835 counts[i] = build_int_cst (type, 0);
1836 break;
1837 }
1838 }
1839 bool rect_count_seen = false;
1840 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1841 {
1842 tree itype = TREE_TYPE (fd->loops[i].v);
1843
1844 if (i >= fd->collapse && counts[i])
1845 continue;
1846 if (fd->non_rect)
1847 {
1848 /* Skip loops that use outer iterators in their expressions
1849 during this phase. */
1850 if (fd->loops[i].m1 || fd->loops[i].m2)
1851 {
1852 counts[i] = build_zero_cst (type);
1853 continue;
1854 }
1855 }
1856 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1857 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1858 fold_convert (itype, fd->loops[i].n1),
1859 fold_convert (itype, fd->loops[i].n2)))
1860 == NULL_TREE || !integer_onep (t)))
1861 {
1862 gcond *cond_stmt;
1863 tree n1, n2;
1864 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1865 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1866 true, GSI_SAME_STMT);
1867 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1868 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1869 true, GSI_SAME_STMT);
1870 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1871 NULL_TREE, NULL_TREE);
1872 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1873 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1874 expand_omp_regimplify_p, NULL, NULL)
1875 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1876 expand_omp_regimplify_p, NULL, NULL))
1877 {
1878 *gsi = gsi_for_stmt (cond_stmt);
1879 gimple_regimplify_operands (cond_stmt, gsi);
1880 }
1881 e = split_block (entry_bb, cond_stmt);
1882 basic_block &zero_iter_bb
1883 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1884 int &first_zero_iter
1885 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1886 if (zero_iter_bb == NULL)
1887 {
1888 gassign *assign_stmt;
1889 first_zero_iter = i;
1890 zero_iter_bb = create_empty_bb (entry_bb);
1891 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1892 *gsi = gsi_after_labels (zero_iter_bb);
1893 if (i < fd->collapse)
1894 assign_stmt = gimple_build_assign (fd->loop.n2,
1895 build_zero_cst (type));
1896 else
1897 {
1898 counts[i] = create_tmp_reg (type, ".count");
1899 assign_stmt
1900 = gimple_build_assign (counts[i], build_zero_cst (type));
1901 }
1902 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1903 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1904 entry_bb);
1905 }
1906 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1907 ne->probability = profile_probability::very_unlikely ();
1908 e->flags = EDGE_TRUE_VALUE;
1909 e->probability = ne->probability.invert ();
1910 if (l2_dom_bb == NULL)
1911 l2_dom_bb = entry_bb;
1912 entry_bb = e->dest;
1913 *gsi = gsi_last_nondebug_bb (entry_bb);
1914 }
1915
1916 if (POINTER_TYPE_P (itype))
1917 itype = signed_type_for (itype);
1918 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1919 ? -1 : 1));
1920 t = fold_build2 (PLUS_EXPR, itype,
1921 fold_convert (itype, fd->loops[i].step), t);
1922 t = fold_build2 (PLUS_EXPR, itype, t,
1923 fold_convert (itype, fd->loops[i].n2));
1924 t = fold_build2 (MINUS_EXPR, itype, t,
1925 fold_convert (itype, fd->loops[i].n1));
1926 /* ?? We could probably use CEIL_DIV_EXPR instead of
1927 TRUNC_DIV_EXPR and adjust by hand.  Unless we can't
1928 generate the same code in the end because generically we
1929 don't know that the values involved must be negative for
1930 GT?? */
1931 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1932 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1933 fold_build1 (NEGATE_EXPR, itype, t),
1934 fold_build1 (NEGATE_EXPR, itype,
1935 fold_convert (itype,
1936 fd->loops[i].step)));
1937 else
1938 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1939 fold_convert (itype, fd->loops[i].step));
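	  /* Illustration (not part of the original code): for N1 = 0,
	     N2 = 10, STEP = 3 and cond_code LT, the adjustment above
	     computes ((3 - 1) + 10 - 0) / 3 = 4 iterations, i.e. a
	     ceiling division expressed with TRUNC_DIV_EXPR; for GT on
	     unsigned types both operands are negated first so the
	     truncating division still rounds in the right direction.  */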
1940 t = fold_convert (type, t);
1941 if (TREE_CODE (t) == INTEGER_CST)
1942 counts[i] = t;
1943 else
1944 {
1945 if (i < fd->collapse || i != first_zero_iter2)
1946 counts[i] = create_tmp_reg (type, ".count");
1947 expand_omp_build_assign (gsi, counts[i], t);
1948 }
1949 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1950 {
1951 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1952 continue;
1953 if (!rect_count_seen)
1954 {
1955 t = counts[i];
1956 rect_count_seen = true;
1957 }
1958 else
1959 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1960 expand_omp_build_assign (gsi, fd->loop.n2, t);
1961 }
1962 }
1963 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1964 {
1965 gcc_assert (fd->last_nonrect != -1);
1966
1967 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1968 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1969 build_zero_cst (type));
1970 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1971 if (fd->loops[i].m1
1972 || fd->loops[i].m2
1973 || fd->loops[i].non_rect_referenced)
1974 break;
1975 if (i == fd->last_nonrect
1976 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1977 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1978 {
1979 int o = fd->first_nonrect;
1980 tree itype = TREE_TYPE (fd->loops[o].v);
1981 tree n1o = create_tmp_reg (itype, ".n1o");
1982 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1983 expand_omp_build_assign (gsi, n1o, t);
1984 tree n2o = create_tmp_reg (itype, ".n2o");
1985 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1986 expand_omp_build_assign (gsi, n2o, t);
1987 if (fd->loops[i].m1 && fd->loops[i].m2)
1988 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1989 unshare_expr (fd->loops[i].m1));
1990 else if (fd->loops[i].m1)
1991 t = fold_unary (NEGATE_EXPR, itype,
1992 unshare_expr (fd->loops[i].m1));
1993 else
1994 t = unshare_expr (fd->loops[i].m2);
1995 tree m2minusm1
1996 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1997 true, GSI_SAME_STMT);
1998
1999 gimple_stmt_iterator gsi2 = *gsi;
2000 gsi_prev (&gsi2);
2001 e = split_block (entry_bb, gsi_stmt (gsi2));
2002 e = split_block (e->dest, (gimple *) NULL);
2003 basic_block bb1 = e->src;
2004 entry_bb = e->dest;
2005 *gsi = gsi_after_labels (entry_bb);
2006
2007 gsi2 = gsi_after_labels (bb1);
2008 tree ostep = fold_convert (itype, fd->loops[o].step);
2009 t = build_int_cst (itype, (fd->loops[o].cond_code
2010 == LT_EXPR ? -1 : 1));
2011 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2012 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2013 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2014 if (TYPE_UNSIGNED (itype)
2015 && fd->loops[o].cond_code == GT_EXPR)
2016 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2017 fold_build1 (NEGATE_EXPR, itype, t),
2018 fold_build1 (NEGATE_EXPR, itype, ostep));
2019 else
2020 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2021 tree outer_niters
2022 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2025 build_one_cst (itype));
2026 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2027 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2028 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2029 true, GSI_SAME_STMT);
2030 tree n1, n2, n1e, n2e;
2031 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2032 if (fd->loops[i].m1)
2033 {
2034 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2035 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2036 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2037 }
2038 else
2039 n1 = t;
2040 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2041 true, GSI_SAME_STMT);
2042 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2043 if (fd->loops[i].m2)
2044 {
2045 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2046 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2047 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2048 }
2049 else
2050 n2 = t;
2051 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2052 true, GSI_SAME_STMT);
2053 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2054 if (fd->loops[i].m1)
2055 {
2056 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2057 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2058 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2059 }
2060 else
2061 n1e = t;
2062 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2063 true, GSI_SAME_STMT);
2064 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2065 if (fd->loops[i].m2)
2066 {
2067 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2068 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2069 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2070 }
2071 else
2072 n2e = t;
2073 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2074 true, GSI_SAME_STMT);
2075 gcond *cond_stmt
2076 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2077 NULL_TREE, NULL_TREE);
2078 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2079 e = split_block (bb1, cond_stmt);
2080 e->flags = EDGE_TRUE_VALUE;
2081 e->probability = profile_probability::likely ().guessed ();
2082 basic_block bb2 = e->dest;
2083 gsi2 = gsi_after_labels (bb2);
2084
2085 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2086 NULL_TREE, NULL_TREE);
2087 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2088 e = split_block (bb2, cond_stmt);
2089 e->flags = EDGE_TRUE_VALUE;
2090 e->probability = profile_probability::likely ().guessed ();
2091 gsi2 = gsi_after_labels (e->dest);
2092
2093 tree step = fold_convert (itype, fd->loops[i].step);
2094 t = build_int_cst (itype, (fd->loops[i].cond_code
2095 == LT_EXPR ? -1 : 1));
2096 t = fold_build2 (PLUS_EXPR, itype, step, t);
2097 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2098 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2099 if (TYPE_UNSIGNED (itype)
2100 && fd->loops[i].cond_code == GT_EXPR)
2101 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2102 fold_build1 (NEGATE_EXPR, itype, t),
2103 fold_build1 (NEGATE_EXPR, itype, step));
2104 else
2105 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2106 tree first_inner_iterations
2107 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2108 true, GSI_SAME_STMT);
2109 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2110 if (TYPE_UNSIGNED (itype)
2111 && fd->loops[i].cond_code == GT_EXPR)
2112 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2113 fold_build1 (NEGATE_EXPR, itype, t),
2114 fold_build1 (NEGATE_EXPR, itype, step));
2115 else
2116 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2117 tree factor
2118 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2119 true, GSI_SAME_STMT);
2120 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2121 build_one_cst (itype));
2122 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2123 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2124 t = fold_build2 (MULT_EXPR, itype, factor, t);
2125 t = fold_build2 (PLUS_EXPR, itype,
2126 fold_build2 (MULT_EXPR, itype, outer_niters,
2127 first_inner_iterations), t);
2128 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2129 fold_convert (type, t));
2130
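	      /* Checking the math above with illustrative numbers: for
		 the sub-nest I = 0..9, J = 0..I-1 we get
		 outer_niters = 10, first_inner_iterations = 0 and
		 factor = 1, so counts[fd->last_nonrect] becomes
		 10 * 0 + 1 * ((10 - 1) * 10 / 2) = 45, matching the 45
		 (I, J) pairs the nest really executes.  */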
2131 basic_block bb3 = create_empty_bb (bb1);
2132 add_bb_to_loop (bb3, bb1->loop_father);
2133
2134 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2135 e->probability = profile_probability::unlikely ().guessed ();
2136
2137 gsi2 = gsi_after_labels (bb3);
2138 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2139 NULL_TREE, NULL_TREE);
2140 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2141 e = split_block (bb3, cond_stmt);
2142 e->flags = EDGE_TRUE_VALUE;
2143 e->probability = profile_probability::likely ().guessed ();
2144 basic_block bb4 = e->dest;
2145
2146 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2147 ne->probability = e->probability.invert ();
2148
2149 basic_block bb5 = create_empty_bb (bb2);
2150 add_bb_to_loop (bb5, bb2->loop_father);
2151
2152 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2153 ne->probability = profile_probability::unlikely ().guessed ();
2154
2155 for (int j = 0; j < 2; j++)
2156 {
2157 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2158 t = fold_build2 (MINUS_EXPR, itype,
2159 unshare_expr (fd->loops[i].n1),
2160 unshare_expr (fd->loops[i].n2));
2161 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2162 tree tem
2163 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2164 true, GSI_SAME_STMT);
2165 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2166 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2167 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2168 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2169 true, GSI_SAME_STMT);
2170 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2171 if (fd->loops[i].m1)
2172 {
2173 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2174 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2175 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2176 }
2177 else
2178 n1 = t;
2179 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2180 true, GSI_SAME_STMT);
2181 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2182 if (fd->loops[i].m2)
2183 {
2184 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2185 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2186 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2187 }
2188 else
2189 n2 = t;
2190 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2191 true, GSI_SAME_STMT);
2192 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2193
2194 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2195 NULL_TREE, NULL_TREE);
2196 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2197 e = split_block (gsi_bb (gsi2), cond_stmt);
2198 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2199 e->probability = profile_probability::unlikely ().guessed ();
2200 ne = make_edge (e->src, bb1,
2201 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2202 ne->probability = e->probability.invert ();
2203 gsi2 = gsi_after_labels (e->dest);
2204
2205 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2206 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2207
2208 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2209 }
2210
2211 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2212 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2213 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2214
2215 if (fd->first_nonrect + 1 == fd->last_nonrect)
2216 {
2217 fd->first_inner_iterations = first_inner_iterations;
2218 fd->factor = factor;
2219 fd->adjn1 = n1o;
2220 }
2221 }
2222 else
2223 {
2224 /* Fallback implementation. Evaluate the loops with m1/m2
2225 non-NULL as well as their outer loops at runtime using temporaries
2226 instead of the original iteration variables, and in the
2227 body just bump the counter. */
2228 gimple_stmt_iterator gsi2 = *gsi;
2229 gsi_prev (&gsi2);
2230 e = split_block (entry_bb, gsi_stmt (gsi2));
2231 e = split_block (e->dest, (gimple *) NULL);
2232 basic_block cur_bb = e->src;
2233 basic_block next_bb = e->dest;
2234 entry_bb = e->dest;
2235 *gsi = gsi_after_labels (entry_bb);
2236
2237 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2238 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2239
2240 for (i = 0; i <= fd->last_nonrect; i++)
2241 {
2242 if (fd->loops[i].m1 == NULL_TREE
2243 && fd->loops[i].m2 == NULL_TREE
2244 && !fd->loops[i].non_rect_referenced)
2245 continue;
2246
2247 tree itype = TREE_TYPE (fd->loops[i].v);
2248
2249 gsi2 = gsi_after_labels (cur_bb);
2250 tree n1, n2;
2251 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2252 if (fd->loops[i].m1)
2253 {
2254 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2255 n1 = fold_build2 (MULT_EXPR, itype,
2256 vs[i - fd->loops[i].outer], n1);
2257 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2258 }
2259 else
2260 n1 = t;
2261 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2262 true, GSI_SAME_STMT);
2263 if (i < fd->last_nonrect)
2264 {
2265 vs[i] = create_tmp_reg (itype, ".it");
2266 expand_omp_build_assign (&gsi2, vs[i], n1);
2267 }
2268 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2269 if (fd->loops[i].m2)
2270 {
2271 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2272 n2 = fold_build2 (MULT_EXPR, itype,
2273 vs[i - fd->loops[i].outer], n2);
2274 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2275 }
2276 else
2277 n2 = t;
2278 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2279 true, GSI_SAME_STMT);
2280 if (i == fd->last_nonrect)
2281 {
2282 gcond *cond_stmt
2283 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2284 NULL_TREE, NULL_TREE);
2285 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2286 e = split_block (cur_bb, cond_stmt);
2287 e->flags = EDGE_TRUE_VALUE;
2288 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2289 e->probability = profile_probability::likely ().guessed ();
2290 ne->probability = e->probability.invert ();
2291 gsi2 = gsi_after_labels (e->dest);
2292
2293 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2294 ? -1 : 1));
2295 t = fold_build2 (PLUS_EXPR, itype,
2296 fold_convert (itype, fd->loops[i].step), t);
2297 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2298 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2299 tree step = fold_convert (itype, fd->loops[i].step);
2300 if (TYPE_UNSIGNED (itype)
2301 && fd->loops[i].cond_code == GT_EXPR)
2302 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2303 fold_build1 (NEGATE_EXPR, itype, t),
2304 fold_build1 (NEGATE_EXPR, itype, step));
2305 else
2306 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2307 t = fold_convert (type, t);
2308 t = fold_build2 (PLUS_EXPR, type,
2309 counts[fd->last_nonrect], t);
2310 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2312 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2313 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2314 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2315 break;
2316 }
2317 e = split_block (cur_bb, last_stmt (cur_bb));
2318
2319 basic_block new_cur_bb = create_empty_bb (cur_bb);
2320 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2321
2322 gsi2 = gsi_after_labels (e->dest);
2323 tree step = fold_convert (itype,
2324 unshare_expr (fd->loops[i].step));
2325 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2326 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2327 true, GSI_SAME_STMT);
2328 expand_omp_build_assign (&gsi2, vs[i], t);
2329
2330 ne = split_block (e->dest, last_stmt (e->dest));
2331 gsi2 = gsi_after_labels (ne->dest);
2332
2333 gcond *cond_stmt
2334 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2335 NULL_TREE, NULL_TREE);
2336 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2337 edge e3, e4;
2338 if (next_bb == entry_bb)
2339 {
2340 e3 = find_edge (ne->dest, next_bb);
2341 e3->flags = EDGE_FALSE_VALUE;
2342 }
2343 else
2344 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2345 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2346 e4->probability = profile_probability::likely ().guessed ();
2347 e3->probability = e4->probability.invert ();
2348 basic_block esrc = e->src;
2349 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2350 cur_bb = new_cur_bb;
2351 basic_block latch_bb = next_bb;
2352 next_bb = e->dest;
2353 remove_edge (e);
2354 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2355 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2356 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2357 }
2358 }
2359 t = NULL_TREE;
2360 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2361 if (!fd->loops[i].non_rect_referenced
2362 && fd->loops[i].m1 == NULL_TREE
2363 && fd->loops[i].m2 == NULL_TREE)
2364 {
2365 if (t == NULL_TREE)
2366 t = counts[i];
2367 else
2368 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2369 }
2370 if (t)
2371 {
2372 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2373 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2374 }
2375 if (!rect_count_seen)
2376 t = counts[fd->last_nonrect];
2377 else
2378 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2379 counts[fd->last_nonrect]);
2380 expand_omp_build_assign (gsi, fd->loop.n2, t);
2381 }
2382 else if (fd->non_rect)
2383 {
2384 tree t = fd->loop.n2;
2385 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2386 int non_rect_referenced = 0, non_rect = 0;
2387 for (i = 0; i < fd->collapse; i++)
2388 {
2389 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2390 && !integer_zerop (counts[i]))
2391 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2392 if (fd->loops[i].non_rect_referenced)
2393 non_rect_referenced++;
2394 if (fd->loops[i].m1 || fd->loops[i].m2)
2395 non_rect++;
2396 }
2397 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2398 counts[fd->last_nonrect] = t;
2399 }
2400}
2401
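/* Illustration of what the function above leaves behind (made-up nest):
   for the purely rectangular collapse(2) nest I = 0..9, J = 0..4 it
   sets counts[0] = 10, counts[1] = 5 and fd->loop.n2 = 50; with a
   non-rectangular sub-nest the combined count of those loops is instead
   accumulated into counts[fd->last_nonrect].  */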
2402/* Helper function for expand_omp_{for_*,simd}. Generate code like:
2403 T = V;
2404 V3 = N31 + (T % count3) * STEP3;
2405 T = T / count3;
2406 V2 = N21 + (T % count2) * STEP2;
2407 T = T / count2;
2408 V1 = N11 + T * STEP1;
2409 if this loop doesn't have an inner loop construct combined with it.
2410 If it does have an inner loop construct combined with it and the
2411 iteration count isn't known constant, store values from counts array
2412 into its _looptemp_ temporaries instead.
2413 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2414 inclusive), use the count of all those loops together, and either
2415 find quadratic etc. equation roots, or as a fallback, do:
2416 COUNT = 0;
2417 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2418 for (tmpj = M21 * tmpi + N21;
2419 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2420 {
2421 int tmpk1 = M31 * tmpj + N31;
2422 int tmpk2 = M32 * tmpj + N32;
2423 if (tmpk1 COND3 tmpk2)
2424 {
2425 if (COND3 is <)
2426 adj = STEP3 - 1;
2427 else
2428 adj = STEP3 + 1;
2429 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2430 if (COUNT + temp > T)
2431 {
2432 V1 = tmpi;
2433 V2 = tmpj;
2434 V3 = tmpk1 + (T - COUNT) * STEP3;
2435 goto done;
2436 }
2437 else
2438 COUNT += temp;
2439 }
2440 }
2441 done:;
2442 but for optional innermost or outermost rectangular loops that aren't
2443 referenced by other loop expressions keep doing the division/modulo. */
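/* A short illustration of the rectangular path above (made-up values):
   with count3 = 5, count2 = 4 and T = 13 the generated code computes
   V3 = N31 + (13 % 5) * STEP3, then T = 13 / 5 = 2,
   V2 = N21 + (2 % 4) * STEP2, then T = 2 / 4 = 0 and
   V1 = N11 + 0 * STEP1.  */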
2444
2445static void
2446expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2447 tree *counts, tree *nonrect_bounds,
2448 gimple *inner_stmt, tree startvar)
2449{
2450 int i;
2451 if (gimple_omp_for_combined_p (fd->for_stmt))
2452 {
2453 /* If fd->loop.n2 is constant, then no propagation of the counts
2454 is needed, they are constant. */
2455 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2456 return;
2457
2458 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2459 ? gimple_omp_taskreg_clauses (inner_stmt)
2460 : gimple_omp_for_clauses (inner_stmt);
2461 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2462 isn't supposed to be handled, as the inner loop doesn't
2463 use it. */
2464 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2465 gcc_assert (innerc);
2466 int count = 0;
2467 if (fd->non_rect
2468 && fd->last_nonrect == fd->first_nonrect + 1
2469 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2470 count = 4;
2471 for (i = 0; i < fd->collapse + count; i++)
2472 {
2473 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2474 OMP_CLAUSE__LOOPTEMP_);
2475 gcc_assert (innerc);
2476 if (i)
2477 {
2478 tree tem = OMP_CLAUSE_DECL (innerc);
2479 tree t;
2480 if (i < fd->collapse)
2481 t = counts[i];
2482 else
2483 switch (i - fd->collapse)
2484 {
2485 case 0: t = counts[0]; break;
2486 case 1: t = fd->first_inner_iterations; break;
2487 case 2: t = fd->factor; break;
2488 case 3: t = fd->adjn1; break;
2489 default: gcc_unreachable ();
2490 }
2491 t = fold_convert (TREE_TYPE (tem), t);
2492 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2493 false, GSI_CONTINUE_LINKING);
2494 gassign *stmt = gimple_build_assign (tem, t);
2495 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2496 }
2497 }
2498 return;
2499 }
2500
2501 tree type = TREE_TYPE (fd->loop.v);
2502 tree tem = create_tmp_reg (type, ".tem");
2503 gassign *stmt = gimple_build_assign (tem, startvar);
2504 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2505
2506 for (i = fd->collapse - 1; i >= 0; i--)
2507 {
2508 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2509 itype = vtype;
2510 if (POINTER_TYPE_P (vtype))
2511 itype = signed_type_for (vtype);
2512 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2513 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2514 else
2515 t = tem;
2516 if (i == fd->last_nonrect)
2517 {
2518 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2519 false, GSI_CONTINUE_LINKING);
2520 tree stopval = t;
2521 tree idx = create_tmp_reg (type, ".count");
2522 expand_omp_build_assign (gsi, idx,
2523 build_zero_cst (type), true);
2524 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2525 if (fd->first_nonrect + 1 == fd->last_nonrect
2526 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2527 || fd->first_inner_iterations)
2528 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2529 != CODE_FOR_nothing)
2530 && !integer_zerop (fd->loop.n2))
2531 {
2532 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2533 tree itype = TREE_TYPE (fd->loops[i].v);
2534 tree first_inner_iterations = fd->first_inner_iterations;
2535 tree factor = fd->factor;
2536 gcond *cond_stmt
2537 = gimple_build_cond (NE_EXPR, factor,
2538 build_zero_cst (TREE_TYPE (factor)),
2539 NULL_TREE, NULL_TREE);
2540 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2541 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2542 basic_block bb0 = e->src;
2543 e->flags = EDGE_TRUE_VALUE;
2544 e->probability = profile_probability::likely ();
2545 bb_triang_dom = bb0;
2546 *gsi = gsi_after_labels (e->dest);
2547 tree slltype = long_long_integer_type_node;
2548 tree ulltype = long_long_unsigned_type_node;
2549 tree stopvalull = fold_convert (ulltype, stopval);
2550 stopvalull
2551 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2552 false, GSI_CONTINUE_LINKING);
2553 first_inner_iterations
2554 = fold_convert (slltype, first_inner_iterations);
2555 first_inner_iterations
2556 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2557 NULL_TREE, false,
2558 GSI_CONTINUE_LINKING);
2559 factor = fold_convert (slltype, factor);
2560 factor
2561 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2562 false, GSI_CONTINUE_LINKING);
2563 tree first_inner_iterationsd
2564 = fold_build1 (FLOAT_EXPR, double_type_node,
2565 first_inner_iterations);
2566 first_inner_iterationsd
2567 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2568 NULL_TREE, false,
2569 GSI_CONTINUE_LINKING);
2570 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2571 factor);
2572 factord = force_gimple_operand_gsi (gsi, factord, true,
2573 NULL_TREE, false,
2574 GSI_CONTINUE_LINKING);
2575 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2576 stopvalull);
2577 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2578 NULL_TREE, false,
2579 GSI_CONTINUE_LINKING);
2580 /* Temporarily disable flag_rounding_math: the values will be
2581 decimal numbers divided by 2, and worst-case imprecision
2582 due to too-large values ought to be caught later by the
2583 fallback checks. */
2584 int save_flag_rounding_math = flag_rounding_math;
2585 flag_rounding_math = 0;
2586 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2587 build_real (double_type_node, dconst2));
2588 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2589 first_inner_iterationsd, t);
2590 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2591 GSI_CONTINUE_LINKING);
2592 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2593 build_real (double_type_node, dconst2));
2594 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2595 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2596 fold_build2 (MULT_EXPR, double_type_node,
2597 t3, t3));
2598 flag_rounding_math = save_flag_rounding_math;
2599 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2600 GSI_CONTINUE_LINKING);
2601 if (flag_exceptions
2602 && cfun->can_throw_non_call_exceptions
2603 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2604 {
2605 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2606 build_zero_cst (double_type_node));
2607 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2608 false, GSI_CONTINUE_LINKING);
2609 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2610 boolean_false_node,
2611 NULL_TREE, NULL_TREE);
2612 }
2613 else
2614 cond_stmt
2615 = gimple_build_cond (LT_EXPR, t,
2616 build_zero_cst (double_type_node),
2617 NULL_TREE, NULL_TREE);
2618 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2619 e = split_block (gsi_bb (*gsi), cond_stmt);
2620 basic_block bb1 = e->src;
2621 e->flags = EDGE_FALSE_VALUE;
2622 e->probability = profile_probability::very_likely ();
2623 *gsi = gsi_after_labels (e->dest);
2624 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2625 tree sqrtr = create_tmp_var (double_type_node);
2626 gimple_call_set_lhs (call, sqrtr);
2627 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2628 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2629 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2630 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2631 tree c = create_tmp_var (ulltype);
2632 tree d = create_tmp_var (ulltype);
2633 expand_omp_build_assign (gsi, c, t, true);
2634 t = fold_build2 (MINUS_EXPR, ulltype, c,
2635 build_one_cst (ulltype));
2636 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2637 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2638 t = fold_build2 (MULT_EXPR, ulltype,
2639 fold_convert (ulltype, fd->factor), t);
2640 tree t2
2641 = fold_build2 (MULT_EXPR, ulltype, c,
2642 fold_convert (ulltype,
2643 fd->first_inner_iterations));
2644 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2645 expand_omp_build_assign (gsi, d, t, true);
2646 t = fold_build2 (MULT_EXPR, ulltype,
2647 fold_convert (ulltype, fd->factor), c);
2648 t = fold_build2 (PLUS_EXPR, ulltype,
2649 t, fold_convert (ulltype,
2650 fd->first_inner_iterations));
2651 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2652 GSI_CONTINUE_LINKING);
2653 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2654 NULL_TREE, NULL_TREE);
2655 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2656 e = split_block (gsi_bb (*gsi), cond_stmt);
2657 basic_block bb2 = e->src;
2658 e->flags = EDGE_TRUE_VALUE;
2659 e->probability = profile_probability::very_likely ();
2660 *gsi = gsi_after_labels (e->dest);
2661 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2662 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2663 GSI_CONTINUE_LINKING);
2664 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2665 NULL_TREE, NULL_TREE);
2666 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2667 e = split_block (gsi_bb (*gsi), cond_stmt);
2668 basic_block bb3 = e->src;
2669 e->flags = EDGE_FALSE_VALUE;
2670 e->probability = profile_probability::very_likely ();
2671 *gsi = gsi_after_labels (e->dest);
2672 t = fold_convert (itype, c);
2673 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2674 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2675 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 GSI_CONTINUE_LINKING);
2677 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2678 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2679 t2 = fold_convert (itype, t2);
2680 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2681 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2682 if (fd->loops[i].m1)
2683 {
2684 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2685 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2686 }
2687 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2688 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2689 bb_triang = e->src;
2690 *gsi = gsi_after_labels (e->dest);
2691 remove_edge (e);
2692 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2693 e->probability = profile_probability::very_unlikely ();
2694 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2695 e->probability = profile_probability::very_unlikely ();
2696 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2697 e->probability = profile_probability::very_unlikely ();
2698
2699 basic_block bb4 = create_empty_bb (bb0);
2700 add_bb_to_loop (bb4, bb0->loop_father);
2701 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2702 e->probability = profile_probability::unlikely ();
2703 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2704 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2705 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2706 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2707 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2708 counts[i], counts[i - 1]);
2709 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2710 GSI_CONTINUE_LINKING);
2711 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2712 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2713 t = fold_convert (itype, t);
2714 t2 = fold_convert (itype, t2);
2715 t = fold_build2 (MULT_EXPR, itype, t,
2716 fold_convert (itype, fd->loops[i].step));
2717 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2718 t2 = fold_build2 (MULT_EXPR, itype, t2,
2719 fold_convert (itype, fd->loops[i - 1].step));
2720 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2721 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2722 false, GSI_CONTINUE_LINKING);
2723 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2724 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2725 if (fd->loops[i].m1)
2726 {
2727 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2728 fd->loops[i - 1].v);
2729 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2730 }
2731 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2732 false, GSI_CONTINUE_LINKING);
2733 stmt = gimple_build_assign (fd->loops[i].v, t);
2734 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2735 }
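	  /* Sketch of the root finding above with illustrative numbers:
	     for the triangular sub-nest I = 0..9, J = 0..I-1 and a
	     logical index STOPVAL = 7, first_inner_iterations = 0 and
	     factor = 1 give t3 = -0.5,
	     c = trunc ((sqrt (2.0 * 1 * 7 + 0.25) + 0.5) / 1) = 4 and
	     d = 1 * (4 * 3 / 2) + 4 * 0 = 6; since 6 <= 7 < 6 + 4, the
	     outer iterator advances 4 steps and the inner one
	     7 - 6 = 1, i.e. index 7 maps to (I, J) = (4, 1) without any
	     runtime loop.  */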
2736 /* Fallback implementation.  Evaluate the loops in between
2737 (inclusive) fd->first_nonrect and fd->last_nonrect at
2738 runtime using temporaries instead of the original iteration
2739 variables; in the body just bump the counter and compare
2740 with the desired value. */
2741 gimple_stmt_iterator gsi2 = *gsi;
2742 basic_block entry_bb = gsi_bb (gsi2);
2743 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2744 e = split_block (e->dest, (gimple *) NULL);
2745 basic_block dom_bb = NULL;
2746 basic_block cur_bb = e->src;
2747 basic_block next_bb = e->dest;
2748 entry_bb = e->dest;
2749 *gsi = gsi_after_labels (entry_bb);
2750
2751 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2752 tree n1 = NULL_TREE, n2 = NULL_TREE;
2753 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2754
2755 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2756 {
2757 tree itype = TREE_TYPE (fd->loops[j].v);
2758 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2759 && fd->loops[j].m2 == NULL_TREE
2760 && !fd->loops[j].non_rect_referenced);
2761 gsi2 = gsi_after_labels (cur_bb);
2762 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2763 if (fd->loops[j].m1)
2764 {
2765 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2766 n1 = fold_build2 (MULT_EXPR, itype,
2767 vs[j - fd->loops[j].outer], n1);
2768 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2769 }
2770 else if (rect_p)
2771 n1 = build_zero_cst (type);
2772 else
2773 n1 = t;
2774 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2775 true, GSI_SAME_STMT);
2776 if (j < fd->last_nonrect)
2777 {
2778 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2779 expand_omp_build_assign (&gsi2, vs[j], n1);
2780 }
2781 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2782 if (fd->loops[j].m2)
2783 {
2784 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2785 n2 = fold_build2 (MULT_EXPR, itype,
2786 vs[j - fd->loops[j].outer], n2);
2787 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2788 }
2789 else if (rect_p)
2790 n2 = counts[j];
2791 else
2792 n2 = t;
2793 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2794 true, GSI_SAME_STMT);
2795 if (j == fd->last_nonrect)
2796 {
2797 gcond *cond_stmt
2798 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2799 NULL_TREE, NULL_TREE);
2800 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2801 e = split_block (cur_bb, cond_stmt);
2802 e->flags = EDGE_TRUE_VALUE;
2803 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2804 e->probability = profile_probability::likely ().guessed ();
2805 ne->probability = e->probability.invert ();
2806 gsi2 = gsi_after_labels (e->dest);
2807
2808 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2809 ? -1 : 1));
2810 t = fold_build2 (PLUS_EXPR, itype,
2811 fold_convert (itype, fd->loops[j].step), t);
2812 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2813 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2814 tree step = fold_convert (itype, fd->loops[j].step);
2815 if (TYPE_UNSIGNED (itype)
2816 && fd->loops[j].cond_code == GT_EXPR)
2817 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2818 fold_build1 (NEGATE_EXPR, itype, t),
2819 fold_build1 (NEGATE_EXPR, itype, step));
2820 else
2821 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2822 t = fold_convert (type, t);
2823 t = fold_build2 (PLUS_EXPR, type, idx, t);
2824 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2825 true, GSI_SAME_STMT);
2826 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2827 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2828 cond_stmt
2829 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2830 NULL_TREE);
2831 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2832 e = split_block (gsi_bb (gsi2), cond_stmt);
2833 e->flags = EDGE_TRUE_VALUE;
2834 e->probability = profile_probability::likely ().guessed ();
2835 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2836 ne->probability = e->probability.invert ();
2837 gsi2 = gsi_after_labels (e->dest);
2838 expand_omp_build_assign (&gsi2, idx, t);
2839 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2840 break;
2841 }
2842 e = split_block (cur_bb, last_stmt (cur_bb));
2843
2844 basic_block new_cur_bb = create_empty_bb (cur_bb);
2845 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2846
2847 gsi2 = gsi_after_labels (e->dest);
2848 if (rect_p)
2849 t = fold_build2 (PLUS_EXPR, type, vs[j],
2850 build_one_cst (type));
2851 else
2852 {
2853 tree step
2854 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2855 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2856 }
2857 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2858 true, GSI_SAME_STMT);
2859 expand_omp_build_assign (&gsi2, vs[j], t);
2860
2861 edge ne = split_block (e->dest, last_stmt (e->dest));
2862 gsi2 = gsi_after_labels (ne->dest);
2863
2864 gcond *cond_stmt;
2865 if (next_bb == entry_bb)
2866 /* No need to actually check the outermost condition. */
2867 cond_stmt
2868 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2869 boolean_true_node,
2870 NULL_TREE, NULL_TREE);
2871 else
2872 cond_stmt
2873 = gimple_build_cond (rect_p ? LT_EXPR
2874 : fd->loops[j].cond_code,
2875 vs[j], n2, NULL_TREE, NULL_TREE);
2876 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2877 edge e3, e4;
2878 if (next_bb == entry_bb)
2879 {
2880 e3 = find_edge (ne->dest, next_bb);
2881 e3->flags = EDGE_FALSE_VALUE;
2882 dom_bb = ne->dest;
2883 }
2884 else
2885 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2886 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2887 e4->probability = profile_probability::likely ().guessed ();
2888 e3->probability = e4->probability.invert ();
2889 basic_block esrc = e->src;
2890 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2891 cur_bb = new_cur_bb;
2892 basic_block latch_bb = next_bb;
2893 next_bb = e->dest;
2894 remove_edge (e);
2895 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2896 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2897 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2898 }
2899 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2900 {
2901 tree itype = TREE_TYPE (fd->loops[j].v);
2902 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2903 && fd->loops[j].m2 == NULL_TREE
2904 && !fd->loops[j].non_rect_referenced);
2905 if (j == fd->last_nonrect)
2906 {
2907 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2908 t = fold_convert (itype, t);
2909 tree t2
2910 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2911 t = fold_build2 (MULT_EXPR, itype, t, t2);
2912 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2913 }
2914 else if (rect_p)
2915 {
2916 t = fold_convert (itype, vs[j]);
2917 t = fold_build2 (MULT_EXPR, itype, t,
2918 fold_convert (itype, fd->loops[j].step));
2919 if (POINTER_TYPE_P (vtype))
2920 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2921 else
2922 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2923 }
2924 else
2925 t = vs[j];
2926 t = force_gimple_operand_gsi (gsi, t, false,
2927 NULL_TREE, true,
2928 GSI_SAME_STMT);
2929 stmt = gimple_build_assign (fd->loops[j].v, t);
2930 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2931 }
2932 if (gsi_end_p (*gsi))
2933 *gsi = gsi_last_bb (gsi_bb (*gsi));
2934 else
2935 gsi_prev (gsi);
2936 if (bb_triang)
2937 {
2938 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2939 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2940 *gsi = gsi_after_labels (e->dest);
2941 if (!gsi_end_p (*gsi))
2942 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2943 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2944 }
2945 }
2946 else
2947 {
2948 t = fold_convert (itype, t);
2949 t = fold_build2 (MULT_EXPR, itype, t,
2950 fold_convert (itype, fd->loops[i].step));
2951 if (POINTER_TYPE_P (vtype))
2952 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2953 else
2954 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2955 t = force_gimple_operand_gsi (gsi, t,
2956 DECL_P (fd->loops[i].v)
2957 && TREE_ADDRESSABLE (fd->loops[i].v),
2958 NULL_TREE, false,
2959 GSI_CONTINUE_LINKING);
2960 stmt = gimple_build_assign (fd->loops[i].v, t);
2961 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2962 }
2963 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2964 {
2965 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2967 false, GSI_CONTINUE_LINKING);
2968 stmt = gimple_build_assign (tem, t);
2969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2970 }
2971 if (i == fd->last_nonrect)
2972 i = fd->first_nonrect;
2973 }
2974 if (fd->non_rect)
2975 for (i = 0; i <= fd->last_nonrect; i++)
2976 if (fd->loops[i].m2)
2977 {
2978 tree itype = TREE_TYPE (fd->loops[i].v);
2979
2980 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2981 t = fold_build2 (MULT_EXPR, itype,
2982 fd->loops[i - fd->loops[i].outer].v, t);
2983 t = fold_build2 (PLUS_EXPR, itype, t,
2984 fold_convert (itype,
2985 unshare_expr (fd->loops[i].n2)));
2986 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2987 t = force_gimple_operand_gsi (gsi, t, false,
2988 NULL_TREE, false,
2989 GSI_CONTINUE_LINKING);
2990 stmt = gimple_build_assign (nonrect_bounds[i], t);
2991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2992 }
2993}
2994
2995/* Helper function for expand_omp_for_*. Generate code like:
2996 L10:
2997 V3 += STEP3;
2998 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2999 L11:
3000 V3 = N31;
3001 V2 += STEP2;
3002 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3003 L12:
3004 V2 = N21;
3005 V1 += STEP1;
3006 goto BODY_BB;
3007 For non-rectangular loops, use temporaries stored in nonrect_bounds
3008 for the upper bounds if M?2 multiplier is present. Given e.g.
3009 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3010 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3011 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3012 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3013 do:
3014 L10:
3015 V4 += STEP4;
3016 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3017 L11:
3018 V4 = N41 + M41 * V2; // This can be left out if the loop
3019 // refers to the immediate parent loop
3020 V3 += STEP3;
3021 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3022 L12:
3023 V3 = N31;
3024 V2 += STEP2;
3025 if (V2 cond2 N22) goto L120; else goto L13;
3026 L120:
3027 V4 = N41 + M41 * V2;
3028 NONRECT_BOUND4 = N42 + M42 * V2;
3029 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3030 L13:
3031 V2 = N21;
3032 V1 += STEP1;
3033 goto L120; */
3034
3035static basic_block
3036extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3037 basic_block cont_bb, basic_block body_bb)
3038{
3039 basic_block last_bb, bb, collapse_bb = NULL;
3040 int i;
3041 gimple_stmt_iterator gsi;
3042 edge e;
3043 tree t;
3044 gimple *stmt;
3045
3046 last_bb = cont_bb;
3047 for (i = fd->collapse - 1; i >= 0; i--)
3048 {
3049 tree vtype = TREE_TYPE (fd->loops[i].v);
3050
3051 bb = create_empty_bb (last_bb);
3052 add_bb_to_loop (bb, last_bb->loop_father);
3053 gsi = gsi_start_bb (bb);
3054
3055 if (i < fd->collapse - 1)
3056 {
3057 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3058 e->probability
3059 = profile_probability::guessed_always ().apply_scale (1, 8);
3060
3061 struct omp_for_data_loop *l = &fd->loops[i + 1];
3062 if (l->m1 == NULL_TREE || l->outer != 1)
3063 {
3064 t = l->n1;
3065 if (l->m1)
3066 {
3067 tree t2
3068 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3069 fd->loops[i + 1 - l->outer].v, l->m1);
3070 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3071 }
3072 t = force_gimple_operand_gsi (&gsi, t,
3073 DECL_P (l->v)
3074 && TREE_ADDRESSABLE (l->v),
3075 NULL_TREE, false,
3076 GSI_CONTINUE_LINKING);
3077 stmt = gimple_build_assign (l->v, t);
3078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3079 }
3080 }
3081 else
3082 collapse_bb = bb;
3083
3084 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3085
3086 if (POINTER_TYPE_P (vtype))
3087 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3088 else
3089 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3090 t = force_gimple_operand_gsi (&gsi, t,
3091 DECL_P (fd->loops[i].v)
3092 && TREE_ADDRESSABLE (fd->loops[i].v),
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 stmt = gimple_build_assign (fd->loops[i].v, t);
3095 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3096
3097 if (fd->loops[i].non_rect_referenced)
3098 {
3099 basic_block update_bb = NULL, prev_bb = NULL;
3100 for (int j = i + 1; j <= fd->last_nonrect; j++)
3101 if (j - fd->loops[j].outer == i)
3102 {
3103 tree n1, n2;
3104 struct omp_for_data_loop *l = &fd->loops[j];
3105 basic_block this_bb = create_empty_bb (last_bb);
3106 add_bb_to_loop (this_bb, last_bb->loop_father);
3107 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3108 if (prev_bb)
3109 {
3110 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3111 e->probability
3112 = profile_probability::guessed_always ().apply_scale (7,
3113 8);
3114 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3115 }
3116 if (l->m1)
3117 {
3118 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3119 fd->loops[i].v);
3120 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3121 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3122 false,
3123 GSI_CONTINUE_LINKING);
3124 stmt = gimple_build_assign (l->v, n1);
3125 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3126 n1 = l->v;
3127 }
3128 else
3129 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3130 NULL_TREE, false,
3131 GSI_CONTINUE_LINKING);
3132 if (l->m2)
3133 {
3134 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3135 fd->loops[i].v);
3136 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3137 t, unshare_expr (l->n2));
3138 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3139 false,
3140 GSI_CONTINUE_LINKING);
3141 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3142 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3143 n2 = nonrect_bounds[j];
3144 }
3145 else
3146 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3147 true, NULL_TREE, false,
3148 GSI_CONTINUE_LINKING);
3149 gcond *cond_stmt
3150 = gimple_build_cond (l->cond_code, n1, n2,
3151 NULL_TREE, NULL_TREE);
3152 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3153 if (update_bb == NULL)
3154 update_bb = this_bb;
3155 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3156 e->probability
3157 = profile_probability::guessed_always ().apply_scale (1, 8);
3158 if (prev_bb == NULL)
3159 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3160 prev_bb = this_bb;
3161 }
3162 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3163 e->probability
3164 = profile_probability::guessed_always ().apply_scale (7, 8);
3165 body_bb = update_bb;
3166 }
3167
3168 if (i > 0)
3169 {
3170 if (fd->loops[i].m2)
3171 t = nonrect_bounds[i];
3172 else
3173 t = unshare_expr (fd->loops[i].n2);
3174 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3175 false, GSI_CONTINUE_LINKING);
3176 tree v = fd->loops[i].v;
3177 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3178 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3179 false, GSI_CONTINUE_LINKING);
3180 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3181 stmt = gimple_build_cond_empty (t);
3182 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3183 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3184 expand_omp_regimplify_p, NULL, NULL)
3185 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3186 expand_omp_regimplify_p, NULL, NULL))
3187 gimple_regimplify_operands (stmt, &gsi);
3188 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3189 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3190 }
3191 else
3192 make_edge (bb, body_bb, EDGE_FALLTHRU);
3193 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3194 last_bb = bb;
3195 }
3196
3197 return collapse_bb;
3198}
3199
3200/* Expand #pragma omp ordered depend(source). */
3201
3202static void
3203expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3204 tree *counts, location_t loc)
3205{
3206 enum built_in_function source_ix
3207 = fd->iter_type == long_integer_type_node
3208 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3209 gimple *g
3210 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3211 build_fold_addr_expr (counts[fd->ordered]));
3212 gimple_set_location (g, loc);
3213 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3214}
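
/* Illustration (a sketch, not generated verbatim): within a
   "#pragma omp for ordered(2)" loop nest, the directive
   "#pragma omp ordered depend(source)" becomes, roughly,

     GOMP_doacross_post (&.orditera[0]);

   where .orditera is the counts[fd->ordered] array holding the current
   iteration vector (see expand_omp_ordered_source_sink below); the _ull_
   variant is selected when the iterators need unsigned long long.  */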
3215
3216/* Expand a single depend from #pragma omp ordered depend(sink:...). */
3217
3218static void
3219expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3220 tree *counts, tree c, location_t loc)
3221{
3222 auto_vec<tree, 10> args;
3223 enum built_in_function sink_ix
3224 = fd->iter_type == long_integer_type_node
3225 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3226 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3227 int i;
3228 gimple_stmt_iterator gsi2 = *gsi;
3229 bool warned_step = false;
3230
3231 for (i = 0; i < fd->ordered; i++)
3232 {
3233 tree step = NULL_TREE;
3234 off = TREE_PURPOSE (deps);
3235 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3236 {
3237 step = TREE_OPERAND (off, 1);
3238 off = TREE_OPERAND (off, 0);
3239 }
3240 if (!integer_zerop (off))
3241 {
3242 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3243 || fd->loops[i].cond_code == GT_EXPR);
3244 bool forward = fd->loops[i].cond_code == LT_EXPR;
3245 if (step)
3246 {
3247 /* Non-simple Fortran DO loops. If step is variable,
3248 we don't know even the direction at compile time, so
3249 we can't warn. */
3250 if (TREE_CODE (step) != INTEGER_CST)
3251 break;
3252 forward = tree_int_cst_sgn (step) != -1;
3253 }
3254 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3255 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3256 "waiting for lexically later iteration");
3257 break;
3258 }
3259 deps = TREE_CHAIN (deps);
3260 }
3261 /* If all offsets corresponding to the collapsed loops are zero,
3262 this depend clause can be ignored. FIXME: but there is still a
3263 flush needed. We need to emit one __sync_synchronize () for it
3264 though (perhaps conditionally)? Solve this together with the
3265 conservative dependence folding optimization.
3266 if (i >= fd->collapse)
3267 return; */
3268
3269 deps = OMP_CLAUSE_DECL (c);
3270 gsi_prev (&gsi2);
3271 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3272 edge e2 = split_block_after_labels (e1->dest);
3273
3274 gsi2 = gsi_after_labels (e1->dest);
3275 *gsi = gsi_last_bb (e1->src);
3276 for (i = 0; i < fd->ordered; i++)
3277 {
3278 tree itype = TREE_TYPE (fd->loops[i].v);
3279 tree step = NULL_TREE;
3280 tree orig_off = NULL_TREE;
3281 if (POINTER_TYPE_P (itype))
3282 itype = sizetype;
3283 if (i)
3284 deps = TREE_CHAIN (deps);
3285 off = TREE_PURPOSE (deps);
3286 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3287 {
3288 step = TREE_OPERAND (off, 1);
3289 off = TREE_OPERAND (off, 0);
3290 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3291 && integer_onep (fd->loops[i].step)
3292 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3293 }
3294 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3295 if (step)
3296 {
3297 off = fold_convert_loc (loc, itype, off);
3298 orig_off = off;
3299 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3300 }
3301
3302 if (integer_zerop (off))
3303 t = boolean_true_node;
3304 else
3305 {
3306 tree a;
3307 tree co = fold_convert_loc (loc, itype, off);
3308 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3309 {
3310 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3311 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3312 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3313 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3314 co);
3315 }
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3317 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3318 fd->loops[i].v, co);
3319 else
3320 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3321 fd->loops[i].v, co);
3322 if (step)
3323 {
3324 tree t1, t2;
3325 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3326 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3327 fd->loops[i].n1);
3328 else
3329 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3330 fd->loops[i].n2);
3331 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3332 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3333 fd->loops[i].n2);
3334 else
3335 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3336 fd->loops[i].n1);
3337 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3338 step, build_int_cst (TREE_TYPE (step), 0));
3339 if (TREE_CODE (step) != INTEGER_CST)
3340 {
3341 t1 = unshare_expr (t1);
3342 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3343 false, GSI_CONTINUE_LINKING);
3344 t2 = unshare_expr (t2);
3345 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3346 false, GSI_CONTINUE_LINKING);
3347 }
3348 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3349 t, t2, t1);
3350 }
3351 else if (fd->loops[i].cond_code == LT_EXPR)
3352 {
3353 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3354 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3355 fd->loops[i].n1);
3356 else
3357 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3358 fd->loops[i].n2);
3359 }
3360 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3361 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3362 fd->loops[i].n2);
3363 else
3364 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3365 fd->loops[i].n1);
3366 }
3367 if (cond)
3368 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3369 else
3370 cond = t;
3371
3372 off = fold_convert_loc (loc, itype, off);
3373
3374 if (step
3375 || (fd->loops[i].cond_code == LT_EXPR
3376 ? !integer_onep (fd->loops[i].step)
3377 : !integer_minus_onep (fd->loops[i].step)))
3378 {
3379 if (step == NULL_TREE
3380 && TYPE_UNSIGNED (itype)
3381 && fd->loops[i].cond_code == GT_EXPR)
3382 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3383 fold_build1_loc (loc, NEGATE_EXPR, itype,
3384 s));
3385 else
3386 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3387 orig_off ? orig_off : off, s);
3388 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3389 build_int_cst (itype, 0));
3390 if (integer_zerop (t) && !warned_step)
3391 {
3392 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3393 "refers to iteration never in the iteration "
3394 "space");
3395 warned_step = true;
3396 }
3397 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3398 cond, t);
3399 }
3400
3401 if (i <= fd->collapse - 1 && fd->collapse > 1)
3402 t = fd->loop.v;
3403 else if (counts[i])
3404 t = counts[i];
3405 else
3406 {
3407 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3408 fd->loops[i].v, fd->loops[i].n1);
3409 t = fold_convert_loc (loc, fd->iter_type, t);
3410 }
3411 if (step)
3412 /* We have divided off by step already earlier. */;
3413 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3414 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3415 fold_build1_loc (loc, NEGATE_EXPR, itype,
3416 s));
3417 else
3418 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3419 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3420 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3421 off = fold_convert_loc (loc, fd->iter_type, off);
3422 if (i <= fd->collapse - 1 && fd->collapse > 1)
3423 {
3424 if (i)
3425 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3426 off);
3427 if (i < fd->collapse - 1)
3428 {
3429 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3430 counts[i]);
3431 continue;
3432 }
3433 }
3434 off = unshare_expr (off);
3435 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3436 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3437 true, GSI_SAME_STMT);
3438 args.safe_push (t);
3439 }
3440 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3441 gimple_set_location (g, loc);
3442 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3443
3444 cond = unshare_expr (cond);
3445 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3446 GSI_CONTINUE_LINKING);
3447 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3448 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3449 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3450 e1->probability = e3->probability.invert ();
3451 e1->flags = EDGE_TRUE_VALUE;
3452 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3453
3454 *gsi = gsi_after_labels (e2->dest);
3455}
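
/* Illustration (a sketch, assuming unit steps and LT_EXPR loops; n1_k and
   n2_k stand for the bounds of loop k): inside "#pragma omp for
   ordered(2)", the directive "#pragma omp ordered depend(sink: i-1, j+1)"
   expands to roughly

     if (i - 1 >= n1_0 && j + 1 < n2_1)
       GOMP_doacross_wait (i - 1 - n1_0, j + 1 - n1_1);

   i.e. a wait on the normalized iteration vector of the dependee, guarded
   by a test that the offset iteration exists in the iteration space at
   all.  */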
3456
3457/* Expand all #pragma omp ordered depend(source) and
3458 #pragma omp ordered depend(sink:...) constructs in the current
3459 #pragma omp for ordered(n) region. */
3460
3461static void
3462expand_omp_ordered_source_sink (struct omp_region *region,
3463 struct omp_for_data *fd, tree *counts,
3464 basic_block cont_bb)
3465{
3466 struct omp_region *inner;
3467 int i;
3468 for (i = fd->collapse - 1; i < fd->ordered; i++)
3469 if (i == fd->collapse - 1 && fd->collapse > 1)
3470 counts[i] = NULL_TREE;
3471 else if (i >= fd->collapse && !cont_bb)
3472 counts[i] = build_zero_cst (fd->iter_type);
3473 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3474 && integer_onep (fd->loops[i].step))
3475 counts[i] = NULL_TREE;
3476 else
3477 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3478 tree atype
3479 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3480 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3481 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3482
3483 for (inner = region->inner; inner; inner = inner->next)
3484 if (inner->type == GIMPLE_OMP_ORDERED)
3485 {
3486 gomp_ordered *ord_stmt = inner->ord_stmt;
3487 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3488 location_t loc = gimple_location (ord_stmt);
3489 tree c;
3490 for (c = gimple_omp_ordered_clauses (ord_stmt);
3491 c; c = OMP_CLAUSE_CHAIN (c))
3492 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3493 break;
3494 if (c)
3495 expand_omp_ordered_source (&gsi, fd, counts, loc);
3496 for (c = gimple_omp_ordered_clauses (ord_stmt);
3497 c; c = OMP_CLAUSE_CHAIN (c))
3498 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3499 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3500 gsi_remove (&gsi, true);
3501 }
3502}
3503
3504/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3505 collapsed. */
3506
3507static basic_block
3508expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3509 basic_block cont_bb, basic_block body_bb,
3510 bool ordered_lastprivate)
3511{
3512 if (fd->ordered == fd->collapse)
3513 return cont_bb;
3514
3515 if (!cont_bb)
3516 {
3517 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3518 for (int i = fd->collapse; i < fd->ordered; i++)
3519 {
3520 tree type = TREE_TYPE (fd->loops[i].v);
3521 tree n1 = fold_convert (type, fd->loops[i].n1);
3522 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3523 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3524 size_int (i - fd->collapse + 1),
3525 NULL_TREE, NULL_TREE);
3526 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3527 }
3528 return NULL;
3529 }
3530
3531 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3532 {
3533 tree t, type = TREE_TYPE (fd->loops[i].v);
3534 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3535 expand_omp_build_assign (&gsi, fd->loops[i].v,
3536 fold_convert (type, fd->loops[i].n1));
3537 if (counts[i])
3538 expand_omp_build_assign (&gsi, counts[i],
3539 build_zero_cst (fd->iter_type));
3540 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3541 size_int (i - fd->collapse + 1),
3542 NULL_TREE, NULL_TREE);
3543 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3544 if (!gsi_end_p (gsi))
3545 gsi_prev (&gsi);
3546 else
3547 gsi = gsi_last_bb (body_bb);
3548 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3549 basic_block new_body = e1->dest;
3550 if (body_bb == cont_bb)
3551 cont_bb = new_body;
3552 edge e2 = NULL;
3553 basic_block new_header;
3554 if (EDGE_COUNT (cont_bb->preds) > 0)
3555 {
3556 gsi = gsi_last_bb (cont_bb);
3557 if (POINTER_TYPE_P (type))
3558 t = fold_build_pointer_plus (fd->loops[i].v,
3559 fold_convert (sizetype,
3560 fd->loops[i].step));
3561 else
3562 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3563 fold_convert (type, fd->loops[i].step));
3564 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3565 if (counts[i])
3566 {
3567 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3568 build_int_cst (fd->iter_type, 1));
3569 expand_omp_build_assign (&gsi, counts[i], t);
3570 t = counts[i];
3571 }
3572 else
3573 {
3574 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3575 fd->loops[i].v, fd->loops[i].n1);
3576 t = fold_convert (fd->iter_type, t);
3577 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3578 true, GSI_SAME_STMT);
3579 }
3580 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3581 size_int (i - fd->collapse + 1),
3582 NULL_TREE, NULL_TREE);
3583 expand_omp_build_assign (&gsi, aref, t);
3584 gsi_prev (&gsi);
3585 e2 = split_block (cont_bb, gsi_stmt (gsi));
3586 new_header = e2->dest;
3587 }
3588 else
3589 new_header = cont_bb;
3590 gsi = gsi_after_labels (new_header);
3591 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3592 true, GSI_SAME_STMT);
3593 tree n2
3594 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3595 true, NULL_TREE, true, GSI_SAME_STMT);
3596 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3597 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3598 edge e3 = split_block (new_header, gsi_stmt (gsi));
3599 cont_bb = e3->dest;
3600 remove_edge (e1);
3601 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3602 e3->flags = EDGE_FALSE_VALUE;
3603 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3604 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3605 e1->probability = e3->probability.invert ();
3606
3607 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3608 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3609
3610 if (e2)
3611 {
3612 class loop *loop = alloc_loop ();
3613 loop->header = new_header;
3614 loop->latch = e2->src;
3615 add_loop (loop, body_bb->loop_father);
3616 }
3617 }
3618
3619 /* If there are any lastprivate clauses and it is possible some loops
3620 might have zero iterations, ensure all the decls are initialized,
3621 otherwise we could crash evaluating C++ class iterators with lastprivate
3622 clauses. */
3623 bool need_inits = false;
3624 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3625 if (need_inits)
3626 {
3627 tree type = TREE_TYPE (fd->loops[i].v);
3628 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3629 expand_omp_build_assign (&gsi, fd->loops[i].v,
3630 fold_convert (type, fd->loops[i].n1));
3631 }
3632 else
3633 {
3634 tree type = TREE_TYPE (fd->loops[i].v);
3635 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3636 boolean_type_node,
3637 fold_convert (type, fd->loops[i].n1),
3638 fold_convert (type, fd->loops[i].n2));
3639 if (!integer_onep (this_cond))
3640 need_inits = true;
3641 }
3642
3643 return cont_bb;
3644}
3645
3646/* A subroutine of expand_omp_for. Generate code for a parallel
3647 loop with any schedule. Given parameters:
3648
3649 for (V = N1; V cond N2; V += STEP) BODY;
3650
3651 where COND is "<" or ">", we generate pseudocode
3652
3653 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3654 if (more) goto L0; else goto L3;
3655 L0:
3656 V = istart0;
3657 iend = iend0;
3658 L1:
3659 BODY;
3660 V += STEP;
3661 if (V cond iend) goto L1; else goto L2;
3662 L2:
3663 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3664 L3:
3665
3666 If this is a combined omp parallel loop, instead of the call to
3667 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3668 If this is gimple_omp_for_combined_p loop, then instead of assigning
3669 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3670 inner GIMPLE_OMP_FOR and V += STEP; and
3671 if (V cond iend) goto L1; else goto L2; are removed.
3672
3673 For collapsed loops, given parameters:
3674 collapse(3)
3675 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3676 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3677 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3678 BODY;
3679
3680 we generate pseudocode
3681
3682 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3683 if (cond3 is <)
3684 adj = STEP3 - 1;
3685 else
3686 adj = STEP3 + 1;
3687 count3 = (adj + N32 - N31) / STEP3;
3688 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3689 if (cond2 is <)
3690 adj = STEP2 - 1;
3691 else
3692 adj = STEP2 + 1;
3693 count2 = (adj + N22 - N21) / STEP2;
3694 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3695 if (cond1 is <)
3696 adj = STEP1 - 1;
3697 else
3698 adj = STEP1 + 1;
3699 count1 = (adj + N12 - N11) / STEP1;
3700 count = count1 * count2 * count3;
3701 goto Z1;
3702 Z0:
3703 count = 0;
3704 Z1:
3705 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3706 if (more) goto L0; else goto L3;
3707 L0:
3708 V = istart0;
3709 T = V;
3710 V3 = N31 + (T % count3) * STEP3;
3711 T = T / count3;
3712 V2 = N21 + (T % count2) * STEP2;
3713 T = T / count2;
3714 V1 = N11 + T * STEP1;
3715 iend = iend0;
3716 L1:
3717 BODY;
3718 V += 1;
3719 if (V < iend) goto L10; else goto L2;
3720 L10:
3721 V3 += STEP3;
3722 if (V3 cond3 N32) goto L1; else goto L11;
3723 L11:
3724 V3 = N31;
3725 V2 += STEP2;
3726 if (V2 cond2 N22) goto L1; else goto L12;
3727 L12:
3728 V2 = N21;
3729 V1 += STEP1;
3730 goto L1;
3731 L2:
3732 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3733 L3:
3734
3735 */
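
/* Illustration (a sketch with the dynamic schedule standing in for "foo";
   the arguments follow the libgomp convention
   GOMP_loop_dynamic_start (start, end, incr, chunk, &istart0, &iend0)):

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) body (i);

   becomes roughly

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 {
	   for (i = istart0; i < iend0; i++) body (i);
	 }
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */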
3736
3737static void
3738expand_omp_for_generic (struct omp_region *region,
3739 struct omp_for_data *fd,
3740 enum built_in_function start_fn,
3741 enum built_in_function next_fn,
3742 tree sched_arg,
3743 gimple *inner_stmt)
3744{
3745 tree type, istart0, iend0, iend;
3746 tree t, vmain, vback, bias = NULL_TREE;
3747 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3748 basic_block l2_bb = NULL, l3_bb = NULL;
3749 gimple_stmt_iterator gsi;
3750 gassign *assign_stmt;
3751 bool in_combined_parallel = is_combined_parallel (region);
3752 bool broken_loop = region->cont == NULL;
3753 edge e, ne;
3754 tree *counts = NULL;
3755 int i;
3756 bool ordered_lastprivate = false;
3757
3758 gcc_assert (!broken_loop || !in_combined_parallel);
3759 gcc_assert (fd->iter_type == long_integer_type_node
3760 || !in_combined_parallel);
3761
3762 entry_bb = region->entry;
3763 cont_bb = region->cont;
3764 collapse_bb = NULL;
3765 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3766 gcc_assert (broken_loop
3767 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3768 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3769 l1_bb = single_succ (l0_bb);
3770 if (!broken_loop)
3771 {
3772 l2_bb = create_empty_bb (cont_bb);
3773 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3774 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3775 == l1_bb));
3776 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3777 }
3778 else
3779 l2_bb = NULL;
3780 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3781 exit_bb = region->exit;
3782
3783 gsi = gsi_last_nondebug_bb (entry_bb);
3784
3785 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3786 if (fd->ordered
3787 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3788 OMP_CLAUSE_LASTPRIVATE))
3789 ordered_lastprivate = true;
3790 tree reductions = NULL_TREE;
3791 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3792 tree memv = NULL_TREE;
3793 if (fd->lastprivate_conditional)
3794 {
3795 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3796 OMP_CLAUSE__CONDTEMP_);
3797 if (fd->have_pointer_condtemp)
3798 condtemp = OMP_CLAUSE_DECL (c);
3799 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3800 cond_var = OMP_CLAUSE_DECL (c);
3801 }
3802 if (sched_arg)
3803 {
3804 if (fd->have_reductemp)
3805 {
3806 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3807 OMP_CLAUSE__REDUCTEMP_);
3808 reductions = OMP_CLAUSE_DECL (c);
3809 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3810 gimple *g = SSA_NAME_DEF_STMT (reductions);
3811 reductions = gimple_assign_rhs1 (g);
3812 OMP_CLAUSE_DECL (c) = reductions;
3813 entry_bb = gimple_bb (g);
3814 edge e = split_block (entry_bb, g);
3815 if (region->entry == entry_bb)
3816 region->entry = e->dest;
3817 gsi = gsi_last_bb (entry_bb);
3818 }
3819 else
3820 reductions = null_pointer_node;
3821 if (fd->have_pointer_condtemp)
3822 {
3823 tree type = TREE_TYPE (condtemp);
3824 memv = create_tmp_var (type);
3825 TREE_ADDRESSABLE (memv) = 1;
3826 unsigned HOST_WIDE_INT sz
3827 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3828 sz *= fd->lastprivate_conditional;
3829 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3830 false);
3831 mem = build_fold_addr_expr (memv);
3832 }
3833 else
3834 mem = null_pointer_node;
3835 }
3836 if (fd->collapse > 1 || fd->ordered)
3837 {
3838 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3839 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3840
3841 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3842 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3843 zero_iter1_bb, first_zero_iter1,
3844 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3845
3846 if (zero_iter1_bb)
3847 {
3848 /* Some counts[i] vars might be uninitialized if
3849 some loop has zero iterations. But the body shouldn't
3850 be executed in that case, so just avoid uninit warnings. */
3851 for (i = first_zero_iter1;
3852 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3853 if (SSA_VAR_P (counts[i]))
3854 suppress_warning (counts[i], OPT_Wuninitialized);
3855 gsi_prev (&gsi);
3856 e = split_block (entry_bb, gsi_stmt (gsi));
3857 entry_bb = e->dest;
3858 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3859 gsi = gsi_last_nondebug_bb (entry_bb);
3860 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3861 get_immediate_dominator (CDI_DOMINATORS,
3862 zero_iter1_bb));
3863 }
3864 if (zero_iter2_bb)
3865 {
3866 /* Some counts[i] vars might be uninitialized if
3867 some loop has zero iterations. But the body shouldn't
3868 be executed in that case, so just avoid uninit warnings. */
3869 for (i = first_zero_iter2; i < fd->ordered; i++)
3870 if (SSA_VAR_P (counts[i]))
3871 suppress_warning (counts[i], OPT_Wuninitialized);
3872 if (zero_iter1_bb)
3873 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3874 else
3875 {
3876 gsi_prev (&gsi);
3877 e = split_block (entry_bb, gsi_stmt (gsi));
3878 entry_bb = e->dest;
3879 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3880 gsi = gsi_last_nondebug_bb (entry_bb);
3881 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3882 get_immediate_dominator
3883 (CDI_DOMINATORS, zero_iter2_bb));
3884 }
3885 }
3886 if (fd->collapse == 1)
3887 {
3888 counts[0] = fd->loop.n2;
3889 fd->loop = fd->loops[0];
3890 }
3891 }
3892
3893 type = TREE_TYPE (fd->loop.v);
3894 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3895 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3896 TREE_ADDRESSABLE (istart0) = 1;
3897 TREE_ADDRESSABLE (iend0) = 1;
3898
3899 /* See if we need to bias by LLONG_MIN. */
3900 if (fd->iter_type == long_long_unsigned_type_node
3901 && TREE_CODE (type) == INTEGER_TYPE
3902 && !TYPE_UNSIGNED (type)
3903 && fd->ordered == 0)
3904 {
3905 tree n1, n2;
3906
3907 if (fd->loop.cond_code == LT_EXPR)
3908 {
3909 n1 = fd->loop.n1;
3910 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3911 }
3912 else
3913 {
3914 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3915 n2 = fd->loop.n1;
3916 }
3917 if (TREE_CODE (n1) != INTEGER_CST
3918 || TREE_CODE (n2) != INTEGER_CST
3919 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3920 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3921 }
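      /* E.g. for a long long IV driven through the unsigned long long
	 runtime API: adding bias = LLONG_MIN (0x8000000000000000 when viewed
	 as unsigned) maps [LLONG_MIN, LLONG_MAX] onto [0, ULLONG_MAX] while
	 preserving order, and the bias is subtracted again below when V is
	 computed from istart0/iend0.  */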
3922
3923 gimple_stmt_iterator gsif = gsi;
3924 gsi_prev (&gsif);
3925
3926 tree arr = NULL_TREE;
3927 if (in_combined_parallel)
3928 {
3929 gcc_assert (fd->ordered == 0);
3930 /* In a combined parallel loop, emit a call to
3931 GOMP_loop_foo_next. */
3932 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3933 build_fold_addr_expr (istart0),
3934 build_fold_addr_expr (iend0));
3935 }
3936 else
3937 {
3938 tree t0, t1, t2, t3, t4;
3939 /* If this is not a combined parallel loop, emit a call to
3940 GOMP_loop_foo_start in ENTRY_BB. */
3941 t4 = build_fold_addr_expr (iend0);
3942 t3 = build_fold_addr_expr (istart0);
3943 if (fd->ordered)
3944 {
3945 t0 = build_int_cst (unsigned_type_node,
3946 fd->ordered - fd->collapse + 1);
3947 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3948 fd->ordered
3949 - fd->collapse + 1),
3950 ".omp_counts");
3951 DECL_NAMELESS (arr) = 1;
3952 TREE_ADDRESSABLE (arr) = 1;
3953 TREE_STATIC (arr) = 1;
3954 vec<constructor_elt, va_gc> *v;
3955 vec_alloc (v, fd->ordered - fd->collapse + 1);
3956 int idx;
3957
3958 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3959 {
3960 tree c;
3961 if (idx == 0 && fd->collapse > 1)
3962 c = fd->loop.n2;
3963 else
3964 c = counts[idx + fd->collapse - 1];
3965 tree purpose = size_int (idx);
3966 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3967 if (TREE_CODE (c) != INTEGER_CST)
3968 TREE_STATIC (arr) = 0;
3969 }
3970
3971 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3972 if (!TREE_STATIC (arr))
3973 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3974 void_type_node, arr),
3975 true, NULL_TREE, true, GSI_SAME_STMT);
3976 t1 = build_fold_addr_expr (arr);
3977 t2 = NULL_TREE;
3978 }
3979 else
3980 {
3981 t2 = fold_convert (fd->iter_type, fd->loop.step);
3982 t1 = fd->loop.n2;
3983 t0 = fd->loop.n1;
3984 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3985 {
3986 tree innerc
3987 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3988 OMP_CLAUSE__LOOPTEMP_);
3989 gcc_assert (innerc);
3990 t0 = OMP_CLAUSE_DECL (innerc);
3991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3992 OMP_CLAUSE__LOOPTEMP_);
3993 gcc_assert (innerc);
3994 t1 = OMP_CLAUSE_DECL (innerc);
3995 }
3996 if (POINTER_TYPE_P (TREE_TYPE (t0))
3997 && TYPE_PRECISION (TREE_TYPE (t0))
3998 != TYPE_PRECISION (fd->iter_type))
3999 {
4000 /* Avoid casting pointers to integer of a different size. */
4001 tree itype = signed_type_for (type);
4002 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4003 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4004 }
4005 else
4006 {
4007 t1 = fold_convert (fd->iter_type, t1);
4008 t0 = fold_convert (fd->iter_type, t0);
4009 }
4010 if (bias)
4011 {
4012 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4013 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4014 }
4015 }
4016 if (fd->iter_type == long_integer_type_node || fd->ordered)
4017 {
4018 if (fd->chunk_size)
4019 {
4020 t = fold_convert (fd->iter_type, fd->chunk_size);
4021 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4022 if (sched_arg)
4023 {
4024 if (fd->ordered)
4025 t = build_call_expr (builtin_decl_explicit (start_fn),
4026 8, t0, t1, sched_arg, t, t3, t4,
4027 reductions, mem);
4028 else
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 9, t0, t1, t2, sched_arg, t, t3, t4,
4031 reductions, mem);
4032 }
4033 else if (fd->ordered)
4034 t = build_call_expr (builtin_decl_explicit (start_fn),
4035 5, t0, t1, t, t3, t4);
4036 else
4037 t = build_call_expr (builtin_decl_explicit (start_fn),
4038 6, t0, t1, t2, t, t3, t4);
4039 }
4040 else if (fd->ordered)
4041 t = build_call_expr (builtin_decl_explicit (start_fn),
4042 4, t0, t1, t3, t4);
4043 else
4044 t = build_call_expr (builtin_decl_explicit (start_fn),
4045 5, t0, t1, t2, t3, t4);
4046 }
4047 else
4048 {
4049 tree t5;
4050 tree c_bool_type;
4051 tree bfn_decl;
4052
4053 /* The GOMP_loop_ull_*start functions have additional boolean
4054 argument, true for < loops and false for > loops.
4055 In Fortran, the C bool type can be different from
4056 boolean_type_node. */
4057 bfn_decl = builtin_decl_explicit (start_fn);
4058 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4059 t5 = build_int_cst (c_bool_type,
4060 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4061 if (fd->chunk_size)
4062 {
4063 tree bfn_decl = builtin_decl_explicit (start_fn);
4064 t = fold_convert (fd->iter_type, fd->chunk_size);
4065 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4066 if (sched_arg)
4067 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4068 t, t3, t4, reductions, mem);
4069 else
4070 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4071 }
4072 else
4073 t = build_call_expr (builtin_decl_explicit (start_fn),
4074 6, t5, t0, t1, t2, t3, t4);
4075 }
4076 }
4077 if (TREE_TYPE (t) != boolean_type_node)
4078 t = fold_build2 (NE_EXPR, boolean_type_node,
4079 t, build_int_cst (TREE_TYPE (t), 0));
4080 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4081 true, GSI_SAME_STMT);
4082 if (arr && !TREE_STATIC (arr))
4083 {
4084 tree clobber = build_clobber (TREE_TYPE (arr));
4085 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4086 GSI_SAME_STMT);
4087 }
4088 if (fd->have_pointer_condtemp)
4089 expand_omp_build_assign (&gsi, condtemp, memv, false);
4090 if (fd->have_reductemp)
4091 {
4092 gimple *g = gsi_stmt (gsi);
4093 gsi_remove (&gsi, true);
4094 release_ssa_name (gimple_assign_lhs (g));
4095
4096 entry_bb = region->entry;
4097 gsi = gsi_last_nondebug_bb (entry_bb);
4098
4099 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4100 }
4101 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4102
4103 /* Remove the GIMPLE_OMP_FOR statement. */
4104 gsi_remove (&gsi, true);
4105
4106 if (gsi_end_p (gsif))
4107 gsif = gsi_after_labels (gsi_bb (gsif));
4108 gsi_next (&gsif);
4109
4110 /* Iteration setup for sequential loop goes in L0_BB. */
4111 tree startvar = fd->loop.v;
4112 tree endvar = NULL_TREE;
4113
4114 if (gimple_omp_for_combined_p (fd->for_stmt))
4115 {
4116 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4117 && gimple_omp_for_kind (inner_stmt)
4118 == GF_OMP_FOR_KIND_SIMD);
4119 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4120 OMP_CLAUSE__LOOPTEMP_);
4121 gcc_assert (innerc);
4122 startvar = OMP_CLAUSE_DECL (innerc);
4123 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4124 OMP_CLAUSE__LOOPTEMP_);
4125 gcc_assert (innerc);
4126 endvar = OMP_CLAUSE_DECL (innerc);
4127 }
4128
4129 gsi = gsi_start_bb (l0_bb);
4130 t = istart0;
4131 if (fd->ordered && fd->collapse == 1)
4132 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4133 fold_convert (fd->iter_type, fd->loop.step));
4134 else if (bias)
4135 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4136 if (fd->ordered && fd->collapse == 1)
4137 {
4138 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4139 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4140 fd->loop.n1, fold_convert (sizetype, t));
4141 else
4142 {
4143 t = fold_convert (TREE_TYPE (startvar), t);
4144 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4145 fd->loop.n1, t);
4146 }
4147 }
4148 else
4149 {
4150 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4151 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4152 t = fold_convert (TREE_TYPE (startvar), t);
4153 }
4154 t = force_gimple_operand_gsi (&gsi, t,
4155 DECL_P (startvar)
4156 && TREE_ADDRESSABLE (startvar),
4157 NULL_TREE, false, GSI_CONTINUE_LINKING);
4158 assign_stmt = gimple_build_assign (startvar, t);
4159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4160 if (cond_var)
4161 {
4162 tree itype = TREE_TYPE (cond_var);
4163 /* For lastprivate(conditional:) itervar, we need some iteration
4164 counter that starts at unsigned non-zero and increases.
4165 Prefer as few IVs as possible, so if we can use startvar
4166 itself, use that, or startvar + constant (those would be
4167 incremented with step), and as last resort use the s0 + 1
4168 incremented by 1. */
4169 if ((fd->ordered && fd->collapse == 1)
4170 || bias
4171 || POINTER_TYPE_P (type)
4172 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4173 || fd->loop.cond_code != LT_EXPR)
4174 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4175 build_int_cst (itype, 1));
4176 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4177 t = fold_convert (itype, t);
4178 else
4179 {
4180 tree c = fold_convert (itype, fd->loop.n1);
4181 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4182 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4183 }
4184 t = force_gimple_operand_gsi (&gsi, t, false,
4185 NULL_TREE, false, GSI_CONTINUE_LINKING);
4186 assign_stmt = gimple_build_assign (cond_var, t);
4187 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4188 }
4189
4190 t = iend0;
4191 if (fd->ordered && fd->collapse == 1)
4192 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4193 fold_convert (fd->iter_type, fd->loop.step));
4194 else if (bias)
4195 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4196 if (fd->ordered && fd->collapse == 1)
4197 {
4198 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4199 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4200 fd->loop.n1, fold_convert (sizetype, t));
4201 else
4202 {
4203 t = fold_convert (TREE_TYPE (startvar), t);
4204 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4205 fd->loop.n1, t);
4206 }
4207 }
4208 else
4209 {
4210 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4211 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4212 t = fold_convert (TREE_TYPE (startvar), t);
4213 }
4214 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4215 false, GSI_CONTINUE_LINKING);
4216 if (endvar)
4217 {
4218 assign_stmt = gimple_build_assign (endvar, iend);
4219 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4220 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4221 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4222 else
4223 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4224 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4225 }
4226 /* Handle linear clause adjustments. */
4227 tree itercnt = NULL_TREE;
4228 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4229 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4230 c; c = OMP_CLAUSE_CHAIN (c))
4231 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4232 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4233 {
4234 tree d = OMP_CLAUSE_DECL (c);
4235 tree t = d, a, dest;
4236 if (omp_privatize_by_reference (t))
4237 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4238 tree type = TREE_TYPE (t);
4239 if (POINTER_TYPE_P (type))
4240 type = sizetype;
4241 dest = unshare_expr (t);
4242 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4243 expand_omp_build_assign (&gsif, v, t);
4244 if (itercnt == NULL_TREE)
4245 {
4246 itercnt = startvar;
4247 tree n1 = fd->loop.n1;
4248 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4249 {
4250 itercnt
4251 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4252 itercnt);
4253 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4254 }
4255 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4256 itercnt, n1);
4257 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4258 itercnt, fd->loop.step);
4259 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4260 NULL_TREE, false,
4261 GSI_CONTINUE_LINKING);
4262 }
4263 a = fold_build2 (MULT_EXPR, type,
4264 fold_convert (type, itercnt),
4265 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4266 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4267 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4268 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4269 false, GSI_CONTINUE_LINKING);
4270 expand_omp_build_assign (&gsi, dest, t, true);
4271 }
4272 if (fd->collapse > 1)
4273 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4274
4275 if (fd->ordered)
4276 {
4277 /* Until now, counts array contained number of iterations or
4278 variable containing it for ith loop. From now on, we need
4279 those counts only for collapsed loops, and only for the 2nd
4280 till the last collapsed one. Move those one element earlier,
4281 we'll use counts[fd->collapse - 1] for the first source/sink
4282 iteration counter and so on and counts[fd->ordered]
4283 as the array holding the current counter values for
4284 depend(source). */
4285 if (fd->collapse > 1)
4286 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4287 if (broken_loop)
4288 {
4289 int i;
4290 for (i = fd->collapse; i < fd->ordered; i++)
4291 {
4292 tree type = TREE_TYPE (fd->loops[i].v);
4293 tree this_cond
4294 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4295 fold_convert (type, fd->loops[i].n1),
4296 fold_convert (type, fd->loops[i].n2));
4297 if (!integer_onep (this_cond))
4298 break;
4299 }
4300 if (i < fd->ordered)
4301 {
4302 cont_bb
4303 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4304 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4305 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4306 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4307 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4308 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4309 make_edge (cont_bb, l1_bb, 0);
4310 l2_bb = create_empty_bb (cont_bb);
4311 broken_loop = false;
4312 }
4313 }
4314 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4315 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4316 ordered_lastprivate);
4317 if (counts[fd->collapse - 1])
4318 {
4319 gcc_assert (fd->collapse == 1);
4320 gsi = gsi_last_bb (l0_bb);
4321 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4322 istart0, true);
4323 if (cont_bb)
4324 {
4325 gsi = gsi_last_bb (cont_bb);
4326 t = fold_build2 (PLUS_EXPR, fd->iter_type,
4327 counts[fd->collapse - 1],
4328 build_int_cst (fd->iter_type, 1));
4329 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4330 tree aref = build4 (ARRAY_REF, fd->iter_type,
4331 counts[fd->ordered], size_zero_node,
4332 NULL_TREE, NULL_TREE);
4333 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4334 }
4335 t = counts[fd->collapse - 1];
4336 }
4337 else if (fd->collapse > 1)
4338 t = fd->loop.v;
4339 else
4340 {
4341 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4342 fd->loops[0].v, fd->loops[0].n1);
4343 t = fold_convert (fd->iter_type, t);
4344 }
4345 gsi = gsi_last_bb (l0_bb);
4346 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4347 size_zero_node, NULL_TREE, NULL_TREE);
4348 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4349 false, GSI_CONTINUE_LINKING);
4350 expand_omp_build_assign (&gsi, aref, t, true);
4351 }
4352
4353 if (!broken_loop)
4354 {
4355 /* Code to control the increment and predicate for the sequential
4356 loop goes in the CONT_BB. */
4357 gsi = gsi_last_nondebug_bb (cont_bb);
4358 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4359 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4360 vmain = gimple_omp_continue_control_use (cont_stmt);
4361 vback = gimple_omp_continue_control_def (cont_stmt);
4362
4363 if (cond_var)
4364 {
4365 tree itype = TREE_TYPE (cond_var);
4366 tree t2;
4367 if ((fd->ordered && fd->collapse == 1)
4368 || bias
4369 || POINTER_TYPE_P (type)
4370 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4371 || fd->loop.cond_code != LT_EXPR)
4372 t2 = build_int_cst (itype, 1);
4373 else
4374 t2 = fold_convert (itype, fd->loop.step);
4375 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4376 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4377 NULL_TREE, true, GSI_SAME_STMT);
4378 assign_stmt = gimple_build_assign (cond_var, t2);
4379 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4380 }
4381
4382 if (!gimple_omp_for_combined_p (fd->for_stmt))
4383 {
4384 if (POINTER_TYPE_P (type))
4385 t = fold_build_pointer_plus (vmain, fd->loop.step);
4386 else
4387 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4388 t = force_gimple_operand_gsi (&gsi, t,
4389 DECL_P (vback)
4390 && TREE_ADDRESSABLE (vback),
4391 NULL_TREE, true, GSI_SAME_STMT);
4392 assign_stmt = gimple_build_assign (vback, t);
4393 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4394
4395 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4396 {
4397 tree tem;
4398 if (fd->collapse > 1)
4399 tem = fd->loop.v;
4400 else
4401 {
4402 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4403 fd->loops[0].v, fd->loops[0].n1);
4404 tem = fold_convert (fd->iter_type, tem);
4405 }
4406 tree aref = build4 (ARRAY_REF, fd->iter_type,
4407 counts[fd->ordered], size_zero_node,
4408 NULL_TREE, NULL_TREE);
4409 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4410 true, GSI_SAME_STMT);
4411 expand_omp_build_assign (&gsi, aref, tem);
4412 }
4413
4414 t = build2 (fd->loop.cond_code, boolean_type_node,
4415 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4416 iend);
4417 gcond *cond_stmt = gimple_build_cond_empty (t);
4418 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4419 }
4420
4421 /* Remove GIMPLE_OMP_CONTINUE. */
4422 gsi_remove (&gsi, true);
4423
4424 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4425 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4426
4427 /* Emit code to get the next parallel iteration in L2_BB. */
4428 gsi = gsi_start_bb (l2_bb);
4429
4430 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4431 build_fold_addr_expr (istart0),
4432 build_fold_addr_expr (iend0));
4433 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4434 false, GSI_CONTINUE_LINKING);
4435 if (TREE_TYPE (t) != boolean_type_node)
4436 t = fold_build2 (NE_EXPR, boolean_type_node,
4437 t, build_int_cst (TREE_TYPE (t), 0));
4438 gcond *cond_stmt = gimple_build_cond_empty (t);
4439 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4440 }
4441
4442 /* Add the loop cleanup function. */
4443 gsi = gsi_last_nondebug_bb (exit_bb);
4444 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4445 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4446 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4447 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4448 else
4449 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4450 gcall *call_stmt = gimple_build_call (t, 0);
4451 if (fd->ordered)
4452 {
4453 tree arr = counts[fd->ordered];
4454 tree clobber = build_clobber (TREE_TYPE (arr));
4455 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4456 GSI_SAME_STMT);
4457 }
4458 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4459 {
4460 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4461 if (fd->have_reductemp)
4462 {
4463 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4464 gimple_call_lhs (call_stmt));
4465 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4466 }
4467 }
4468 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4469 gsi_remove (&gsi, true);
4470
4471 /* Connect the new blocks. */
4472 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4473 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4474
4475 if (!broken_loop)
4476 {
4477 gimple_seq phis;
4478
4479 e = find_edge (cont_bb, l3_bb);
4480 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4481
4482 phis = phi_nodes (l3_bb);
4483 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4484 {
4485 gimple *phi = gsi_stmt (gsi);
4486 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4487 PHI_ARG_DEF_FROM_EDGE (phi, e));
4488 }
4489 remove_edge (e);
4490
4491 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4492 e = find_edge (cont_bb, l1_bb);
4493 if (e == NULL)
4494 {
4495 e = BRANCH_EDGE (cont_bb);
4496 gcc_assert (single_succ (e->dest) == l1_bb);
4497 }
4498 if (gimple_omp_for_combined_p (fd->for_stmt))
4499 {
4500 remove_edge (e);
4501 e = NULL;
4502 }
4503 else if (fd->collapse > 1)
4504 {
4505 remove_edge (e);
4506 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4507 }
4508 else
4509 e->flags = EDGE_TRUE_VALUE;
4510 if (e)
4511 {
4512 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4513 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4514 }
4515 else
4516 {
4517 e = find_edge (cont_bb, l2_bb);
4518 e->flags = EDGE_FALLTHRU;
4519 }
4520 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4521
4522 if (gimple_in_ssa_p (cfun))
4523 {
4524 /* Add phis to the outer loop that connect to the phis in the inner,
4525 original loop, and move the loop entry value of the inner phi to
4526 the loop entry value of the outer phi. */
4527 gphi_iterator psi;
4528 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4529 {
4530 location_t locus;
4531 gphi *nphi;
4532 gphi *exit_phi = psi.phi ();
4533
4534 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4535 continue;
4536
4537 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4538 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4539
4540 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4541 edge latch_to_l1 = find_edge (latch, l1_bb);
4542 gphi *inner_phi
4543 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4544
4545 tree t = gimple_phi_result (exit_phi);
4546 tree new_res = copy_ssa_name (t, NULL);
4547 nphi = create_phi_node (new_res, l0_bb);
4548
4549 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4550 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4551 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4552 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4553 add_phi_arg (nphi, t, entry_to_l0, locus);
4554
4555 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4556 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4557
4558 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4559 }
4560 }
4561
4562 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4563 recompute_dominator (CDI_DOMINATORS, l2_bb));
4564 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4565 recompute_dominator (CDI_DOMINATORS, l3_bb));
4566 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4567 recompute_dominator (CDI_DOMINATORS, l0_bb));
4568 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4569 recompute_dominator (CDI_DOMINATORS, l1_bb));
4570
4571 /* We enter expand_omp_for_generic with a loop. This original loop may
4572 have its own loop struct, or it may be part of an outer loop struct
4573 (which may be the fake loop). */
4574 class loop *outer_loop = entry_bb->loop_father;
4575 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4576
4577 add_bb_to_loop (l2_bb, outer_loop);
4578
4579 /* We've added a new loop around the original loop. Allocate the
4580 corresponding loop struct. */
4581 class loop *new_loop = alloc_loop ();
4582 new_loop->header = l0_bb;
4583 new_loop->latch = l2_bb;
4584 add_loop (new_loop, outer_loop);
4585
4586 /* Allocate a loop structure for the original loop unless we already
4587 had one. */
4588 if (!orig_loop_has_loop_struct
4589 && !gimple_omp_for_combined_p (fd->for_stmt))
4590 {
4591 class loop *orig_loop = alloc_loop ();
4592 orig_loop->header = l1_bb;
4593 /* The loop may have multiple latches. */
4594 add_loop (orig_loop, new_loop);
4595 }
4596 }
4597}
4598
4599/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4600 compute the needed allocation size. If !ALLOC, this is for team
4601 allocations; if ALLOC, for thread allocations. SZ is the initial size
4602 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4603 allocation in bytes, CNT the number of elements of each array; for !ALLOC
4604 this is omp_get_num_threads (), for ALLOC the number of iterations
4605 handled by the current thread. If PTR is non-NULL, it is the start of
4606 the allocation, and this routine shall assign to OMP_CLAUSE_DECL (c) of
4607 those _scantemp_ clauses pointers to the corresponding arrays. */
4608
4609static tree
4610expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4611 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4612 gimple_stmt_iterator *gsi, bool alloc)
4613{
4614 tree eltsz = NULL_TREE;
4615 unsigned HOST_WIDE_INT preval = 0;
4616 if (ptr && sz)
4617 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4618 ptr, size_int (sz));
4619 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4620 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4621 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4622 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4623 {
4624 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4625 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4626 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4627 {
4628 unsigned HOST_WIDE_INT szl
4629 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4630 szl = least_bit_hwi (szl);
4631 if (szl)
4632 al = MIN (al, szl);
4633 }
4634 if (ptr == NULL_TREE)
4635 {
4636 if (eltsz == NULL_TREE)
4637 eltsz = TYPE_SIZE_UNIT (pointee_type);
4638 else
4639 eltsz = size_binop (PLUS_EXPR, eltsz,
4640 TYPE_SIZE_UNIT (pointee_type));
4641 }
4642 if (preval == 0 && al <= alloc_align)
4643 {
4644 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4645 sz += diff;
4646 if (diff && ptr)
4647 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4648 ptr, size_int (diff));
4649 }
4650 else if (al > preval)
4651 {
4652 if (ptr)
4653 {
4654 ptr = fold_convert (pointer_sized_int_node, ptr);
4655 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4656 build_int_cst (pointer_sized_int_node,
4657 al - 1));
4658 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4659 build_int_cst (pointer_sized_int_node,
4660 -(HOST_WIDE_INT) al));
4661 ptr = fold_convert (ptr_type_node, ptr);
4662 }
4663 else
4664 sz += al - 1;
4665 }
4666 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4667 preval = al;
4668 else
4669 preval = 1;
4670 if (ptr)
4671 {
4672 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4673 ptr = OMP_CLAUSE_DECL (c);
4674 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4675 size_binop (MULT_EXPR, cnt,
4676 TYPE_SIZE_UNIT (pointee_type)));
4677 }
4678 }
4679
4680 if (ptr == NULL_TREE)
4681 {
4682 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4683 if (sz)
4684 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4685 return eltsz;
4686 }
4687 else
4688 return ptr;
4689}
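
/* Usage note (a summary of the contract above): the function is meant to
   be called twice -- first with PTR == NULL to compute the total
   allocation size, then with PTR pointing at the resulting allocation so
   each _scantemp_ clause decl receives a pointer to its properly aligned
   slice.  */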
4690
4691/* Return the last _looptemp_ clause if one has been created for
4692 lastprivate on distribute parallel for{, simd} or taskloop.
4693 FD is the loop data and INNERC should be the second _looptemp_
4694 clause (the one holding the end of the range).
4695 This is followed by collapse - 1 _looptemp_ clauses for the
4696 counts[1] and up, and for triangular loops followed by 4
4697 further _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4698 one for factor and one for adjn1). After this there is optionally one
4699 _looptemp_ clause that this function returns. */
4700
4701static tree
4702find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4703{
4704 gcc_assert (innerc);
4705 int count = fd->collapse - 1;
4706 if (fd->non_rect
4707 && fd->last_nonrect == fd->first_nonrect + 1
4708 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4709 count += 4;
4710 for (int i = 0; i < count; i++)
4711 {
4712 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4713 OMP_CLAUSE__LOOPTEMP_);
4714 gcc_assert (innerc);
4715 }
4716 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4717 OMP_CLAUSE__LOOPTEMP_);
4718}
4719
4720/* A subroutine of expand_omp_for. Generate code for a parallel
4721 loop with static schedule and no specified chunk size. Given
4722 parameters:
4723
4724 for (V = N1; V cond N2; V += STEP) BODY;
4725
4726 where COND is "<" or ">", we generate pseudocode
4727
4728 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4729 if (cond is <)
4730 adj = STEP - 1;
4731 else
4732 adj = STEP + 1;
4733 if ((__typeof (V)) -1 > 0 && cond is >)
4734 n = -(adj + N2 - N1) / -STEP;
4735 else
4736 n = (adj + N2 - N1) / STEP;
4737 q = n / nthreads;
4738 tt = n % nthreads;
4739 if (threadid < tt) goto L3; else goto L4;
4740 L3:
4741 tt = 0;
4742 q = q + 1;
4743 L4:
4744 s0 = q * threadid + tt;
4745 e0 = s0 + q;
4746 V = s0 * STEP + N1;
4747 if (s0 >= e0) goto L2; else goto L0;
4748 L0:
4749 e = e0 * STEP + N1;
4750 L1:
4751 BODY;
4752 V += STEP;
4753 if (V cond e) goto L1;
4754 L2:
4755*/
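
/* Worked example (illustrative arithmetic only): with n = 103 iterations
   and nthreads = 4, q = 25 and tt = 3, so threads 0..2 take q + 1 = 26
   iterations each and thread 3 takes 25; e.g. thread 1 covers
   [s0, e0) = [26, 52).  */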
4756
4757static void
4758expand_omp_for_static_nochunk (struct omp_region *region,
4759 struct omp_for_data *fd,
4760 gimple *inner_stmt)
4761{
4762 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4763 tree type, itype, vmain, vback;
4764 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4765 basic_block body_bb, cont_bb, collapse_bb = NULL;
4766 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4767 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4768 gimple_stmt_iterator gsi, gsip;
4769 edge ep;
4770 bool broken_loop = region->cont == NULL;
4771 tree *counts = NULL;
4772 tree n1, n2, step;
4773 tree reductions = NULL_TREE;
4774 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4775
4776 itype = type = TREE_TYPE (fd->loop.v);
4777 if (POINTER_TYPE_P (type))
4778 itype = signed_type_for (type);
4779
4780 entry_bb = region->entry;
4781 cont_bb = region->cont;
4782 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4783 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4784 gcc_assert (broken_loop
4785 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4786 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4787 body_bb = single_succ (seq_start_bb);
4788 if (!broken_loop)
4789 {
4790 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4791 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4792 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4793 }
4794 exit_bb = region->exit;
4795
4796 /* Iteration space partitioning goes in ENTRY_BB. */
4797 gsi = gsi_last_nondebug_bb (entry_bb);
4798 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4799 gsip = gsi;
4800 gsi_prev (&gsip);
4801
4802 if (fd->collapse > 1)
4803 {
4804 int first_zero_iter = -1, dummy = -1;
4805 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4806
4807 counts = XALLOCAVEC (tree, fd->collapse);
4808 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4809 fin_bb, first_zero_iter,
4810 dummy_bb, dummy, l2_dom_bb);
4811 t = NULL_TREE;
4812 }
4813 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4814 t = integer_one_node;
4815 else
4816 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4817 fold_convert (type, fd->loop.n1),
4818 fold_convert (type, fd->loop.n2));
4819 if (fd->collapse == 1
4820 && TYPE_UNSIGNED (type)
4821 && (t == NULL_TREE || !integer_onep (t)))
4822 {
4823 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4824 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4825 true, GSI_SAME_STMT);
4826 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4827 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4828 true, GSI_SAME_STMT);
4829 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4830 NULL_TREE, NULL_TREE);
4831 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4832 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4833 expand_omp_regimplify_p, NULL, NULL)
4834 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4835 expand_omp_regimplify_p, NULL, NULL))
4836 {
4837 gsi = gsi_for_stmt (cond_stmt);
4838 gimple_regimplify_operands (cond_stmt, &gsi);
4839 }
4840 ep = split_block (entry_bb, cond_stmt);
4841 ep->flags = EDGE_TRUE_VALUE;
4842 entry_bb = ep->dest;
4843 ep->probability = profile_probability::very_likely ();
4844 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4845 ep->probability = profile_probability::very_unlikely ();
4846 if (gimple_in_ssa_p (cfun))
4847 {
4848 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4849 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4850 !gsi_end_p (gpi); gsi_next (&gpi))
4851 {
4852 gphi *phi = gpi.phi ();
4853 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4854 ep, UNKNOWN_LOCATION);
4855 }
4856 }
4857 gsi = gsi_last_bb (entry_bb);
4858 }
4859
4860 if (fd->lastprivate_conditional)
4861 {
4862 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4863 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4864 if (fd->have_pointer_condtemp)
4865 condtemp = OMP_CLAUSE_DECL (c);
4866 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4867 cond_var = OMP_CLAUSE_DECL (c);
4868 }
4869 if (fd->have_reductemp
4870 /* For scan, we don't want to reinitialize condtemp before the
4871 second loop. */
4872 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4873 || fd->have_nonctrl_scantemp)
4874 {
4875 tree t1 = build_int_cst (long_integer_type_node, 0);
4876 tree t2 = build_int_cst (long_integer_type_node, 1);
4877 tree t3 = build_int_cstu (long_integer_type_node,
4878 (HOST_WIDE_INT_1U << 31) + 1);
4879 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4880 gimple_stmt_iterator gsi2 = gsi_none ();
4881 gimple *g = NULL;
4882 tree mem = null_pointer_node, memv = NULL_TREE;
4883 unsigned HOST_WIDE_INT condtemp_sz = 0;
4884 unsigned HOST_WIDE_INT alloc_align = 0;
4885 if (fd->have_reductemp)
4886 {
4887 gcc_assert (!fd->have_nonctrl_scantemp);
4888 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4889 reductions = OMP_CLAUSE_DECL (c);
4890 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4891 g = SSA_NAME_DEF_STMT (reductions);
4892 reductions = gimple_assign_rhs1 (g);
4893 OMP_CLAUSE_DECL (c) = reductions;
4894 gsi2 = gsi_for_stmt (g);
4895 }
4896 else
4897 {
4898 if (gsi_end_p (gsip))
4899 gsi2 = gsi_after_labels (region->entry);
4900 else
4901 gsi2 = gsip;
4902 reductions = null_pointer_node;
4903 }
4904 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4905 {
4906 tree type;
4907 if (fd->have_pointer_condtemp)
4908 type = TREE_TYPE (condtemp);
4909 else
4910 type = ptr_type_node;
4911 memv = create_tmp_var (type);
4912 TREE_ADDRESSABLE (memv) = 1;
4913 unsigned HOST_WIDE_INT sz = 0;
4914 tree size = NULL_TREE;
4915 if (fd->have_pointer_condtemp)
4916 {
4917 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4918 sz *= fd->lastprivate_conditional;
4919 condtemp_sz = sz;
4920 }
4921 if (fd->have_nonctrl_scantemp)
4922 {
4923 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4924 gimple *g = gimple_build_call (nthreads, 0);
4925 nthreads = create_tmp_var (integer_type_node);
4926 gimple_call_set_lhs (g, nthreads);
4927 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4928 nthreads = fold_convert (sizetype, nthreads);
4929 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4930 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4931 alloc_align, nthreads, NULL,
4932 false);
4933 size = fold_convert (type, size);
4934 }
4935 else
4936 size = build_int_cst (type, sz);
4937 expand_omp_build_assign (&gsi2, memv, size, false);
4938 mem = build_fold_addr_expr (memv);
4939 }
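/* Editorial note: the GOMP_loop_start call built below describes a
   degenerate loop (start 0, end 1, incr 1, chunk 0, NULL istart/iend);
   judging from the guarding conditions it is emitted for its side
   effects -- registering REDUCTIONS with the runtime and obtaining the
   runtime-managed memory that MEM points to -- rather than for any
   iteration scheduling.  */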
4940 tree t
4941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4942 9, t1, t2, t2, t3, t1, null_pointer_node,
4943 null_pointer_node, reductions, mem);
4944 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4945 true, GSI_SAME_STMT);
4946 if (fd->have_pointer_condtemp)
4947 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4948 if (fd->have_nonctrl_scantemp)
4949 {
4950 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4951 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4952 alloc_align, nthreads, &gsi2, false);
4953 }
4954 if (fd->have_reductemp)
4955 {
4956 gsi_remove (&gsi2, true);
4957 release_ssa_name (gimple_assign_lhs (g));
4958 }
4959 }
4960 switch (gimple_omp_for_kind (fd->for_stmt))
4961 {
4962 case GF_OMP_FOR_KIND_FOR:
4963 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4964 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4965 break;
4966 case GF_OMP_FOR_KIND_DISTRIBUTE:
4967 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4968 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4969 break;
4970 default:
4971 gcc_unreachable ();
4972 }
4973 nthreads = build_call_expr (nthreads, 0);
4974 nthreads = fold_convert (itype, nthreads);
4975 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4976 true, GSI_SAME_STMT);
4977 threadid = build_call_expr (threadid, 0);
4978 threadid = fold_convert (itype, threadid);
4979 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4980 true, GSI_SAME_STMT);
4981
4982 n1 = fd->loop.n1;
4983 n2 = fd->loop.n2;
4984 step = fd->loop.step;
4985 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4986 {
4987 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4988 OMP_CLAUSE__LOOPTEMP_);
4989 gcc_assert (innerc);
4990 n1 = OMP_CLAUSE_DECL (innerc);
4991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 OMP_CLAUSE__LOOPTEMP_);
4993 gcc_assert (innerc);
4994 n2 = OMP_CLAUSE_DECL (innerc);
4995 }
4996 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4997 true, NULL_TREE, true, GSI_SAME_STMT);
4998 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4999 true, NULL_TREE, true, GSI_SAME_STMT);
5000 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5001 true, NULL_TREE, true, GSI_SAME_STMT);
5002
5003 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5004 t = fold_build2 (PLUS_EXPR, itype, step, t);
5005 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5006 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5007 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5008 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5009 fold_build1 (NEGATE_EXPR, itype, t),
5010 fold_build1 (NEGATE_EXPR, itype, step));
5011 else
5012 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5013 t = fold_convert (itype, t);
5014 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5015
5016 q = create_tmp_reg (itype, "q");
5017 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5018 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5019 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5020
5021 tt = create_tmp_reg (itype, "tt");
5022 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5023 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5024 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5025
5026 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5027 gcond *cond_stmt = gimple_build_cond_empty (t);
5028 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5029
5030 second_bb = split_block (entry_bb, cond_stmt)->dest;
5031 gsi = gsi_last_nondebug_bb (second_bb);
5032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5033
5034 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5035 GSI_SAME_STMT);
5036 gassign *assign_stmt
5037 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5038 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5039
5040 third_bb = split_block (second_bb, assign_stmt)->dest;
5041 gsi = gsi_last_nondebug_bb (third_bb);
5042 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5043
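/* Editorial summary of the block below: for scan temporaries without a
   control clause the per-thread buffer size is derived from q, controlb
   records whether the total exceeds 16384 bytes, and the storage is then
   obtained either via malloc (freed on the exit path) or via a stack
   save followed by alloca (restored on the exit path).  */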
5044 if (fd->have_nonctrl_scantemp)
5045 {
5046 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5047 tree controlp = NULL_TREE, controlb = NULL_TREE;
5048 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5049 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5050 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5051 {
5052 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5053 controlb = OMP_CLAUSE_DECL (c);
5054 else
5055 controlp = OMP_CLAUSE_DECL (c);
5056 if (controlb && controlp)
5057 break;
5058 }
5059 gcc_assert (controlp && controlb);
5060 tree cnt = create_tmp_var (sizetype);
5061 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5062 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5063 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5064 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5065 alloc_align, cnt, NULL, true);
5066 tree size = create_tmp_var (sizetype);
5067 expand_omp_build_assign (&gsi, size, sz, false);
5068 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5069 size, size_int (16384));
5070 expand_omp_build_assign (&gsi, controlb, cmp);
5071 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5072 NULL_TREE, NULL_TREE);
5073 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5074 fourth_bb = split_block (third_bb, g)->dest;
5075 gsi = gsi_last_nondebug_bb (fourth_bb);
5076 /* FIXME: Once we have allocators, this should use allocator. */
5077 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5078 gimple_call_set_lhs (g, controlp);
5079 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5080 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5081 &gsi, true);
5082 gsi_prev (&gsi);
5083 g = gsi_stmt (gsi);
5084 fifth_bb = split_block (fourth_bb, g)->dest;
5085 gsi = gsi_last_nondebug_bb (fifth_bb);
5086
5087 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5088 gimple_call_set_lhs (g, controlp);
5089 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5090 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5091 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5092 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5093 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5094 {
5095 tree tmp = create_tmp_var (sizetype);
5096 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5097 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5098 TYPE_SIZE_UNIT (pointee_type));
5099 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5100 g = gimple_build_call (alloca_decl, 2, tmp,
5101 size_int (TYPE_ALIGN (pointee_type)));
5102 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5103 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5104 }
5105
5106 sixth_bb = split_block (fifth_bb, g)->dest;
5107 gsi = gsi_last_nondebug_bb (sixth_bb);
5108 }
5109
5110 t = build2 (MULT_EXPR, itype, q, threadid);
5111 t = build2 (PLUS_EXPR, itype, t, tt);
5112 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5113
5114 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5115 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5116
5117 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5118 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5119
5120 /* Remove the GIMPLE_OMP_FOR statement. */
5121 gsi_remove (&gsi, true);
5122
5123 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5124 gsi = gsi_start_bb (seq_start_bb);
5125
5126 tree startvar = fd->loop.v;
5127 tree endvar = NULL_TREE;
5128
5129 if (gimple_omp_for_combined_p (fd->for_stmt))
5130 {
5131 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5132 ? gimple_omp_parallel_clauses (inner_stmt)
5133 : gimple_omp_for_clauses (inner_stmt);
5134 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5135 gcc_assert (innerc);
5136 startvar = OMP_CLAUSE_DECL (innerc);
5137 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5138 OMP_CLAUSE__LOOPTEMP_);
5139 gcc_assert (innerc);
5140 endvar = OMP_CLAUSE_DECL (innerc);
5141 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5142 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5143 {
5144 innerc = find_lastprivate_looptemp (fd, innerc);
5145 if (innerc)
5146 {
5147 /* If needed (distribute parallel for with lastprivate),
5148 propagate down the total number of iterations. */
5149 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5150 fd->loop.n2);
5151 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5152 GSI_CONTINUE_LINKING);
5153 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5154 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5155 }
5156 }
5157 }
5158 t = fold_convert (itype, s0);
5159 t = fold_build2 (MULT_EXPR, itype, t, step);
5160 if (POINTER_TYPE_P (type))
5161 {
5162 t = fold_build_pointer_plus (n1, t);
5163 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5164 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5165 t = fold_convert (signed_type_for (type), t);
5166 }
5167 else
5168 t = fold_build2 (PLUS_EXPR, type, t, n1);
5169 t = fold_convert (TREE_TYPE (startvar), t);
5170 t = force_gimple_operand_gsi (&gsi, t,
5171 DECL_P (startvar)
5172 && TREE_ADDRESSABLE (startvar),
5173 NULL_TREE, false, GSI_CONTINUE_LINKING);
5174 assign_stmt = gimple_build_assign (startvar, t);
5175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5176 if (cond_var)
5177 {
5178 tree itype = TREE_TYPE (cond_var);
5179 /* For the lastprivate(conditional:) itervar, we need an iteration
5180 counter that starts at a non-zero unsigned value and increases.
5181 Prefer as few IVs as possible, so if we can use startvar
5182 itself, use that, or startvar + constant (those would be
5183 incremented with step), and as a last resort use s0 + 1,
5184 incremented by 1. */
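/* Illustration (editorial): for "for (i = 5; i < N2; i++)" the
   iterator itself can serve, as it is already positive and increasing;
   for "for (i = -3; i < N2; i++)" the counter becomes i + 4, which is
   1 on the first iteration.  */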
5185 if (POINTER_TYPE_P (type)
5186 || TREE_CODE (n1) != INTEGER_CST
5187 || fd->loop.cond_code != LT_EXPR)
5188 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5189 build_int_cst (itype, 1));
5190 else if (tree_int_cst_sgn (n1) == 1)
5191 t = fold_convert (itype, t);
5192 else
5193 {
5194 tree c = fold_convert (itype, n1);
5195 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5196 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5197 }
5198 t = force_gimple_operand_gsi (&gsi, t, false,
5199 NULL_TREE, false, GSI_CONTINUE_LINKING);
5200 assign_stmt = gimple_build_assign (cond_var, t);
5201 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5202 }
5203
5204 t = fold_convert (itype, e0);
5205 t = fold_build2 (MULT_EXPR, itype, t, step);
5206 if (POINTER_TYPE_P (type))
5207 {
5208 t = fold_build_pointer_plus (n1, t);
5209 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5210 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5211 t = fold_convert (signed_type_for (type), t);
5212 }
5213 else
5214 t = fold_build2 (PLUS_EXPR, type, t, n1);
5215 t = fold_convert (TREE_TYPE (startvar), t);
5216 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5217 false, GSI_CONTINUE_LINKING);
5218 if (endvar)
5219 {
5220 assign_stmt = gimple_build_assign (endvar, e);
5221 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5222 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5223 assign_stmt = gimple_build_assign (fd->loop.v, e);
5224 else
5225 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5226 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5227 }
5228 /* Handle linear clause adjustments. */
5229 tree itercnt = NULL_TREE;
5230 tree *nonrect_bounds = NULL;
5231 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5232 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5233 c; c = OMP_CLAUSE_CHAIN (c))
5234 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5235 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5236 {
5237 tree d = OMP_CLAUSE_DECL (c);
5238 tree t = d, a, dest;
5239 if (omp_privatize_by_reference (t))
5240 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5241 if (itercnt == NULL_TREE)
5242 {
5243 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5244 {
5245 itercnt = fold_build2 (MINUS_EXPR, itype,
5246 fold_convert (itype, n1),
5247 fold_convert (itype, fd->loop.n1));
5248 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5249 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5250 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5251 NULL_TREE, false,
5252 GSI_CONTINUE_LINKING);
5253 }
5254 else
5255 itercnt = s0;
5256 }
5257 tree type = TREE_TYPE (t);
5258 if (POINTER_TYPE_P (type))
5259 type = sizetype;
5260 a = fold_build2 (MULT_EXPR, type,
5261 fold_convert (type, itercnt),
5262 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5263 dest = unshare_expr (t);
5264 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5265 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5266 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5267 false, GSI_CONTINUE_LINKING);
5268 expand_omp_build_assign (&gsi, dest, t, true);
5269 }
5270 if (fd->collapse > 1)
5271 {
5272 if (fd->non_rect)
5273 {
5274 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5275 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5276 }
5277 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5278 startvar);
5279 }
5280
5281 if (!broken_loop)
5282 {
5283 /* The code controlling the sequential loop replaces the
5284 GIMPLE_OMP_CONTINUE. */
5285 gsi = gsi_last_nondebug_bb (cont_bb);
5286 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5287 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5288 vmain = gimple_omp_continue_control_use (cont_stmt);
5289 vback = gimple_omp_continue_control_def (cont_stmt);
5290
5291 if (cond_var)
5292 {
5293 tree itype = TREE_TYPE (cond_var);
5294 tree t2;
5295 if (POINTER_TYPE_P (type)
5296 || TREE_CODE (n1) != INTEGER_CST
5297 || fd->loop.cond_code != LT_EXPR)
5298 t2 = build_int_cst (itype, 1);
5299 else
5300 t2 = fold_convert (itype, step);
5301 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5302 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5303 NULL_TREE, true, GSI_SAME_STMT);
5304 assign_stmt = gimple_build_assign (cond_var, t2);
5305 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5306 }
5307
5308 if (!gimple_omp_for_combined_p (fd->for_stmt))
5309 {
5310 if (POINTER_TYPE_P (type))
5311 t = fold_build_pointer_plus (vmain, step);
5312 else
5313 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5314 t = force_gimple_operand_gsi (&gsi, t,
5315 DECL_P (vback)
5316 && TREE_ADDRESSABLE (vback),
5317 NULL_TREE, true, GSI_SAME_STMT);
5318 assign_stmt = gimple_build_assign (vback, t);
5319 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5320
5321 t = build2 (fd->loop.cond_code, boolean_type_node,
5322 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5323 ? t : vback, e);
5324 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5325 }
5326
5327 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5328 gsi_remove (&gsi, true);
5329
5330 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5331 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5332 cont_bb, body_bb);
5333 }
5334
5335 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5336 gsi = gsi_last_nondebug_bb (exit_bb);
5337 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5338 {
5339 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5340 if (fd->have_reductemp
5341 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5342 && !fd->have_nonctrl_scantemp))
5343 {
5344 tree fn;
5345 if (t)
5346 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5347 else
5348 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5349 gcall *g = gimple_build_call (fn, 0);
5350 if (t)
5351 {
5352 gimple_call_set_lhs (g, t);
5353 if (fd->have_reductemp)
5354 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5355 NOP_EXPR, t),
5356 GSI_SAME_STMT);
5357 }
5358 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5359 }
5360 else
5361 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5362 }
5363 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5364 && !fd->have_nonctrl_scantemp)
5365 {
5366 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5367 gcall *g = gimple_build_call (fn, 0);
5368 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5369 }
5370 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5371 {
5372 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5373 tree controlp = NULL_TREE, controlb = NULL_TREE;
5374 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5375 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5376 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5377 {
5378 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5379 controlb = OMP_CLAUSE_DECL (c);
5380 else
5381 controlp = OMP_CLAUSE_DECL (c);
5382 if (controlb && controlp)
5383 break;
5384 }
5385 gcc_assert (controlp && controlb);
5386 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5387 NULL_TREE, NULL_TREE);
5388 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5389 exit1_bb = split_block (exit_bb, g)->dest;
5390 gsi = gsi_after_labels (exit1_bb);
5391 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5392 controlp);
5393 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5394 exit2_bb = split_block (exit1_bb, g)->dest;
5395 gsi = gsi_after_labels (exit2_bb);
5396 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5397 controlp);
5398 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5399 exit3_bb = split_block (exit2_bb, g)->dest;
5400 gsi = gsi_after_labels (exit3_bb);
5401 }
5402 gsi_remove (&gsi, true);
5403
5404 /* Connect all the blocks. */
5405 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5406 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5407 ep = find_edge (entry_bb, second_bb);
5408 ep->flags = EDGE_TRUE_VALUE;
5409 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5410 if (fourth_bb)
5411 {
5412 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (third_bb, fourth_bb);
5416 ep->flags = EDGE_TRUE_VALUE;
5417 ep->probability
5418 = profile_probability::guessed_always ().apply_scale (1, 2);
5419 ep = find_edge (fourth_bb, fifth_bb);
5420 redirect_edge_and_branch (ep, sixth_bb);
5421 }
5422 else
5423 sixth_bb = third_bb;
5424 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5425 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5426 if (exit1_bb)
5427 {
5428 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5429 ep->probability
5430 = profile_probability::guessed_always ().apply_scale (1, 2);
5431 ep = find_edge (exit_bb, exit1_bb);
5432 ep->flags = EDGE_TRUE_VALUE;
5433 ep->probability
5434 = profile_probability::guessed_always ().apply_scale (1, 2);
5435 ep = find_edge (exit1_bb, exit2_bb);
5436 redirect_edge_and_branch (ep, exit3_bb);
5437 }
5438
5439 if (!broken_loop)
5440 {
5441 ep = find_edge (cont_bb, body_bb);
5442 if (ep == NULL)
5443 {
5444 ep = BRANCH_EDGE (cont_bb);
5445 gcc_assert (single_succ (ep->dest) == body_bb);
5446 }
5447 if (gimple_omp_for_combined_p (fd->for_stmt))
5448 {
5449 remove_edge (ep);
5450 ep = NULL;
5451 }
5452 else if (fd->collapse > 1)
5453 {
5454 remove_edge (ep);
5455 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5456 }
5457 else
5458 ep->flags = EDGE_TRUE_VALUE;
5459 find_edge (cont_bb, fin_bb)->flags
5460 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5461 }
5462
5463 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5464 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5465 if (fourth_bb)
5466 {
5467 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5468 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5469 }
5470 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5471
5472 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5473 recompute_dominator (CDI_DOMINATORS, body_bb));
5474 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5475 recompute_dominator (CDI_DOMINATORS, fin_bb));
5476 if (exit1_bb)
5477 {
5478 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5479 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5480 }
5481
5482 class loop *loop = body_bb->loop_father;
5483 if (loop != entry_bb->loop_father)
5484 {
5485 gcc_assert (broken_loop || loop->header == body_bb);
5486 gcc_assert (broken_loop
5487 || loop->latch == region->cont
5488 || single_pred (loop->latch) == region->cont);
5489 return;
5490 }
5491
5492 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5493 {
5494 loop = alloc_loop ();
5495 loop->header = body_bb;
5496 if (collapse_bb == NULL)
5497 loop->latch = cont_bb;
5498 add_loop (loop, body_bb->loop_father);
5499 }
5500 }
5501
5502 /* Return phi in E->DEST with ARG on edge E. */
5503
5504 static gphi *
5505 find_phi_with_arg_on_edge (tree arg, edge e)
5506 {
5507 basic_block bb = e->dest;
5508
5509 for (gphi_iterator gpi = gsi_start_phis (bb);
5510 !gsi_end_p (gpi);
5511 gsi_next (&gpi))
5512 {
5513 gphi *phi = gpi.phi ();
5514 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5515 return phi;
5516 }
5517
5518 return NULL;
5519 }
5520
5521 /* A subroutine of expand_omp_for. Generate code for a parallel
5522 loop with static schedule and a specified chunk size. Given
5523 parameters:
5524
5525 for (V = N1; V cond N2; V += STEP) BODY;
5526
5527 where COND is "<" or ">", we generate pseudocode
5528
5529 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5530 if (cond is <)
5531 adj = STEP - 1;
5532 else
5533 adj = STEP + 1;
5534 if ((__typeof (V)) -1 > 0 && cond is >)
5535 n = -(adj + N2 - N1) / -STEP;
5536 else
5537 n = (adj + N2 - N1) / STEP;
5538 trip = 0;
5539 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5540 here so that V is defined
5541 if the loop is not entered
5542 L0:
5543 s0 = (trip * nthreads + threadid) * CHUNK;
5544 e0 = min (s0 + CHUNK, n);
5545 if (s0 < n) goto L1; else goto L4;
5546 L1:
5547 V = s0 * STEP + N1;
5548 e = e0 * STEP + N1;
5549 L2:
5550 BODY;
5551 V += STEP;
5552 if (V cond e) goto L2; else goto L3;
5553 L3:
5554 trip += 1;
5555 goto L0;
5556 L4:
5557 */
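/* Worked example (editorial, not from the original source): with n = 10,
   nthreads = 2 and CHUNK = 2, thread 0 executes [0,2), [4,6) and [8,10)
   on trips 0, 1 and 2, while thread 1 executes [2,4) and [6,8); on trip 2
   thread 1 computes s0 = 10, the test s0 < n fails and it exits.  */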
5558
5559 static void
5560 expand_omp_for_static_chunk (struct omp_region *region,
5561 struct omp_for_data *fd, gimple *inner_stmt)
5562 {
5563 tree n, s0, e0, e, t;
5564 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5565 tree type, itype, vmain, vback, vextra;
5566 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5567 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5568 gimple_stmt_iterator gsi, gsip;
5569 edge se;
5570 bool broken_loop = region->cont == NULL;
5571 tree *counts = NULL;
5572 tree n1, n2, step;
5573 tree reductions = NULL_TREE;
5574 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5575
5576 itype = type = TREE_TYPE (fd->loop.v);
5577 if (POINTER_TYPE_P (type))
5578 itype = signed_type_for (type);
5579
5580 entry_bb = region->entry;
5581 se = split_block (entry_bb, last_stmt (entry_bb));
5582 entry_bb = se->src;
5583 iter_part_bb = se->dest;
5584 cont_bb = region->cont;
5585 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5586 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5587 gcc_assert (broken_loop
5588 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5589 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5590 body_bb = single_succ (seq_start_bb);
5591 if (!broken_loop)
5592 {
5593 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5594 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5595 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5596 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5597 }
5598 exit_bb = region->exit;
5599
5600 /* Trip and adjustment setup goes in ENTRY_BB. */
5601 gsi = gsi_last_nondebug_bb (entry_bb);
5602 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5603 gsip = gsi;
5604 gsi_prev (&gsip);
5605
5606 if (fd->collapse > 1)
5607 {
5608 int first_zero_iter = -1, dummy = -1;
5609 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5610
5611 counts = XALLOCAVEC (tree, fd->collapse);
5612 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5613 fin_bb, first_zero_iter,
5614 dummy_bb, dummy, l2_dom_bb);
5615 t = NULL_TREE;
5616 }
5617 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5618 t = integer_one_node;
5619 else
5620 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5621 fold_convert (type, fd->loop.n1),
5622 fold_convert (type, fd->loop.n2));
5623 if (fd->collapse == 1
5624 && TYPE_UNSIGNED (type)
5625 && (t == NULL_TREE || !integer_onep (t)))
5626 {
5627 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5628 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5629 true, GSI_SAME_STMT);
5630 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5631 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5632 true, GSI_SAME_STMT);
5633 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5634 NULL_TREE, NULL_TREE);
5635 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5636 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5637 expand_omp_regimplify_p, NULL, NULL)
5638 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5639 expand_omp_regimplify_p, NULL, NULL))
5640 {
5641 gsi = gsi_for_stmt (cond_stmt);
5642 gimple_regimplify_operands (cond_stmt, &gsi);
5643 }
5644 se = split_block (entry_bb, cond_stmt);
5645 se->flags = EDGE_TRUE_VALUE;
5646 entry_bb = se->dest;
5647 se->probability = profile_probability::very_likely ();
5648 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5649 se->probability = profile_probability::very_unlikely ();
5650 if (gimple_in_ssa_p (cfun))
5651 {
5652 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5653 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5654 !gsi_end_p (gpi); gsi_next (&gpi))
5655 {
5656 gphi *phi = gpi.phi ();
5657 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5658 se, UNKNOWN_LOCATION);
5659 }
5660 }
5661 gsi = gsi_last_bb (entry_bb);
5662 }
5663
5664 if (fd->lastprivate_conditional)
5665 {
5666 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5667 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5668 if (fd->have_pointer_condtemp)
5669 condtemp = OMP_CLAUSE_DECL (c);
5670 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5671 cond_var = OMP_CLAUSE_DECL (c);
5672 }
5673 if (fd->have_reductemp || fd->have_pointer_condtemp)
5674 {
5675 tree t1 = build_int_cst (long_integer_type_node, 0);
5676 tree t2 = build_int_cst (long_integer_type_node, 1);
5677 tree t3 = build_int_cstu (long_integer_type_node,
5678 (HOST_WIDE_INT_1U << 31) + 1);
5679 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5680 gimple_stmt_iterator gsi2 = gsi_none ();
5681 gimple *g = NULL;
5682 tree mem = null_pointer_node, memv = NULL_TREE;
5683 if (fd->have_reductemp)
5684 {
5685 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5686 reductions = OMP_CLAUSE_DECL (c);
5687 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5688 g = SSA_NAME_DEF_STMT (reductions);
5689 reductions = gimple_assign_rhs1 (g);
5690 OMP_CLAUSE_DECL (c) = reductions;
5691 gsi2 = gsi_for_stmt (g);
5692 }
5693 else
5694 {
5695 if (gsi_end_p (gsip))
5696 gsi2 = gsi_after_labels (region->entry);
5697 else
5698 gsi2 = gsip;
5699 reductions = null_pointer_node;
5700 }
5701 if (fd->have_pointer_condtemp)
5702 {
5703 tree type = TREE_TYPE (condtemp);
5704 memv = create_tmp_var (type);
5705 TREE_ADDRESSABLE (memv) = 1;
5706 unsigned HOST_WIDE_INT sz
5707 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5708 sz *= fd->lastprivate_conditional;
5709 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5710 false);
5711 mem = build_fold_addr_expr (memv);
5712 }
5713 tree t
5714 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5715 9, t1, t2, t2, t3, t1, null_pointer_node,
5716 null_pointer_node, reductions, mem);
5717 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5718 true, GSI_SAME_STMT);
5719 if (fd->have_pointer_condtemp)
5720 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5721 if (fd->have_reductemp)
5722 {
5723 gsi_remove (&gsi2, true);
5724 release_ssa_name (gimple_assign_lhs (g));
5725 }
5726 }
5727 switch (gimple_omp_for_kind (fd->for_stmt))
5728 {
5729 case GF_OMP_FOR_KIND_FOR:
5730 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5731 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5732 break;
5733 case GF_OMP_FOR_KIND_DISTRIBUTE:
5734 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5735 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5736 break;
5737 default:
5738 gcc_unreachable ();
5739 }
5740 nthreads = build_call_expr (nthreads, 0);
5741 nthreads = fold_convert (itype, nthreads);
5742 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5743 true, GSI_SAME_STMT);
5744 threadid = build_call_expr (threadid, 0);
5745 threadid = fold_convert (itype, threadid);
5746 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5747 true, GSI_SAME_STMT);
5748
5749 n1 = fd->loop.n1;
5750 n2 = fd->loop.n2;
5751 step = fd->loop.step;
5752 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5753 {
5754 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5755 OMP_CLAUSE__LOOPTEMP_);
5756 gcc_assert (innerc);
5757 n1 = OMP_CLAUSE_DECL (innerc);
5758 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5759 OMP_CLAUSE__LOOPTEMP_);
5760 gcc_assert (innerc);
5761 n2 = OMP_CLAUSE_DECL (innerc);
5762 }
5763 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5764 true, NULL_TREE, true, GSI_SAME_STMT);
5765 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5766 true, NULL_TREE, true, GSI_SAME_STMT);
5767 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5768 true, NULL_TREE, true, GSI_SAME_STMT);
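/* Editorial note: omp_adjust_chunk_size is understood to round the
   chunk size up to a multiple of the vectorization factor when the
   simd modifier is present (fd->simd_schedule), so that simd vectors
   are not split across chunk boundaries.  */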
5769 tree chunk_size = fold_convert (itype, fd->chunk_size);
5770 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5771 chunk_size
5772 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5773 GSI_SAME_STMT);
5774
5775 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5776 t = fold_build2 (PLUS_EXPR, itype, step, t);
5777 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5778 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5779 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5780 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5781 fold_build1 (NEGATE_EXPR, itype, t),
5782 fold_build1 (NEGATE_EXPR, itype, step));
5783 else
5784 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5785 t = fold_convert (itype, t);
5786 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5787 true, GSI_SAME_STMT);
5788
5789 trip_var = create_tmp_reg (itype, ".trip");
5790 if (gimple_in_ssa_p (cfun))
5791 {
5792 trip_init = make_ssa_name (trip_var);
5793 trip_main = make_ssa_name (trip_var);
5794 trip_back = make_ssa_name (trip_var);
5795 }
5796 else
5797 {
5798 trip_init = trip_var;
5799 trip_main = trip_var;
5800 trip_back = trip_var;
5801 }
5802
5803 gassign *assign_stmt
5804 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5805 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5806
5807 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5808 t = fold_build2 (MULT_EXPR, itype, t, step);
5809 if (POINTER_TYPE_P (type))
5810 t = fold_build_pointer_plus (n1, t);
5811 else
5812 t = fold_build2 (PLUS_EXPR, type, t, n1);
5813 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5814 true, GSI_SAME_STMT);
5815
5816 /* Remove the GIMPLE_OMP_FOR. */
5817 gsi_remove (&gsi, true);
5818
5819 gimple_stmt_iterator gsif = gsi;
5820
5821 /* Iteration space partitioning goes in ITER_PART_BB. */
5822 gsi = gsi_last_bb (iter_part_bb);
5823
5824 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5825 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5826 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5827 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5828 false, GSI_CONTINUE_LINKING);
5829
5830 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5831 t = fold_build2 (MIN_EXPR, itype, t, n);
5832 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5833 false, GSI_CONTINUE_LINKING);
5834
5835 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5836 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5837
5838 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5839 gsi = gsi_start_bb (seq_start_bb);
5840
5841 tree startvar = fd->loop.v;
5842 tree endvar = NULL_TREE;
5843
5844 if (gimple_omp_for_combined_p (fd->for_stmt))
5845 {
5846 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5847 ? gimple_omp_parallel_clauses (inner_stmt)
5848 : gimple_omp_for_clauses (inner_stmt);
5849 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5850 gcc_assert (innerc);
5851 startvar = OMP_CLAUSE_DECL (innerc);
5852 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5853 OMP_CLAUSE__LOOPTEMP_);
5854 gcc_assert (innerc);
5855 endvar = OMP_CLAUSE_DECL (innerc);
5856 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5857 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5858 {
5859 innerc = find_lastprivate_looptemp (fd, innerc);
5860 if (innerc)
5861 {
5862 /* If needed (distribute parallel for with lastprivate),
5863 propagate down the total number of iterations. */
5864 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5865 fd->loop.n2);
5866 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5867 GSI_CONTINUE_LINKING);
5868 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5869 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5870 }
5871 }
5872 }
5873
5874 t = fold_convert (itype, s0);
5875 t = fold_build2 (MULT_EXPR, itype, t, step);
5876 if (POINTER_TYPE_P (type))
5877 {
5878 t = fold_build_pointer_plus (n1, t);
5879 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5880 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5881 t = fold_convert (signed_type_for (type), t);
5882 }
5883 else
5884 t = fold_build2 (PLUS_EXPR, type, t, n1);
5885 t = fold_convert (TREE_TYPE (startvar), t);
5886 t = force_gimple_operand_gsi (&gsi, t,
5887 DECL_P (startvar)
5888 && TREE_ADDRESSABLE (startvar),
5889 NULL_TREE, false, GSI_CONTINUE_LINKING);
5890 assign_stmt = gimple_build_assign (startvar, t);
5891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5892 if (cond_var)
5893 {
5894 tree itype = TREE_TYPE (cond_var);
5895 /* For the lastprivate(conditional:) itervar, we need an iteration
5896 counter that starts at a non-zero unsigned value and increases.
5897 Prefer as few IVs as possible, so if we can use startvar
5898 itself, use that, or startvar + constant (those would be
5899 incremented with step), and as a last resort use s0 + 1,
5900 incremented by 1. */
5901 if (POINTER_TYPE_P (type)
5902 || TREE_CODE (n1) != INTEGER_CST
5903 || fd->loop.cond_code != LT_EXPR)
5904 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5905 build_int_cst (itype, 1));
5906 else if (tree_int_cst_sgn (n1) == 1)
5907 t = fold_convert (itype, t);
5908 else
5909 {
5910 tree c = fold_convert (itype, n1);
5911 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5912 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5913 }
5914 t = force_gimple_operand_gsi (&gsi, t, false,
5915 NULL_TREE, false, GSI_CONTINUE_LINKING);
5916 assign_stmt = gimple_build_assign (cond_var, t);
5917 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5918 }
5919
5920 t = fold_convert (itype, e0);
5921 t = fold_build2 (MULT_EXPR, itype, t, step);
5922 if (POINTER_TYPE_P (type))
5923 {
5924 t = fold_build_pointer_plus (n1, t);
5925 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5926 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5927 t = fold_convert (signed_type_for (type), t);
5928 }
5929 else
5930 t = fold_build2 (PLUS_EXPR, type, t, n1);
5931 t = fold_convert (TREE_TYPE (startvar), t);
5932 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5933 false, GSI_CONTINUE_LINKING);
5934 if (endvar)
5935 {
5936 assign_stmt = gimple_build_assign (endvar, e);
5937 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5938 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5939 assign_stmt = gimple_build_assign (fd->loop.v, e);
5940 else
5941 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5943 }
5944 /* Handle linear clause adjustments. */
5945 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5946 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5947 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5948 c; c = OMP_CLAUSE_CHAIN (c))
5949 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5950 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5951 {
5952 tree d = OMP_CLAUSE_DECL (c);
5953 tree t = d, a, dest;
5954 if (omp_privatize_by_reference (t))
5955 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5956 tree type = TREE_TYPE (t);
5957 if (POINTER_TYPE_P (type))
5958 type = sizetype;
5959 dest = unshare_expr (t);
5960 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5961 expand_omp_build_assign (&gsif, v, t);
5962 if (itercnt == NULL_TREE)
5963 {
5964 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5965 {
5966 itercntbias
5967 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5968 fold_convert (itype, fd->loop.n1));
5969 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5970 itercntbias, step);
5971 itercntbias
5972 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5973 NULL_TREE, true,
5974 GSI_SAME_STMT);
5975 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5976 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5977 NULL_TREE, false,
5978 GSI_CONTINUE_LINKING);
5979 }
5980 else
5981 itercnt = s0;
5982 }
5983 a = fold_build2 (MULT_EXPR, type,
5984 fold_convert (type, itercnt),
5985 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5986 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5987 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5988 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5989 false, GSI_CONTINUE_LINKING);
5990 expand_omp_build_assign (&gsi, dest, t, true);
5991 }
5992 if (fd->collapse > 1)
5993 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5994
5995 if (!broken_loop)
5996 {
5997 /* The code controlling the sequential loop goes in CONT_BB,
5998 replacing the GIMPLE_OMP_CONTINUE. */
5999 gsi = gsi_last_nondebug_bb (cont_bb);
6000 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6001 vmain = gimple_omp_continue_control_use (cont_stmt);
6002 vback = gimple_omp_continue_control_def (cont_stmt);
6003
6004 if (cond_var)
6005 {
6006 tree itype = TREE_TYPE (cond_var);
6007 tree t2;
6008 if (POINTER_TYPE_P (type)
6009 || TREE_CODE (n1) != INTEGER_CST
6010 || fd->loop.cond_code != LT_EXPR)
6011 t2 = build_int_cst (itype, 1);
6012 else
6013 t2 = fold_convert (itype, step);
6014 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6015 t2 = force_gimple_operand_gsi (&gsi, t2, false,
6016 NULL_TREE, true, GSI_SAME_STMT);
6017 assign_stmt = gimple_build_assign (cond_var, t2);
6018 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6019 }
6020
6021 if (!gimple_omp_for_combined_p (fd->for_stmt))
6022 {
6023 if (POINTER_TYPE_P (type))
6024 t = fold_build_pointer_plus (vmain, step);
6025 else
6026 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6027 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6028 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6029 true, GSI_SAME_STMT);
6030 assign_stmt = gimple_build_assign (vback, t);
6031 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6032
6033 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6034 t = build2 (EQ_EXPR, boolean_type_node,
6035 build_int_cst (itype, 0),
6036 build_int_cst (itype, 1));
6037 else
6038 t = build2 (fd->loop.cond_code, boolean_type_node,
6039 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6040 ? t : vback, e);
6041 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6042 }
6043
6044 /* Remove GIMPLE_OMP_CONTINUE. */
6045 gsi_remove (&gsi, true);
6046
6047 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6048 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6049
6050 /* Trip update code goes into TRIP_UPDATE_BB. */
6051 gsi = gsi_start_bb (trip_update_bb);
6052
6053 t = build_int_cst (itype, 1);
6054 t = build2 (PLUS_EXPR, itype, trip_main, t);
6055 assign_stmt = gimple_build_assign (trip_back, t);
6056 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6057 }
6058
6059 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6060 gsi = gsi_last_nondebug_bb (exit_bb);
6061 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6062 {
6063 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6064 if (fd->have_reductemp || fd->have_pointer_condtemp)
6065 {
6066 tree fn;
6067 if (t)
6068 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6069 else
6070 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6071 gcall *g = gimple_build_call (fn, 0);
6072 if (t)
6073 {
6074 gimple_call_set_lhs (g, t);
6075 if (fd->have_reductemp)
6076 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6077 NOP_EXPR, t),
6078 GSI_SAME_STMT);
6079 }
6080 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6081 }
6082 else
6083 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6084 }
6085 else if (fd->have_pointer_condtemp)
6086 {
6087 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6088 gcall *g = gimple_build_call (fn, 0);
6089 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6090 }
6091 gsi_remove (&gsi, true);
6092
6093 /* Connect the new blocks. */
6094 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6095 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6096
6097 if (!broken_loop)
6098 {
6099 se = find_edge (cont_bb, body_bb);
6100 if (se == NULL)
6101 {
6102 se = BRANCH_EDGE (cont_bb);
6103 gcc_assert (single_succ (se->dest) == body_bb);
6104 }
6105 if (gimple_omp_for_combined_p (fd->for_stmt))
6106 {
6107 remove_edge (se);
6108 se = NULL;
6109 }
6110 else if (fd->collapse > 1)
6111 {
6112 remove_edge (se);
6113 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6114 }
6115 else
6116 se->flags = EDGE_TRUE_VALUE;
6117 find_edge (cont_bb, trip_update_bb)->flags
6118 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6119
6120 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6121 iter_part_bb);
6122 }
6123
6124 if (gimple_in_ssa_p (cfun))
6125 {
6126 gphi_iterator psi;
6127 gphi *phi;
6128 edge re, ene;
6129 edge_var_map *vm;
6130 size_t i;
6131
6132 gcc_assert (fd->collapse == 1 && !broken_loop);
6133
6134 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6135 remove arguments of the phi nodes in fin_bb. We need to create
6136 appropriate phi nodes in iter_part_bb instead. */
6137 se = find_edge (iter_part_bb, fin_bb);
6138 re = single_succ_edge (trip_update_bb);
6139 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6140 ene = single_succ_edge (entry_bb);
6141
6142 psi = gsi_start_phis (fin_bb);
6143 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6144 gsi_next (&psi), ++i)
6145 {
6146 gphi *nphi;
6147 location_t locus;
6148
6149 phi = psi.phi ();
6150 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6151 redirect_edge_var_map_def (vm), 0))
6152 continue;
6153
6154 t = gimple_phi_result (phi);
6155 gcc_assert (t == redirect_edge_var_map_result (vm));
6156
6157 if (!single_pred_p (fin_bb))
6158 t = copy_ssa_name (t, phi);
6159
6160 nphi = create_phi_node (t, iter_part_bb);
6161
6162 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6163 locus = gimple_phi_arg_location_from_edge (phi, se);
6164
6165 /* A special case -- fd->loop.v is not yet computed in
6166 iter_part_bb, we need to use vextra instead. */
6167 if (t == fd->loop.v)
6168 t = vextra;
6169 add_phi_arg (nphi, t, ene, locus);
6170 locus = redirect_edge_var_map_location (vm);
6171 tree back_arg = redirect_edge_var_map_def (vm);
6172 add_phi_arg (nphi, back_arg, re, locus);
6173 edge ce = find_edge (cont_bb, body_bb);
6174 if (ce == NULL)
6175 {
6176 ce = BRANCH_EDGE (cont_bb);
6177 gcc_assert (single_succ (ce->dest) == body_bb);
6178 ce = single_succ_edge (ce->dest);
6179 }
6180 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6181 gcc_assert (inner_loop_phi != NULL);
6182 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6183 find_edge (seq_start_bb, body_bb), locus);
6184
6185 if (!single_pred_p (fin_bb))
6186 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6187 }
6188 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6189 redirect_edge_var_map_clear (re);
6190 if (single_pred_p (fin_bb))
6191 while (1)
6192 {
6193 psi = gsi_start_phis (fin_bb);
6194 if (gsi_end_p (psi))
6195 break;
6196 remove_phi_node (&psi, false);
6197 }
6198
6199 /* Make phi node for trip. */
6200 phi = create_phi_node (trip_main, iter_part_bb);
6201 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6202 UNKNOWN_LOCATION);
6203 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6204 UNKNOWN_LOCATION);
6205 }
6206
6207 if (!broken_loop)
6208 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6209 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6210 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6211 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6212 recompute_dominator (CDI_DOMINATORS, fin_bb));
6213 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6214 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6215 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6216 recompute_dominator (CDI_DOMINATORS, body_bb));
6217
6218 if (!broken_loop)
6219 {
6220 class loop *loop = body_bb->loop_father;
6221 class loop *trip_loop = alloc_loop ();
6222 trip_loop->header = iter_part_bb;
6223 trip_loop->latch = trip_update_bb;
6224 add_loop (trip_loop, iter_part_bb->loop_father);
6225
6226 if (loop != entry_bb->loop_father)
6227 {
6228 gcc_assert (loop->header == body_bb);
6229 gcc_assert (loop->latch == region->cont
6230 || single_pred (loop->latch) == region->cont);
6231 trip_loop->inner = loop;
6232 return;
6233 }
6234
6235 if (!gimple_omp_for_combined_p (fd->for_stmt))
6236 {
6237 loop = alloc_loop ();
6238 loop->header = body_bb;
6239 if (collapse_bb == NULL)
6240 loop->latch = cont_bb;
6241 add_loop (loop, trip_loop);
6242 }
6243 }
6244 }
6245
6246 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6247 loop. Given parameters:
6248
6249 for (V = N1; V cond N2; V += STEP) BODY;
6250
6251 where COND is "<" or ">", we generate pseudocode
6252
6253 V = N1;
6254 goto L1;
6255 L0:
6256 BODY;
6257 V += STEP;
6258 L1:
6259 if (V cond N2) goto L0; else goto L2;
6260 L2:
6261
6262 For collapsed loops, emit the outer loops as scalar
6263 and only try to vectorize the innermost loop. */
6264
6265 static void
6266 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6267 {
6268 tree type, t;
6269 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6270 gimple_stmt_iterator gsi;
6271 gimple *stmt;
6272 gcond *cond_stmt;
6273 bool broken_loop = region->cont == NULL;
6274 edge e, ne;
6275 tree *counts = NULL;
6276 int i;
6277 int safelen_int = INT_MAX;
6278 bool dont_vectorize = false;
6279 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6280 OMP_CLAUSE_SAFELEN);
6281 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6282 OMP_CLAUSE__SIMDUID_);
6283 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6284 OMP_CLAUSE_IF);
6285 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6286 OMP_CLAUSE_SIMDLEN);
6287 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6288 OMP_CLAUSE__CONDTEMP_);
6289 tree n1, n2;
6290 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6291
6292 if (safelen)
6293 {
6294 poly_uint64 val;
6295 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6296 if (!poly_int_tree_p (safelen, &val))
6297 safelen_int = 0;
6298 else
6299 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6300 if (safelen_int == 1)
6301 safelen_int = 0;
6302 }
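/* Editorial example: safelen(4) yields safelen_int == 4; a safelen that
   is not a (poly-)integer constant degrades to 0, constant values are
   capped at INT_MAX, and safelen(1) is canonicalized to 0 because a
   single-lane loop gains nothing from vectorization.  */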
6303 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6304 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6305 {
6306 safelen_int = 0;
6307 dont_vectorize = true;
6308 }
6309 type = TREE_TYPE (fd->loop.v);
6310 entry_bb = region->entry;
6311 cont_bb = region->cont;
6312 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6313 gcc_assert (broken_loop
6314 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6315 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6316 if (!broken_loop)
6317 {
6318 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6319 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6320 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6321 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6322 }
6323 else
6324 {
6325 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6326 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6327 l2_bb = single_succ (l1_bb);
6328 }
6329 exit_bb = region->exit;
6330 l2_dom_bb = NULL;
6331
6332 gsi = gsi_last_nondebug_bb (entry_bb);
6333
6334 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6335 /* Not needed in SSA form right now. */
6336 gcc_assert (!gimple_in_ssa_p (cfun));
6337 if (fd->collapse > 1
6338 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6339 || broken_loop))
6340 {
6341 int first_zero_iter = -1, dummy = -1;
6342 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6343
6344 counts = XALLOCAVEC (tree, fd->collapse);
6345 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6346 zero_iter_bb, first_zero_iter,
6347 dummy_bb, dummy, l2_dom_bb);
6348 }
6349 if (l2_dom_bb == NULL)
6350 l2_dom_bb = l1_bb;
6351
6352 n1 = fd->loop.n1;
6353 n2 = fd->loop.n2;
6354 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6355 {
6356 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6357 OMP_CLAUSE__LOOPTEMP_);
6358 gcc_assert (innerc);
6359 n1 = OMP_CLAUSE_DECL (innerc);
6360 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6361 OMP_CLAUSE__LOOPTEMP_);
6362 gcc_assert (innerc);
6363 n2 = OMP_CLAUSE_DECL (innerc);
6364 }
6365 tree step = fd->loop.step;
6366 tree orig_step = step; /* May be different from step if is_simt. */
6367
6368 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6369 OMP_CLAUSE__SIMT_);
6370 if (is_simt)
6371 {
6372 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6373 is_simt = safelen_int > 1;
6374 }
6375 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
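/* Editorial summary: under SIMT each lane offsets its start by
   simt_lane * STEP and then strides by VF * STEP, so the lanes
   interleave over the iteration space; simt_maxlane caps the effective
   lane count when safelen is small or the loop is collapsed.  */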
6376 if (is_simt)
6377 {
629b3d75
MJ
6378 simt_lane = create_tmp_var (unsigned_type_node);
6379 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6380 gimple_call_set_lhs (g, simt_lane);
6381 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6382 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6383 fold_convert (TREE_TYPE (step), simt_lane));
6384 n1 = fold_convert (type, n1);
6385 if (POINTER_TYPE_P (type))
6386 n1 = fold_build_pointer_plus (n1, offset);
6387 else
6388 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6389
6390 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6391 if (fd->collapse > 1)
6392 simt_maxlane = build_one_cst (unsigned_type_node);
6393 else if (safelen_int < omp_max_simt_vf ())
6394 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6395 tree vf
6396 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6397 unsigned_type_node, 0);
6398 if (simt_maxlane)
6399 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6400 vf = fold_convert (TREE_TYPE (step), vf);
6401 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6402 }
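      /* Editorial sketch of the lane distribution set up above: with
	 N1 = 0, STEP = 1 and a SIMT VF of 32, lane L starts at L and
	 then strides by 32, i.e. iterates L, L+32, L+64, ... so the
	 lanes jointly cover the iteration space.  */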
6403
c2ebf4f1
JJ
6404 tree n2var = NULL_TREE;
6405 tree n2v = NULL_TREE;
6406 tree *nonrect_bounds = NULL;
83f565ed 6407 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
629b3d75
MJ
6408 if (fd->collapse > 1)
6409 {
c2ebf4f1 6410 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
629b3d75 6411 {
c2ebf4f1
JJ
6412 if (fd->non_rect)
6413 {
6414 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6415 memset (nonrect_bounds, 0,
6416 sizeof (tree) * (fd->last_nonrect + 1));
6417 }
6418 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6419 gcc_assert (entry_bb == gsi_bb (gsi));
6420 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
629b3d75 6421 gsi_prev (&gsi);
c2ebf4f1
JJ
6422 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6423 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6424 NULL, n1);
6425 gsi = gsi_for_stmt (fd->for_stmt);
6426 }
6427 if (broken_loop)
6428 ;
6429 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6430 {
6431 /* Compute in n2var the limit for the first innermost loop,
6432 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6433 where cnt is how many iterations the loop would have if
6434 all further iterations were assigned to the current task. */
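	      /* Editorial example: if this task owns iterations [V, N2)
		 of the combined space and the innermost loop has CNT
		 iterations left, then n2var = V + MIN (N2 - V, CNT);
		 MIN_ARG1/MIN_ARG2 below stage the two operands of that
		 MIN.  */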
6435 n2var = create_tmp_var (type);
6436 i = fd->collapse - 1;
6437 tree itype = TREE_TYPE (fd->loops[i].v);
6438 if (POINTER_TYPE_P (itype))
6439 itype = signed_type_for (itype);
6440 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6441 ? -1 : 1));
6442 t = fold_build2 (PLUS_EXPR, itype,
6443 fold_convert (itype, fd->loops[i].step), t);
6444 t = fold_build2 (PLUS_EXPR, itype, t,
6445 fold_convert (itype, fd->loops[i].n2));
6446 if (fd->loops[i].m2)
6447 {
6448 tree t2 = fold_convert (itype,
6449 fd->loops[i - fd->loops[i].outer].v);
6450 tree t3 = fold_convert (itype, fd->loops[i].m2);
6451 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6452 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6453 }
6454 t = fold_build2 (MINUS_EXPR, itype, t,
6455 fold_convert (itype, fd->loops[i].v));
6456 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6457 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6458 fold_build1 (NEGATE_EXPR, itype, t),
6459 fold_build1 (NEGATE_EXPR, itype,
6460 fold_convert (itype,
6461 fd->loops[i].step)));
6462 else
6463 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6464 fold_convert (itype, fd->loops[i].step));
6465 t = fold_convert (type, t);
6466 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
83f565ed
JJ
6467 min_arg1 = create_tmp_var (type);
6468 expand_omp_build_assign (&gsi, min_arg1, t2);
6469 min_arg2 = create_tmp_var (type);
6470 expand_omp_build_assign (&gsi, min_arg2, t);
629b3d75
MJ
6471 }
6472 else
c2ebf4f1
JJ
6473 {
6474 if (TREE_CODE (n2) == INTEGER_CST)
6475 {
6476 /* Indicate for lastprivate handling that at least one iteration
6477 has been performed, without wasting runtime. */
6478 if (integer_nonzerop (n2))
6479 expand_omp_build_assign (&gsi, fd->loop.v,
6480 fold_convert (type, n2));
6481 else
6482 /* Indicate that no iteration has been performed. */
6483 expand_omp_build_assign (&gsi, fd->loop.v,
6484 build_one_cst (type));
6485 }
6486 else
6487 {
6488 expand_omp_build_assign (&gsi, fd->loop.v,
6489 build_zero_cst (type));
6490 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6491 }
6492 for (i = 0; i < fd->collapse; i++)
6493 {
6494 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6495 if (fd->loops[i].m1)
6496 {
6497 tree t2
6498 = fold_convert (TREE_TYPE (t),
6499 fd->loops[i - fd->loops[i].outer].v);
6500 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6501 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6502 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6503 }
6504 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6505 /* For normal non-combined collapsed loops just initialize
6506 the outermost iterator in the entry_bb. */
6507 if (!broken_loop)
6508 break;
6509 }
6510 }
629b3d75 6511 }
c2ebf4f1
JJ
6512 else
6513 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
a29bd4f5
JJ
6514 tree altv = NULL_TREE, altn2 = NULL_TREE;
6515 if (fd->collapse == 1
6516 && !broken_loop
fc14ff61 6517 && TREE_CODE (orig_step) != INTEGER_CST)
a29bd4f5
JJ
6518 {
6519 /* The vectorizer currently punts on loops with non-constant steps
6520 for the main IV (can't compute number of iterations and gives up
6521 because of that). As the number of iterations of an OpenMP loop
6522 can always be computed upfront, use an alternate IV
6523 as the loop iterator:
6524 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6525 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
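      /* Worked example (editorial): for n1 = 0, n2 = 10, step = 3 the
	 formula gives altn2 = (10 - 0 + 3 - 1) / 3 = 4, matching the
	 four iterations 0, 3, 6, 9; if n1 >= n2 the COND_EXPR built
	 below resets altn2 to 0.  */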
6526 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6527 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6528 tree itype = TREE_TYPE (fd->loop.v);
6529 if (POINTER_TYPE_P (itype))
6530 itype = signed_type_for (itype);
6531 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6532 t = fold_build2 (PLUS_EXPR, itype,
fc14ff61 6533 fold_convert (itype, step), t);
a29bd4f5
JJ
6534 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6535 t = fold_build2 (MINUS_EXPR, itype, t,
6536 fold_convert (itype, fd->loop.v));
6537 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6538 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6539 fold_build1 (NEGATE_EXPR, itype, t),
6540 fold_build1 (NEGATE_EXPR, itype,
fc14ff61 6541 fold_convert (itype, step)));
a29bd4f5
JJ
6542 else
6543 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
fc14ff61 6544 fold_convert (itype, step));
a29bd4f5
JJ
6545 t = fold_convert (TREE_TYPE (altv), t);
6546 altn2 = create_tmp_var (TREE_TYPE (altv));
6547 expand_omp_build_assign (&gsi, altn2, t);
6548 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6549 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6550 true, GSI_SAME_STMT);
6551 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6552 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6553 build_zero_cst (TREE_TYPE (altv)));
6554 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6555 }
6556 else if (fd->collapse > 1
6557 && !broken_loop
6558 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6559 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6560 {
6561 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6562 altn2 = create_tmp_var (TREE_TYPE (altv));
6563 }
e7393c89
JJ
6564 if (cond_var)
6565 {
6566 if (POINTER_TYPE_P (type)
6567 || TREE_CODE (n1) != INTEGER_CST
6568 || fd->loop.cond_code != LT_EXPR
6569 || tree_int_cst_sgn (n1) != 1)
6570 expand_omp_build_assign (&gsi, cond_var,
6571 build_one_cst (TREE_TYPE (cond_var)));
6572 else
6573 expand_omp_build_assign (&gsi, cond_var,
6574 fold_convert (TREE_TYPE (cond_var), n1));
6575 }
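  /* Editorial gloss: when N1 is a positive integer constant and the
     loop is an upward '<' loop, COND_VAR can mirror the IV itself
     (starting at N1 and advancing by STEP in CONT_BB below); in all
     other cases it counts iterations 1, 2, 3, ...  */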
629b3d75
MJ
6576
6577 /* Remove the GIMPLE_OMP_FOR statement. */
6578 gsi_remove (&gsi, true);
6579
6580 if (!broken_loop)
6581 {
6582 /* Code to control the increment goes in the CONT_BB. */
65f4b875 6583 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
6584 stmt = gsi_stmt (gsi);
6585 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6586
c2ebf4f1
JJ
6587 if (fd->collapse == 1
6588 || gimple_omp_for_combined_into_p (fd->for_stmt))
6589 {
6590 if (POINTER_TYPE_P (type))
6591 t = fold_build_pointer_plus (fd->loop.v, step);
6592 else
6593 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6594 expand_omp_build_assign (&gsi, fd->loop.v, t);
6595 }
6596 else if (TREE_CODE (n2) != INTEGER_CST)
6597 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
a29bd4f5
JJ
6598 if (altv)
6599 {
6600 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6601 build_one_cst (TREE_TYPE (altv)));
6602 expand_omp_build_assign (&gsi, altv, t);
6603 }
629b3d75
MJ
6604
6605 if (fd->collapse > 1)
6606 {
6607 i = fd->collapse - 1;
6608 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6609 {
6610 t = fold_convert (sizetype, fd->loops[i].step);
6611 t = fold_build_pointer_plus (fd->loops[i].v, t);
6612 }
6613 else
6614 {
6615 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6616 fd->loops[i].step);
6617 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6618 fd->loops[i].v, t);
6619 }
6620 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
629b3d75 6621 }
e7393c89
JJ
6622 if (cond_var)
6623 {
6624 if (POINTER_TYPE_P (type)
6625 || TREE_CODE (n1) != INTEGER_CST
6626 || fd->loop.cond_code != LT_EXPR
6627 || tree_int_cst_sgn (n1) != 1)
6628 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6629 build_one_cst (TREE_TYPE (cond_var)));
6630 else
6631 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6632 fold_convert (TREE_TYPE (cond_var), step));
6633 expand_omp_build_assign (&gsi, cond_var, t);
6634 }
629b3d75
MJ
6635
6636 /* Remove GIMPLE_OMP_CONTINUE. */
6637 gsi_remove (&gsi, true);
6638 }
6639
6640 /* Emit the condition in L1_BB. */
6641 gsi = gsi_start_bb (l1_bb);
6642
a29bd4f5
JJ
6643 if (altv)
6644 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6645 else if (fd->collapse > 1
6646 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6647 && !broken_loop)
c2ebf4f1
JJ
6648 {
6649 i = fd->collapse - 1;
6650 tree itype = TREE_TYPE (fd->loops[i].v);
6651 if (fd->loops[i].m2)
6652 t = n2v = create_tmp_var (itype);
6653 else
6654 t = fold_convert (itype, fd->loops[i].n2);
6655 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6656 false, GSI_CONTINUE_LINKING);
6657 tree v = fd->loops[i].v;
6658 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6659 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6660 false, GSI_CONTINUE_LINKING);
6661 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6662 }
6663 else
6664 {
6665 if (fd->collapse > 1 && !broken_loop)
6666 t = n2var;
6667 else
6668 t = fold_convert (type, n2);
6669 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6670 false, GSI_CONTINUE_LINKING);
6671 tree v = fd->loop.v;
6672 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6673 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6674 false, GSI_CONTINUE_LINKING);
6675 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6676 }
629b3d75
MJ
6677 cond_stmt = gimple_build_cond_empty (t);
6678 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6679 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6680 NULL, NULL)
6681 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6682 NULL, NULL))
6683 {
6684 gsi = gsi_for_stmt (cond_stmt);
6685 gimple_regimplify_operands (cond_stmt, &gsi);
6686 }
6687
6688 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6689 if (is_simt)
6690 {
6691 gsi = gsi_start_bb (l2_bb);
fc14ff61 6692 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
629b3d75
MJ
6693 if (POINTER_TYPE_P (type))
6694 t = fold_build_pointer_plus (fd->loop.v, step);
6695 else
6696 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6697 expand_omp_build_assign (&gsi, fd->loop.v, t);
6698 }
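      /* Editorial note on the block above: STEP had been scaled to
	 ORIG_STEP * VF, so the MINUS_EXPR leaves ORIG_STEP -
	 ORIG_STEP * VF in STEP, and the final addition performs
	 V -= ORIG_STEP * (VF - 1), undoing the per-lane over-step.  */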
6699
6700 /* Remove GIMPLE_OMP_RETURN. */
65f4b875 6701 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6702 gsi_remove (&gsi, true);
6703
6704 /* Connect the new blocks. */
6705 remove_edge (FALLTHRU_EDGE (entry_bb));
6706
6707 if (!broken_loop)
6708 {
6709 remove_edge (BRANCH_EDGE (entry_bb));
6710 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6711
6712 e = BRANCH_EDGE (l1_bb);
6713 ne = FALLTHRU_EDGE (l1_bb);
6714 e->flags = EDGE_TRUE_VALUE;
6715 }
6716 else
6717 {
6718 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6719
6720 ne = single_succ_edge (l1_bb);
6721 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6722
6723 }
6724 ne->flags = EDGE_FALSE_VALUE;
357067f2
JH
6725 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6726 ne->probability = e->probability.invert ();
629b3d75
MJ
6727
6728 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6729 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6730
6731 if (simt_maxlane)
6732 {
6733 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6734 NULL_TREE, NULL_TREE);
6735 gsi = gsi_last_bb (entry_bb);
6736 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6737 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6738 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
357067f2
JH
6739 FALLTHRU_EDGE (entry_bb)->probability
6740 = profile_probability::guessed_always ().apply_scale (7, 8);
c2ebf4f1 6741 BRANCH_EDGE (entry_bb)->probability
357067f2 6742 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
629b3d75
MJ
6743 l2_dom_bb = entry_bb;
6744 }
6745 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6746
c2ebf4f1
JJ
6747 if (!broken_loop && fd->collapse > 1)
6748 {
6749 basic_block last_bb = l1_bb;
6750 basic_block init_bb = NULL;
6751 for (i = fd->collapse - 2; i >= 0; i--)
6752 {
6753 tree nextn2v = NULL_TREE;
6754 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6755 e = EDGE_SUCC (last_bb, 0);
6756 else
6757 e = EDGE_SUCC (last_bb, 1);
6758 basic_block bb = split_edge (e);
6759 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6760 {
6761 t = fold_convert (sizetype, fd->loops[i].step);
6762 t = fold_build_pointer_plus (fd->loops[i].v, t);
6763 }
6764 else
6765 {
6766 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6767 fd->loops[i].step);
6768 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6769 fd->loops[i].v, t);
6770 }
6771 gsi = gsi_after_labels (bb);
6772 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6773
6774 bb = split_block (bb, last_stmt (bb))->dest;
6775 gsi = gsi_start_bb (bb);
6776 tree itype = TREE_TYPE (fd->loops[i].v);
6777 if (fd->loops[i].m2)
6778 t = nextn2v = create_tmp_var (itype);
6779 else
6780 t = fold_convert (itype, fd->loops[i].n2);
6781 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6782 false, GSI_CONTINUE_LINKING);
6783 tree v = fd->loops[i].v;
6784 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6785 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6786 false, GSI_CONTINUE_LINKING);
6787 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6788 cond_stmt = gimple_build_cond_empty (t);
6789 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6790 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6791 expand_omp_regimplify_p, NULL, NULL)
6792 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6793 expand_omp_regimplify_p, NULL, NULL))
6794 {
6795 gsi = gsi_for_stmt (cond_stmt);
6796 gimple_regimplify_operands (cond_stmt, &gsi);
6797 }
6798 ne = single_succ_edge (bb);
6799 ne->flags = EDGE_FALSE_VALUE;
6800
6801 init_bb = create_empty_bb (bb);
6802 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6803 add_bb_to_loop (init_bb, bb->loop_father);
6804 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6805 e->probability
6806 = profile_probability::guessed_always ().apply_scale (7, 8);
6807 ne->probability = e->probability.invert ();
6808
6809 gsi = gsi_after_labels (init_bb);
6810 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6811 fd->loops[i + 1].n1);
6812 if (fd->loops[i + 1].m1)
6813 {
6814 tree t2 = fold_convert (TREE_TYPE (t),
6815 fd->loops[i + 1
6816 - fd->loops[i + 1].outer].v);
6817 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6818 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6819 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6820 }
6821 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6822 if (fd->loops[i + 1].m2)
6823 {
a29bd4f5 6824 if (i + 2 == fd->collapse && (n2var || altv))
c2ebf4f1
JJ
6825 {
6826 gcc_assert (n2v == NULL_TREE);
6827 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6828 }
6829 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6830 fd->loops[i + 1].n2);
6831 tree t2 = fold_convert (TREE_TYPE (t),
6832 fd->loops[i + 1
6833 - fd->loops[i + 1].outer].v);
6834 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6835 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6836 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6837 expand_omp_build_assign (&gsi, n2v, t);
6838 }
6839 if (i + 2 == fd->collapse && n2var)
6840 {
6841 /* For composite simd, n2 is the first iteration the current
6842 task shouldn't already handle, so we effectively want to use
6843 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6844 as the vectorized loop. Except the vectorizer will not
6845 vectorize that, so instead compute N2VAR as
6846 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6847 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6848 as the loop to vectorize. */
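	      /* Illustrative (hypothetical numbers): if this task was
		 assigned V in [100, 200) of the collapsed space and the
		 innermost loop has COUNTS3 = 64 iterations per row, then
		 N2VAR = V + MIN (200 - V, 64), so the vectorized inner
		 loop never runs past either bound.  */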
6849 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6850 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6851 {
6852 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6853 == LT_EXPR ? -1 : 1));
6854 t = fold_build2 (PLUS_EXPR, itype,
6855 fold_convert (itype,
6856 fd->loops[i + 1].step), t);
6857 if (fd->loops[i + 1].m2)
6858 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6859 else
6860 t = fold_build2 (PLUS_EXPR, itype, t,
6861 fold_convert (itype,
6862 fd->loops[i + 1].n2));
6863 t = fold_build2 (MINUS_EXPR, itype, t,
6864 fold_convert (itype, fd->loops[i + 1].v));
6865 tree step = fold_convert (itype, fd->loops[i + 1].step);
6866 if (TYPE_UNSIGNED (itype)
6867 && fd->loops[i + 1].cond_code == GT_EXPR)
6868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6869 fold_build1 (NEGATE_EXPR, itype, t),
6870 fold_build1 (NEGATE_EXPR, itype, step));
6871 else
6872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6873 t = fold_convert (type, t);
6874 }
6875 else
6876 t = counts[i + 1];
83f565ed
JJ
6877 expand_omp_build_assign (&gsi, min_arg1, t2);
6878 expand_omp_build_assign (&gsi, min_arg2, t);
6879 e = split_block (init_bb, last_stmt (init_bb));
6880 gsi = gsi_after_labels (e->dest);
6881 init_bb = e->dest;
6882 remove_edge (FALLTHRU_EDGE (entry_bb));
6883 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6884 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6885 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6886 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
c2ebf4f1
JJ
6887 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6888 expand_omp_build_assign (&gsi, n2var, t);
6889 }
a29bd4f5
JJ
6890 if (i + 2 == fd->collapse && altv)
6891 {
6892 /* The vectorizer currently punts on loops with non-constant
6893 steps for the main IV (can't compute number of iterations
6894 and gives up because of that). As the number of iterations
6895 of an OpenMP loop can always be computed upfront,
6896 use an alternate IV as the loop iterator. */
6897 expand_omp_build_assign (&gsi, altv,
6898 build_zero_cst (TREE_TYPE (altv)));
6899 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6900 if (POINTER_TYPE_P (itype))
6901 itype = signed_type_for (itype);
6902 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6903 ? -1 : 1));
6904 t = fold_build2 (PLUS_EXPR, itype,
6905 fold_convert (itype, fd->loops[i + 1].step), t);
6906 t = fold_build2 (PLUS_EXPR, itype, t,
6907 fold_convert (itype,
6908 fd->loops[i + 1].m2
6909 ? n2v : fd->loops[i + 1].n2));
6910 t = fold_build2 (MINUS_EXPR, itype, t,
6911 fold_convert (itype, fd->loops[i + 1].v));
6912 tree step = fold_convert (itype, fd->loops[i + 1].step);
6913 if (TYPE_UNSIGNED (itype)
6914 && fd->loops[i + 1].cond_code == GT_EXPR)
6915 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6916 fold_build1 (NEGATE_EXPR, itype, t),
6917 fold_build1 (NEGATE_EXPR, itype, step));
6918 else
6919 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6920 t = fold_convert (TREE_TYPE (altv), t);
6921 expand_omp_build_assign (&gsi, altn2, t);
6922 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6923 fd->loops[i + 1].m2
6924 ? n2v : fd->loops[i + 1].n2);
6925 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6926 true, GSI_SAME_STMT);
6927 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6928 fd->loops[i + 1].v, t2);
6929 gassign *g
6930 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6931 build_zero_cst (TREE_TYPE (altv)));
6932 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6933 }
c2ebf4f1
JJ
6934 n2v = nextn2v;
6935
6936 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6937 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6938 {
6939 e = find_edge (entry_bb, last_bb);
6940 redirect_edge_succ (e, bb);
6941 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6942 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6943 }
6944
6945 last_bb = bb;
6946 }
6947 }
629b3d75
MJ
6948 if (!broken_loop)
6949 {
99b1c316 6950 class loop *loop = alloc_loop ();
629b3d75
MJ
6951 loop->header = l1_bb;
6952 loop->latch = cont_bb;
6953 add_loop (loop, l1_bb->loop_father);
6954 loop->safelen = safelen_int;
6955 if (simduid)
6956 {
6957 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6958 cfun->has_simduid_loops = true;
6959 }
6960 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6961 the loop. */
6962 if ((flag_tree_loop_vectorize
26d476cd 6963 || !global_options_set.x_flag_tree_loop_vectorize)
629b3d75
MJ
6964 && flag_tree_loop_optimize
6965 && loop->safelen > 1)
6966 {
6967 loop->force_vectorize = true;
f63445e5
JJ
6968 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6969 {
6970 unsigned HOST_WIDE_INT v
6971 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6972 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6973 loop->simdlen = v;
6974 }
629b3d75
MJ
6975 cfun->has_force_vectorize_loops = true;
6976 }
fed2a43c
JJ
6977 else if (dont_vectorize)
6978 loop->dont_vectorize = true;
629b3d75
MJ
6979 }
6980 else if (simduid)
6981 cfun->has_simduid_loops = true;
6982}
6983
6984 /* The taskloop construct is represented after gimplification as
6985 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6986 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6987 which should just compute all the needed loop temporaries
6988 for GIMPLE_OMP_TASK. */
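/* Illustrative shape after gimplification (editorial sketch):

     GIMPLE_OMP_FOR        <- outer; expanded here, computes the
       GIMPLE_OMP_TASK        _looptemp_ bounds for the task
         GIMPLE_OMP_FOR    <- inner; expanded by
           body               expand_omp_taskloop_for_inner  */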
6989
6990static void
6991expand_omp_taskloop_for_outer (struct omp_region *region,
6992 struct omp_for_data *fd,
6993 gimple *inner_stmt)
6994{
6995 tree type, bias = NULL_TREE;
6996 basic_block entry_bb, cont_bb, exit_bb;
6997 gimple_stmt_iterator gsi;
6998 gassign *assign_stmt;
6999 tree *counts = NULL;
7000 int i;
7001
7002 gcc_assert (inner_stmt);
7003 gcc_assert (region->cont);
7004 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7005 && gimple_omp_task_taskloop_p (inner_stmt));
7006 type = TREE_TYPE (fd->loop.v);
7007
7008 /* See if we need to bias by LLONG_MIN. */
7009 if (fd->iter_type == long_long_unsigned_type_node
7010 && TREE_CODE (type) == INTEGER_TYPE
7011 && !TYPE_UNSIGNED (type))
7012 {
7013 tree n1, n2;
7014
7015 if (fd->loop.cond_code == LT_EXPR)
7016 {
7017 n1 = fd->loop.n1;
7018 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7019 }
7020 else
7021 {
7022 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7023 n2 = fd->loop.n1;
7024 }
7025 if (TREE_CODE (n1) != INTEGER_CST
7026 || TREE_CODE (n2) != INTEGER_CST
7027 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7028 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7029 }
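  /* Editorial example: with a signed 64-bit IV running over
     [-10, 10) while iter_type is unsigned long long, adding
     BIAS = LLONG_MIN shifts both bounds into unsigned space, so the
     runtime's unsigned comparison of the bounds remains correct.  */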
7030
7031 entry_bb = region->entry;
7032 cont_bb = region->cont;
7033 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7034 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7035 exit_bb = region->exit;
7036
65f4b875 7037 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7038 gimple *for_stmt = gsi_stmt (gsi);
7039 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7040 if (fd->collapse > 1)
7041 {
7042 int first_zero_iter = -1, dummy = -1;
7043 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7044
7045 counts = XALLOCAVEC (tree, fd->collapse);
7046 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7047 zero_iter_bb, first_zero_iter,
7048 dummy_bb, dummy, l2_dom_bb);
7049
7050 if (zero_iter_bb)
7051 {
7052 /* Some counts[i] vars might be uninitialized if
7053 some loop has zero iterations. But the body shouldn't
7054 be executed in that case, so just avoid uninit warnings. */
7055 for (i = first_zero_iter; i < fd->collapse; i++)
7056 if (SSA_VAR_P (counts[i]))
e9e2bad7 7057 suppress_warning (counts[i], OPT_Wuninitialized);
629b3d75
MJ
7058 gsi_prev (&gsi);
7059 edge e = split_block (entry_bb, gsi_stmt (gsi));
7060 entry_bb = e->dest;
7061 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7062 gsi = gsi_last_bb (entry_bb);
7063 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7064 get_immediate_dominator (CDI_DOMINATORS,
7065 zero_iter_bb));
7066 }
7067 }
7068
7069 tree t0, t1;
7070 t1 = fd->loop.n2;
7071 t0 = fd->loop.n1;
7072 if (POINTER_TYPE_P (TREE_TYPE (t0))
7073 && TYPE_PRECISION (TREE_TYPE (t0))
7074 != TYPE_PRECISION (fd->iter_type))
7075 {
7076 /* Avoid casting pointers to integer of a different size. */
7077 tree itype = signed_type_for (type);
7078 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7079 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7080 }
7081 else
7082 {
7083 t1 = fold_convert (fd->iter_type, t1);
7084 t0 = fold_convert (fd->iter_type, t0);
7085 }
7086 if (bias)
7087 {
7088 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7089 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7090 }
7091
7092 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7093 OMP_CLAUSE__LOOPTEMP_);
7094 gcc_assert (innerc);
7095 tree startvar = OMP_CLAUSE_DECL (innerc);
7096 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7097 gcc_assert (innerc);
7098 tree endvar = OMP_CLAUSE_DECL (innerc);
7099 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7100 {
14707c89 7101 innerc = find_lastprivate_looptemp (fd, innerc);
629b3d75
MJ
7102 if (innerc)
7103 {
7104 /* If needed (inner taskloop has lastprivate clause), propagate
7105 down the total number of iterations. */
7106 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7107 NULL_TREE, false,
7108 GSI_CONTINUE_LINKING);
7109 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7110 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7111 }
7112 }
7113
7114 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7115 GSI_CONTINUE_LINKING);
7116 assign_stmt = gimple_build_assign (startvar, t0);
7117 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7118
7119 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7120 GSI_CONTINUE_LINKING);
7121 assign_stmt = gimple_build_assign (endvar, t1);
7122 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7123 if (fd->collapse > 1)
aed3ab25 7124 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
629b3d75
MJ
7125
7126 /* Remove the GIMPLE_OMP_FOR statement. */
7127 gsi = gsi_for_stmt (for_stmt);
7128 gsi_remove (&gsi, true);
7129
65f4b875 7130 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7131 gsi_remove (&gsi, true);
7132
65f4b875 7133 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7134 gsi_remove (&gsi, true);
7135
357067f2 7136 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75 7137 remove_edge (BRANCH_EDGE (entry_bb));
357067f2 7138 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
629b3d75
MJ
7139 remove_edge (BRANCH_EDGE (cont_bb));
7140 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7141 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7142 recompute_dominator (CDI_DOMINATORS, region->entry));
7143}
7144
7145 /* The taskloop construct is represented after gimplification as
7146 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7147 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7148 The GOMP_taskloop{,_ull} function arranges for each task to be given
7149 just a single range of iterations. */
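/* Editorial gloss: each task receives its own [start, end) range
   through the two _LOOPTEMP_ clauses, so the expansion below reduces
   to a plain sequential loop over that range.  */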
7150
7151static void
7152expand_omp_taskloop_for_inner (struct omp_region *region,
7153 struct omp_for_data *fd,
7154 gimple *inner_stmt)
7155{
7156 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7157 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7158 basic_block fin_bb;
7159 gimple_stmt_iterator gsi;
7160 edge ep;
7161 bool broken_loop = region->cont == NULL;
7162 tree *counts = NULL;
7163 tree n1, n2, step;
7164
7165 itype = type = TREE_TYPE (fd->loop.v);
7166 if (POINTER_TYPE_P (type))
7167 itype = signed_type_for (type);
7168
7169 /* See if we need to bias by LLONG_MIN. */
7170 if (fd->iter_type == long_long_unsigned_type_node
7171 && TREE_CODE (type) == INTEGER_TYPE
7172 && !TYPE_UNSIGNED (type))
7173 {
7174 tree n1, n2;
7175
7176 if (fd->loop.cond_code == LT_EXPR)
7177 {
7178 n1 = fd->loop.n1;
7179 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7180 }
7181 else
7182 {
7183 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7184 n2 = fd->loop.n1;
7185 }
7186 if (TREE_CODE (n1) != INTEGER_CST
7187 || TREE_CODE (n2) != INTEGER_CST
7188 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7189 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7190 }
7191
7192 entry_bb = region->entry;
7193 cont_bb = region->cont;
7194 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7195 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7196 gcc_assert (broken_loop
7197 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7198 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7199 if (!broken_loop)
7200 {
7201 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7202 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7203 }
7204 exit_bb = region->exit;
7205
7206 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 7207 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7208 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7209
7210 if (fd->collapse > 1)
7211 {
7212 int first_zero_iter = -1, dummy = -1;
7213 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7214
7215 counts = XALLOCAVEC (tree, fd->collapse);
7216 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7217 fin_bb, first_zero_iter,
7218 dummy_bb, dummy, l2_dom_bb);
7219 t = NULL_TREE;
7220 }
7221 else
7222 t = integer_one_node;
7223
7224 step = fd->loop.step;
7225 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7226 OMP_CLAUSE__LOOPTEMP_);
7227 gcc_assert (innerc);
7228 n1 = OMP_CLAUSE_DECL (innerc);
7229 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7230 gcc_assert (innerc);
7231 n2 = OMP_CLAUSE_DECL (innerc);
7232 if (bias)
7233 {
7234 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7235 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7236 }
7237 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7238 true, NULL_TREE, true, GSI_SAME_STMT);
7239 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7240 true, NULL_TREE, true, GSI_SAME_STMT);
7241 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7242 true, NULL_TREE, true, GSI_SAME_STMT);
7243
7244 tree startvar = fd->loop.v;
7245 tree endvar = NULL_TREE;
7246
7247 if (gimple_omp_for_combined_p (fd->for_stmt))
7248 {
7249 tree clauses = gimple_omp_for_clauses (inner_stmt);
7250 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7251 gcc_assert (innerc);
7252 startvar = OMP_CLAUSE_DECL (innerc);
7253 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7254 OMP_CLAUSE__LOOPTEMP_);
7255 gcc_assert (innerc);
7256 endvar = OMP_CLAUSE_DECL (innerc);
7257 }
7258 t = fold_convert (TREE_TYPE (startvar), n1);
7259 t = force_gimple_operand_gsi (&gsi, t,
7260 DECL_P (startvar)
7261 && TREE_ADDRESSABLE (startvar),
7262 NULL_TREE, false, GSI_CONTINUE_LINKING);
7263 gimple *assign_stmt = gimple_build_assign (startvar, t);
7264 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7265
7266 t = fold_convert (TREE_TYPE (startvar), n2);
7267 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7268 false, GSI_CONTINUE_LINKING);
7269 if (endvar)
7270 {
7271 assign_stmt = gimple_build_assign (endvar, e);
7272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7273 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7274 assign_stmt = gimple_build_assign (fd->loop.v, e);
7275 else
7276 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7277 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7278 }
2e47c8c6
JJ
7279
7280 tree *nonrect_bounds = NULL;
629b3d75 7281 if (fd->collapse > 1)
2e47c8c6
JJ
7282 {
7283 if (fd->non_rect)
7284 {
7285 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7286 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7287 }
7288 gcc_assert (gsi_bb (gsi) == entry_bb);
7289 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7290 startvar);
7291 entry_bb = gsi_bb (gsi);
7292 }
629b3d75
MJ
7293
7294 if (!broken_loop)
7295 {
7296 /* The code controlling the sequential loop replaces the
7297 GIMPLE_OMP_CONTINUE. */
65f4b875 7298 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7299 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7300 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7301 vmain = gimple_omp_continue_control_use (cont_stmt);
7302 vback = gimple_omp_continue_control_def (cont_stmt);
7303
7304 if (!gimple_omp_for_combined_p (fd->for_stmt))
7305 {
7306 if (POINTER_TYPE_P (type))
7307 t = fold_build_pointer_plus (vmain, step);
7308 else
7309 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7310 t = force_gimple_operand_gsi (&gsi, t,
7311 DECL_P (vback)
7312 && TREE_ADDRESSABLE (vback),
7313 NULL_TREE, true, GSI_SAME_STMT);
7314 assign_stmt = gimple_build_assign (vback, t);
7315 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7316
7317 t = build2 (fd->loop.cond_code, boolean_type_node,
7318 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7319 ? t : vback, e);
7320 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7321 }
7322
7323 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7324 gsi_remove (&gsi, true);
7325
7326 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
2e47c8c6
JJ
7327 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7328 cont_bb, body_bb);
629b3d75
MJ
7329 }
7330
7331 /* Remove the GIMPLE_OMP_FOR statement. */
7332 gsi = gsi_for_stmt (fd->for_stmt);
7333 gsi_remove (&gsi, true);
7334
7335 /* Remove the GIMPLE_OMP_RETURN statement. */
65f4b875 7336 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7337 gsi_remove (&gsi, true);
7338
357067f2 7339 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75
MJ
7340 if (!broken_loop)
7341 remove_edge (BRANCH_EDGE (entry_bb));
7342 else
7343 {
7344 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7345 region->outer->cont = NULL;
7346 }
7347
7348 /* Connect all the blocks. */
7349 if (!broken_loop)
7350 {
7351 ep = find_edge (cont_bb, body_bb);
7352 if (gimple_omp_for_combined_p (fd->for_stmt))
7353 {
7354 remove_edge (ep);
7355 ep = NULL;
7356 }
7357 else if (fd->collapse > 1)
7358 {
7359 remove_edge (ep);
7360 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7361 }
7362 else
7363 ep->flags = EDGE_TRUE_VALUE;
7364 find_edge (cont_bb, fin_bb)->flags
7365 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7366 }
7367
7368 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7369 recompute_dominator (CDI_DOMINATORS, body_bb));
7370 if (!broken_loop)
7371 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7372 recompute_dominator (CDI_DOMINATORS, fin_bb));
7373
7374 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7375 {
99b1c316 7376 class loop *loop = alloc_loop ();
629b3d75
MJ
7377 loop->header = body_bb;
7378 if (collapse_bb == NULL)
7379 loop->latch = cont_bb;
7380 add_loop (loop, body_bb->loop_father);
7381 }
7382}
7383
7384/* A subroutine of expand_omp_for. Generate code for an OpenACC
7385 partitioned loop. The lowering here is abstracted, in that the
7386 loop parameters are passed through internal functions, which are
7387 further lowered by oacc_device_lower, once we get to the target
7388 compiler. The loop is of the form:
7389
7390 for (V = B; V LTGT E; V += S) {BODY}
7391
7392 where LTGT is < or >. We may have a specified chunking size, CHUNKING
7393 (constant 0 for no chunking) and we will have a GWV partitioning
7394 mask, specifying dimensions over which the loop is to be
02889d23
CLT
7395 partitioned (see note below). We generate code that looks like
7396 (this ignores tiling):
629b3d75
MJ
7397
7398 <entry_bb> [incoming FALL->body, BRANCH->exit]
7399 typedef signedintify (typeof (V)) T; // underlying signed integral type
7400 T range = E - B;
7401 T chunk_no = 0;
7402 T DIR = LTGT == '<' ? +1 : -1;
7403 T chunk_max = GOACC_LOOP_CHUNK (DIR, range, S, CHUNK_SIZE, GWV);
7404 T step = GOACC_LOOP_STEP (DIR, range, S, CHUNK_SIZE, GWV);
7405
7406 <head_bb> [created by splitting end of entry_bb]
7407 T offset = GOACC_LOOP_OFFSET (DIR, range, S, CHUNK_SIZE, GWV, chunk_no);
7408 T bound = GOACC_LOOP_BOUND (DIR, range, S, CHUNK_SIZE, GWV, offset);
7409 if (!(offset LTGT bound)) goto bottom_bb;
7410
7411 <body_bb> [incoming]
7412 V = B + offset;
7413 {BODY}
7414
7415 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7416 offset += step;
7417 if (offset LTGT bound) goto body_bb; [*]
7418
7419 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7420 chunk_no++;
7421 if (chunk_no < chunk_max) goto head_bb;
7422
7423 <exit_bb> [incoming]
7424 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7425
02889d23 7426 [*] Needed if V live at end of loop. */
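/* Editorial example: for a simple for (V = 0; V < N; V++) this schema
   instantiates with B = 0, E = N, S = 1, DIR = +1; CHUNK_SIZE and GWV
   depend on context (the SSA/'kernels'-parloops path below uses
   GWV = GOMP_DIM_MASK (GOMP_DIM_GANG)), and the GOACC_LOOP_* internal
   calls are resolved by oacc_device_lower once the target is known.  */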
629b3d75
MJ
7427
7428static void
7429expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7430{
8c3aa359
TS
7431 bool is_oacc_kernels_parallelized
7432 = (lookup_attribute ("oacc kernels parallelized",
7433 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7434 {
7435 bool is_oacc_kernels
7436 = (lookup_attribute ("oacc kernels",
7437 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7438 if (is_oacc_kernels_parallelized)
7439 gcc_checking_assert (is_oacc_kernels);
7440 }
7441 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7442 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7443 for SSA specifics, and some are for 'parloops' OpenACC
7444 'kernels'-parallelized specifics. */
7445
629b3d75
MJ
7446 tree v = fd->loop.v;
7447 enum tree_code cond_code = fd->loop.cond_code;
7448 enum tree_code plus_code = PLUS_EXPR;
7449
7450 tree chunk_size = integer_minus_one_node;
7451 tree gwv = integer_zero_node;
7452 tree iter_type = TREE_TYPE (v);
7453 tree diff_type = iter_type;
7454 tree plus_type = iter_type;
7455 struct oacc_collapse *counts = NULL;
7456
7457 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7458 == GF_OMP_FOR_KIND_OACC_LOOP);
7459 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7460 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7461
7462 if (POINTER_TYPE_P (iter_type))
7463 {
7464 plus_code = POINTER_PLUS_EXPR;
7465 plus_type = sizetype;
7466 }
f324479c
TB
7467 for (int ix = fd->collapse; ix--;)
7468 {
7469 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7470 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7471 diff_type = diff_type2;
7472 }
629b3d75
MJ
7473 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7474 diff_type = signed_type_for (diff_type);
f4c222c0
TV
7475 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7476 diff_type = integer_type_node;
629b3d75
MJ
7477
7478 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7479 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7480 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7481 basic_block bottom_bb = NULL;
7482
9c3da8cc
JJ
7483 /* entry_bb has two successors; the branch edge is to the exit
7484 block, fallthrough edge to body. */
629b3d75
MJ
7485 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7486 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7487
7488 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7489 body_bb, or to a block whose only successor is the body_bb. Its
7490 fallthrough successor is the final block (same as the branch
7491 successor of the entry_bb). */
7492 if (cont_bb)
7493 {
7494 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7495 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7496
7497 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7498 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7499 }
7500 else
7501 gcc_assert (!gimple_in_ssa_p (cfun));
7502
7503 /* The exit block only has entry_bb and cont_bb as predecessors. */
7504 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7505
7506 tree chunk_no;
7507 tree chunk_max = NULL_TREE;
7508 tree bound, offset;
7509 tree step = create_tmp_var (diff_type, ".step");
7510 bool up = cond_code == LT_EXPR;
7511 tree dir = build_int_cst (diff_type, up ? +1 : -1);
02889d23 7512 bool chunking = !gimple_in_ssa_p (cfun);
629b3d75
MJ
7513 bool negating;
7514
02889d23
CLT
7515 /* Tiling vars. */
7516 tree tile_size = NULL_TREE;
7517 tree element_s = NULL_TREE;
7518 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7519 basic_block elem_body_bb = NULL;
7520 basic_block elem_cont_bb = NULL;
7521
629b3d75
MJ
7522 /* SSA instances. */
7523 tree offset_incr = NULL_TREE;
7524 tree offset_init = NULL_TREE;
7525
7526 gimple_stmt_iterator gsi;
7527 gassign *ass;
7528 gcall *call;
7529 gimple *stmt;
7530 tree expr;
7531 location_t loc;
7532 edge split, be, fte;
7533
7534 /* Split the end of entry_bb to create head_bb. */
7535 split = split_block (entry_bb, last_stmt (entry_bb));
7536 basic_block head_bb = split->dest;
7537 entry_bb = split->src;
7538
7539 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
65f4b875 7540 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7541 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7542 loc = gimple_location (for_stmt);
7543
7544 if (gimple_in_ssa_p (cfun))
7545 {
7546 offset_init = gimple_omp_for_index (for_stmt, 0);
7547 gcc_assert (integer_zerop (fd->loop.n1));
7548 /* The SSA parallelizer does gang parallelism. */
7549 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7550 }
7551
02889d23 7552 if (fd->collapse > 1 || fd->tiling)
629b3d75 7553 {
02889d23 7554 gcc_assert (!gimple_in_ssa_p (cfun) && up);
629b3d75 7555 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
f324479c 7556 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
02889d23 7557 TREE_TYPE (fd->loop.n2), loc);
629b3d75
MJ
7558
7559 if (SSA_VAR_P (fd->loop.n2))
7560 {
7561 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7562 true, GSI_SAME_STMT);
7563 ass = gimple_build_assign (fd->loop.n2, total);
7564 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7565 }
629b3d75
MJ
7566 }
7567
7568 tree b = fd->loop.n1;
7569 tree e = fd->loop.n2;
7570 tree s = fd->loop.step;
7571
7572 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7573 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7574
01914336 7575 /* Convert the step, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
7576 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7577 if (negating)
7578 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7579 s = fold_convert (diff_type, s);
7580 if (negating)
7581 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7582 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
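  /* Editorial note on the negation dance above: when counting down
     with an unsigned step, the value is negated while still unsigned,
     converted, then negated again in the signed DIFF_TYPE, e.g.
     0xFFFFFFFFu (representing -1) -> 1u -> 1 -> -1, avoiding an
     out-of-range unsigned->signed conversion.  */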
7583
7584 if (!chunking)
7585 chunk_size = integer_zero_node;
7586 expr = fold_convert (diff_type, chunk_size);
7587 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7588 NULL_TREE, true, GSI_SAME_STMT);
02889d23
CLT
7589
7590 if (fd->tiling)
7591 {
7592 /* Determine the tile size and element step,
7593 modify the outer loop step size. */
7594 tile_size = create_tmp_var (diff_type, ".tile_size");
7595 expr = build_int_cst (diff_type, 1);
7596 for (int ix = 0; ix < fd->collapse; ix++)
7597 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7598 expr = force_gimple_operand_gsi (&gsi, expr, true,
7599 NULL_TREE, true, GSI_SAME_STMT);
7600 ass = gimple_build_assign (tile_size, expr);
7601 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7602
7603 element_s = create_tmp_var (diff_type, ".element_s");
7604 ass = gimple_build_assign (element_s, s);
7605 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7606
7607 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7608 s = force_gimple_operand_gsi (&gsi, expr, true,
7609 NULL_TREE, true, GSI_SAME_STMT);
7610 }
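      /* Editorial example: with a hypothetical 'tile(4, 8)' on a
	 collapse of two loops and S = 1, tile_size = 4 * 8 = 32,
	 element_s keeps the original step 1, and the outer step S
	 becomes 1 * 32, so each outer iteration covers one whole
	 tile.  */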
7611
01914336 7612 /* Determine the range, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
7613 negating = !up && TYPE_UNSIGNED (iter_type);
7614 expr = fold_build2 (MINUS_EXPR, plus_type,
7615 fold_convert (plus_type, negating ? b : e),
7616 fold_convert (plus_type, negating ? e : b));
7617 expr = fold_convert (diff_type, expr);
7618 if (negating)
7619 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7620 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7621 NULL_TREE, true, GSI_SAME_STMT);
7622
7623 chunk_no = build_int_cst (diff_type, 0);
7624 if (chunking)
7625 {
7626 gcc_assert (!gimple_in_ssa_p (cfun));
7627
7628 expr = chunk_no;
7629 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7630 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7631
7632 ass = gimple_build_assign (chunk_no, expr);
7633 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7634
7635 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7636 build_int_cst (integer_type_node,
7637 IFN_GOACC_LOOP_CHUNKS),
7638 dir, range, s, chunk_size, gwv);
7639 gimple_call_set_lhs (call, chunk_max);
7640 gimple_set_location (call, loc);
7641 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7642 }
7643 else
7644 chunk_size = chunk_no;
7645
7646 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7647 build_int_cst (integer_type_node,
7648 IFN_GOACC_LOOP_STEP),
7649 dir, range, s, chunk_size, gwv);
7650 gimple_call_set_lhs (call, step);
7651 gimple_set_location (call, loc);
7652 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7653
7654 /* Remove the GIMPLE_OMP_FOR. */
7655 gsi_remove (&gsi, true);
7656
01914336 7657 /* Fixup edges from head_bb. */
629b3d75
MJ
7658 be = BRANCH_EDGE (head_bb);
7659 fte = FALLTHRU_EDGE (head_bb);
7660 be->flags |= EDGE_FALSE_VALUE;
7661 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7662
7663 basic_block body_bb = fte->dest;
7664
7665 if (gimple_in_ssa_p (cfun))
7666 {
65f4b875 7667 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7668 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7669
7670 offset = gimple_omp_continue_control_use (cont_stmt);
7671 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7672 }
7673 else
7674 {
7675 offset = create_tmp_var (diff_type, ".offset");
7676 offset_init = offset_incr = offset;
7677 }
7678 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7679
7680 /* Loop offset & bound go into head_bb. */
7681 gsi = gsi_start_bb (head_bb);
7682
7683 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7684 build_int_cst (integer_type_node,
7685 IFN_GOACC_LOOP_OFFSET),
7686 dir, range, s,
7687 chunk_size, gwv, chunk_no);
7688 gimple_call_set_lhs (call, offset_init);
7689 gimple_set_location (call, loc);
7690 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7691
7692 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7693 build_int_cst (integer_type_node,
7694 IFN_GOACC_LOOP_BOUND),
7695 dir, range, s,
7696 chunk_size, gwv, offset_init);
7697 gimple_call_set_lhs (call, bound);
7698 gimple_set_location (call, loc);
7699 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7700
7701 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7702 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7703 GSI_CONTINUE_LINKING);
7704
7705 /* V assignment goes into body_bb. */
7706 if (!gimple_in_ssa_p (cfun))
7707 {
7708 gsi = gsi_start_bb (body_bb);
7709
7710 expr = build2 (plus_code, iter_type, b,
7711 fold_convert (plus_type, offset));
7712 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7713 true, GSI_SAME_STMT);
7714 ass = gimple_build_assign (v, expr);
7715 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
02889d23
CLT
7716
7717 if (fd->collapse > 1 || fd->tiling)
f324479c 7718 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
02889d23
CLT
7719
7720 if (fd->tiling)
7721 {
7722 /* Determine the range of the element loop -- usually simply
7723 the tile_size, but could be smaller if the final
7724 iteration of the outer loop is a partial tile. */
7725 tree e_range = create_tmp_var (diff_type, ".e_range");
7726
7727 expr = build2 (MIN_EXPR, diff_type,
7728 build2 (MINUS_EXPR, diff_type, bound, offset),
7729 build2 (MULT_EXPR, diff_type, tile_size,
7730 element_s));
7731 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7732 true, GSI_SAME_STMT);
7733 ass = gimple_build_assign (e_range, expr);
7734 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7735
7736 /* Determine bound, offset & step of inner loop. */
7737 e_bound = create_tmp_var (diff_type, ".e_bound");
7738 e_offset = create_tmp_var (diff_type, ".e_offset");
7739 e_step = create_tmp_var (diff_type, ".e_step");
7740
7741 /* Mark these as element loops. */
7742 tree t, e_gwv = integer_minus_one_node;
7743 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7744
7745 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7746 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7747 element_s, chunk, e_gwv, chunk);
7748 gimple_call_set_lhs (call, e_offset);
7749 gimple_set_location (call, loc);
7750 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7751
7752 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7753 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7754 element_s, chunk, e_gwv, e_offset);
7755 gimple_call_set_lhs (call, e_bound);
7756 gimple_set_location (call, loc);
7757 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7758
7759 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7760 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7761 element_s, chunk, e_gwv);
7762 gimple_call_set_lhs (call, e_step);
7763 gimple_set_location (call, loc);
7764 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7765
7766 /* Add test and split block. */
7767 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7768 stmt = gimple_build_cond_empty (expr);
7769 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7770 split = split_block (body_bb, stmt);
7771 elem_body_bb = split->dest;
7772 if (cont_bb == body_bb)
7773 cont_bb = elem_body_bb;
7774 body_bb = split->src;
7775
7776 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7777
05e0af43
CP
7778 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7779 if (cont_bb == NULL)
7780 {
7781 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7782 e->probability = profile_probability::even ();
7783 split->probability = profile_probability::even ();
7784 }
7785
02889d23
CLT
7786 /* Initialize the user's loop vars. */
7787 gsi = gsi_start_bb (elem_body_bb);
f324479c
TB
7788 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7789 diff_type);
02889d23 7790 }
629b3d75
MJ
7791 }
7792
7793 /* Loop increment goes into cont_bb. If this is not a loop, we
7794 will have spawned threads as if it were, and each one will
7795 execute one iteration. The specification is not explicit about
7796 whether such constructs are ill-formed or not, and they can
7797 occur, especially when noreturn routines are involved. */
7798 if (cont_bb)
7799 {
65f4b875 7800 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
7801 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7802 loc = gimple_location (cont_stmt);
7803
02889d23
CLT
7804 if (fd->tiling)
7805 {
7806 /* Insert element loop increment and test. */
7807 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7808 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7809 true, GSI_SAME_STMT);
7810 ass = gimple_build_assign (e_offset, expr);
7811 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7812 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7813
7814 stmt = gimple_build_cond_empty (expr);
7815 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7816 split = split_block (cont_bb, stmt);
7817 elem_cont_bb = split->src;
7818 cont_bb = split->dest;
7819
7820 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
9fba2943
TV
7821 split->probability = profile_probability::unlikely ().guessed ();
7822 edge latch_edge
7823 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7824 latch_edge->probability = profile_probability::likely ().guessed ();
7825
7826 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7827 skip_edge->probability = profile_probability::unlikely ().guessed ();
7828 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7829 loop_entry_edge->probability
7830 = profile_probability::likely ().guessed ();
02889d23
CLT
7831
7832 gsi = gsi_for_stmt (cont_stmt);
7833 }
7834
629b3d75
MJ
7835 /* Increment offset. */
7836 if (gimple_in_ssa_p (cfun))
02889d23
CLT
7837 expr = build2 (plus_code, iter_type, offset,
7838 fold_convert (plus_type, step));
629b3d75
MJ
7839 else
7840 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7841 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7842 true, GSI_SAME_STMT);
7843 ass = gimple_build_assign (offset_incr, expr);
7844 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7845 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7846 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7847
7848 /* Remove the GIMPLE_OMP_CONTINUE. */
7849 gsi_remove (&gsi, true);
7850
01914336 7851 /* Fixup edges from cont_bb. */
629b3d75
MJ
7852 be = BRANCH_EDGE (cont_bb);
7853 fte = FALLTHRU_EDGE (cont_bb);
7854 be->flags |= EDGE_TRUE_VALUE;
7855 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7856
7857 if (chunking)
7858 {
7859 /* Split the beginning of exit_bb to make bottom_bb. We
7860 need to insert a nop at the start, because splitting is
01914336 7861 after a stmt, not before. */
629b3d75
MJ
7862 gsi = gsi_start_bb (exit_bb);
7863 stmt = gimple_build_nop ();
7864 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7865 split = split_block (exit_bb, stmt);
7866 bottom_bb = split->src;
7867 exit_bb = split->dest;
7868 gsi = gsi_last_bb (bottom_bb);
7869
7870 /* Chunk increment and test goes into bottom_bb. */
7871 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7872 build_int_cst (diff_type, 1));
7873 ass = gimple_build_assign (chunk_no, expr);
7874 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7875
7876 /* Chunk test at end of bottom_bb. */
7877 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7878 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7879 GSI_CONTINUE_LINKING);
7880
01914336 7881 /* Fixup edges from bottom_bb. */
629b3d75 7882 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
f38057cc
TV
7883 split->probability = profile_probability::unlikely ().guessed ();
7884 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7885 latch_edge->probability = profile_probability::likely ().guessed ();
629b3d75
MJ
7886 }
7887 }
7888
65f4b875 7889 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7890 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7891 loc = gimple_location (gsi_stmt (gsi));
7892
7893 if (!gimple_in_ssa_p (cfun))
7894 {
7895 /* Insert the final value of V, in case it is live. This is the
7896 value for the only thread that survives past the join. */
7897 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7898 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7899 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7900 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7901 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7902 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7903 true, GSI_SAME_STMT);
7904 ass = gimple_build_assign (v, expr);
7905 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7906 }
7907
01914336 7908 /* Remove the OMP_RETURN. */
629b3d75
MJ
7909 gsi_remove (&gsi, true);
7910
7911 if (cont_bb)
7912 {
02889d23 7913 /* We now have one, two or three nested loops. Update the loop
629b3d75 7914 structures. */
99b1c316
MS
7915 class loop *parent = entry_bb->loop_father;
7916 class loop *body = body_bb->loop_father;
629b3d75
MJ
7917
7918 if (chunking)
7919 {
99b1c316 7920 class loop *chunk_loop = alloc_loop ();
629b3d75
MJ
7921 chunk_loop->header = head_bb;
7922 chunk_loop->latch = bottom_bb;
7923 add_loop (chunk_loop, parent);
7924 parent = chunk_loop;
7925 }
7926 else if (parent != body)
7927 {
7928 gcc_assert (body->header == body_bb);
7929 gcc_assert (body->latch == cont_bb
7930 || single_pred (body->latch) == cont_bb);
7931 parent = NULL;
7932 }
7933
7934 if (parent)
7935 {
99b1c316 7936 class loop *body_loop = alloc_loop ();
629b3d75
MJ
7937 body_loop->header = body_bb;
7938 body_loop->latch = cont_bb;
7939 add_loop (body_loop, parent);
02889d23
CLT
7940
7941 if (fd->tiling)
7942 {
7943 /* Insert tiling's element loop. */
99b1c316 7944 class loop *inner_loop = alloc_loop ();
02889d23
CLT
7945 inner_loop->header = elem_body_bb;
7946 inner_loop->latch = elem_cont_bb;
7947 add_loop (inner_loop, body_loop);
7948 }
629b3d75
MJ
7949 }
7950 }
7951}
7952
7953/* Expand the OMP loop defined by REGION. */
7954
7955static void
7956expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7957{
7958 struct omp_for_data fd;
7959 struct omp_for_data_loop *loops;
7960
9d50112a
JJ
7961 loops = XALLOCAVEC (struct omp_for_data_loop,
7962 gimple_omp_for_collapse (last_stmt (region->entry)));
629b3d75
MJ
7963 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7964 &fd, loops);
7965 region->sched_kind = fd.sched_kind;
7966 region->sched_modifiers = fd.sched_modifiers;
0b887b75 7967 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
9d50112a
JJ
7968 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7969 {
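      /* An illustrative note (not from the original sources): for
	 constant steps and multipliers, diagnose non-rectangular loops
	 up front; the spec requires that (m2 - m1) * outer-step be a
	 multiple of the inner loop's step, which is exactly what the
	 error below enforces.  */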
7970 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7971 if ((loops[i].m1 || loops[i].m2)
7972 && (loops[i].m1 == NULL_TREE
7973 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7974 && (loops[i].m2 == NULL_TREE
7975 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7976 && TREE_CODE (loops[i].step) == INTEGER_CST
7977 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7978 {
7979 tree t;
7980 tree itype = TREE_TYPE (loops[i].v);
7981 if (loops[i].m1 && loops[i].m2)
7982 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7983 else if (loops[i].m1)
7984 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7985 else
7986 t = loops[i].m2;
7987 t = fold_build2 (MULT_EXPR, itype, t,
7988 fold_convert (itype,
7989 loops[i - loops[i].outer].step));
7990 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7991 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7992 fold_build1 (NEGATE_EXPR, itype, t),
7993 fold_build1 (NEGATE_EXPR, itype,
7994 fold_convert (itype,
7995 loops[i].step)));
7996 else
7997 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7998 fold_convert (itype, loops[i].step));
7999 if (integer_nonzerop (t))
8000 error_at (gimple_location (fd.for_stmt),
8001 "invalid OpenMP non-rectangular loop step; "
8002 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8003 "step %qE",
8004 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8005 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8006 loops[i - loops[i].outer].step, i + 1,
8007 loops[i].step);
8008 }
8009 }
629b3d75
MJ
8010
8011 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8012 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8013 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8014 if (region->cont)
8015 {
8016 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8017 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8018 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8019 }
8020 else
 8021	    /* If there isn't a continue then this is a degenerate case where
8022 the introduction of abnormal edges during lowering will prevent
8023 original loops from being detected. Fix that up. */
8024 loops_state_set (LOOPS_NEED_FIXUP);
8025
dfa6e5b4 8026 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
c2ebf4f1 8027 expand_omp_simd (region, &fd);
629b3d75
MJ
8028 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8029 {
1160ec9a 8030 gcc_assert (!inner_stmt && !fd.non_rect);
629b3d75
MJ
8031 expand_oacc_for (region, &fd);
8032 }
8033 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8034 {
8035 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8036 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8037 else
8038 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8039 }
8040 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8041 && !fd.have_ordered)
8042 {
8043 if (fd.chunk_size == NULL)
8044 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8045 else
8046 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8047 }
8048 else
8049 {
8050 int fn_index, start_ix, next_ix;
28567c40
JJ
8051 unsigned HOST_WIDE_INT sched = 0;
8052 tree sched_arg = NULL_TREE;
629b3d75
MJ
8053
8054 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
1160ec9a 8055 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
629b3d75
MJ
8056 if (fd.chunk_size == NULL
8057 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8058 fd.chunk_size = integer_zero_node;
629b3d75
MJ
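      /* A descriptive note, assuming the GFS_* encoding from
	 gomp-constants.h: SCHED packs the schedule kind into the low
	 bits (runtime, static, dynamic, guided, auto) and uses bit 31
	 (HOST_WIDE_INT_1U << 31) to mark the schedule as monotonic.  */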
8059 switch (fd.sched_kind)
8060 {
8061 case OMP_CLAUSE_SCHEDULE_RUNTIME:
0b887b75
JJ
8062 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8063 && fd.lastprivate_conditional == 0)
28567c40
JJ
8064 {
8065 gcc_assert (!fd.have_ordered);
8066 fn_index = 6;
8067 sched = 4;
8068 }
8069 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
0b887b75
JJ
8070 && !fd.have_ordered
8071 && fd.lastprivate_conditional == 0)
28567c40
JJ
8072 fn_index = 7;
8073 else
8074 {
8075 fn_index = 3;
8076 sched = (HOST_WIDE_INT_1U << 31);
8077 }
629b3d75
MJ
8078 break;
8079 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8080 case OMP_CLAUSE_SCHEDULE_GUIDED:
28567c40 8081 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
0b887b75
JJ
8082 && !fd.have_ordered
8083 && fd.lastprivate_conditional == 0)
629b3d75
MJ
8084 {
8085 fn_index = 3 + fd.sched_kind;
28567c40 8086 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
629b3d75
MJ
8087 break;
8088 }
629b3d75 8089 fn_index = fd.sched_kind;
28567c40
JJ
8090 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8091 sched += (HOST_WIDE_INT_1U << 31);
629b3d75 8092 break;
28567c40
JJ
8093 case OMP_CLAUSE_SCHEDULE_STATIC:
8094 gcc_assert (fd.have_ordered);
8095 fn_index = 0;
8096 sched = (HOST_WIDE_INT_1U << 31) + 1;
8097 break;
8098 default:
8099 gcc_unreachable ();
629b3d75
MJ
8100 }
8101 if (!fd.ordered)
28567c40 8102 fn_index += fd.have_ordered * 8;
629b3d75
MJ
8103 if (fd.ordered)
8104 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8105 else
8106 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8107 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8221c30b 8108 if (fd.have_reductemp || fd.have_pointer_condtemp)
28567c40
JJ
8109 {
8110 if (fd.ordered)
8111 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8112 else if (fd.have_ordered)
8113 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8114 else
8115 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8116 sched_arg = build_int_cstu (long_integer_type_node, sched);
8117 if (!fd.chunk_size)
8118 fd.chunk_size = integer_zero_node;
8119 }
629b3d75
MJ
8120 if (fd.iter_type == long_long_unsigned_type_node)
8121 {
8122 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8123 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8124 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8125 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8126 }
8127 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
28567c40
JJ
8128 (enum built_in_function) next_ix, sched_arg,
8129 inner_stmt);
629b3d75
MJ
8130 }
8131
8132 if (gimple_in_ssa_p (cfun))
8133 update_ssa (TODO_update_ssa_only_virtuals);
8134}
8135
8136/* Expand code for an OpenMP sections directive. In pseudo code, we generate
8137
8138 v = GOMP_sections_start (n);
8139 L0:
8140 switch (v)
8141 {
8142 case 0:
8143 goto L2;
8144 case 1:
8145 section 1;
8146 goto L1;
8147 case 2:
8148 ...
8149 case n:
8150 ...
8151 default:
8152 abort ();
8153 }
8154 L1:
8155 v = GOMP_sections_next ();
8156 goto L0;
8157 L2:
8158 reduction;
8159
 8160   If this is a combined parallel+sections region, replace the call to
 8161   GOMP_sections_start with a call to GOMP_sections_next.  */
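
/* An illustrative sketch (not from the original sources): given

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   GOMP_sections_start is called with n == 2, case 1 runs foo (),
   case 2 runs bar (), and case 0 is taken once GOMP_sections_next ()
   returns 0 because no sections remain.  */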
8162
8163static void
8164expand_omp_sections (struct omp_region *region)
8165{
8166 tree t, u, vin = NULL, vmain, vnext, l2;
8167 unsigned len;
8168 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8169 gimple_stmt_iterator si, switch_si;
8170 gomp_sections *sections_stmt;
8171 gimple *stmt;
8172 gomp_continue *cont;
8173 edge_iterator ei;
8174 edge e;
8175 struct omp_region *inner;
8176 unsigned i, casei;
8177 bool exit_reachable = region->cont != NULL;
8178
8179 gcc_assert (region->exit != NULL);
8180 entry_bb = region->entry;
8181 l0_bb = single_succ (entry_bb);
8182 l1_bb = region->cont;
8183 l2_bb = region->exit;
8184 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8185 l2 = gimple_block_label (l2_bb);
8186 else
8187 {
8188 /* This can happen if there are reductions. */
8189 len = EDGE_COUNT (l0_bb->succs);
8190 gcc_assert (len > 0);
8191 e = EDGE_SUCC (l0_bb, len - 1);
65f4b875 8192 si = gsi_last_nondebug_bb (e->dest);
629b3d75
MJ
8193 l2 = NULL_TREE;
8194 if (gsi_end_p (si)
01914336 8195 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
629b3d75
MJ
8196 l2 = gimple_block_label (e->dest);
8197 else
8198 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8199 {
65f4b875 8200 si = gsi_last_nondebug_bb (e->dest);
629b3d75
MJ
8201 if (gsi_end_p (si)
8202 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8203 {
8204 l2 = gimple_block_label (e->dest);
8205 break;
8206 }
8207 }
8208 }
8209 if (exit_reachable)
8210 default_bb = create_empty_bb (l1_bb->prev_bb);
8211 else
8212 default_bb = create_empty_bb (l0_bb);
8213
8214 /* We will build a switch() with enough cases for all the
 8215      GIMPLE_OMP_SECTION regions, a '0' case to handle when there is no more work
8216 and a default case to abort if something goes wrong. */
8217 len = EDGE_COUNT (l0_bb->succs);
8218
8219 /* Use vec::quick_push on label_vec throughout, since we know the size
8220 in advance. */
8221 auto_vec<tree> label_vec (len);
8222
8223 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8224 GIMPLE_OMP_SECTIONS statement. */
65f4b875 8225 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
8226 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8227 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8228 vin = gimple_omp_sections_control (sections_stmt);
28567c40
JJ
8229 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8230 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8e7757ba
JJ
8231 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8232 tree cond_var = NULL_TREE;
8233 if (reductmp || condtmp)
8234 {
8235 tree reductions = null_pointer_node, mem = null_pointer_node;
8236 tree memv = NULL_TREE, condtemp = NULL_TREE;
8237 gimple_stmt_iterator gsi = gsi_none ();
8238 gimple *g = NULL;
8239 if (reductmp)
8240 {
8241 reductions = OMP_CLAUSE_DECL (reductmp);
8242 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8243 g = SSA_NAME_DEF_STMT (reductions);
8244 reductions = gimple_assign_rhs1 (g);
8245 OMP_CLAUSE_DECL (reductmp) = reductions;
8246 gsi = gsi_for_stmt (g);
8247 }
8248 else
8249 gsi = si;
8250 if (condtmp)
8251 {
8252 condtemp = OMP_CLAUSE_DECL (condtmp);
8253 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8254 OMP_CLAUSE__CONDTEMP_);
8255 cond_var = OMP_CLAUSE_DECL (c);
8256 tree type = TREE_TYPE (condtemp);
8257 memv = create_tmp_var (type);
8258 TREE_ADDRESSABLE (memv) = 1;
8259 unsigned cnt = 0;
8260 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8261 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8262 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8263 ++cnt;
8264 unsigned HOST_WIDE_INT sz
8265 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8266 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8267 false);
8268 mem = build_fold_addr_expr (memv);
8269 }
28567c40
JJ
8270 t = build_int_cst (unsigned_type_node, len - 1);
8271 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8e7757ba 8272 stmt = gimple_build_call (u, 3, t, reductions, mem);
28567c40
JJ
8273 gimple_call_set_lhs (stmt, vin);
8274 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8e7757ba
JJ
8275 if (condtmp)
8276 {
8277 expand_omp_build_assign (&gsi, condtemp, memv, false);
8278 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8279 vin, build_one_cst (TREE_TYPE (cond_var)));
8280 expand_omp_build_assign (&gsi, cond_var, t, false);
8281 }
8282 if (reductmp)
8283 {
8284 gsi_remove (&gsi, true);
8285 release_ssa_name (gimple_assign_lhs (g));
8286 }
28567c40
JJ
8287 }
8288 else if (!is_combined_parallel (region))
629b3d75
MJ
8289 {
8290 /* If we are not inside a combined parallel+sections region,
8291 call GOMP_sections_start. */
8292 t = build_int_cst (unsigned_type_node, len - 1);
8293 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8294 stmt = gimple_build_call (u, 1, t);
8295 }
8296 else
8297 {
8298 /* Otherwise, call GOMP_sections_next. */
8299 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8300 stmt = gimple_build_call (u, 0);
8301 }
8e7757ba 8302 if (!reductmp && !condtmp)
28567c40
JJ
8303 {
8304 gimple_call_set_lhs (stmt, vin);
8305 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8306 }
629b3d75
MJ
8307 gsi_remove (&si, true);
8308
8309 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8310 L0_BB. */
65f4b875 8311 switch_si = gsi_last_nondebug_bb (l0_bb);
629b3d75
MJ
8312 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8313 if (exit_reachable)
8314 {
8315 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8316 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8317 vmain = gimple_omp_continue_control_use (cont);
8318 vnext = gimple_omp_continue_control_def (cont);
8319 }
8320 else
8321 {
8322 vmain = vin;
8323 vnext = NULL_TREE;
8324 }
8325
8326 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8327 label_vec.quick_push (t);
8328 i = 1;
8329
8330 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8331 for (inner = region->inner, casei = 1;
8332 inner;
8333 inner = inner->next, i++, casei++)
8334 {
8335 basic_block s_entry_bb, s_exit_bb;
8336
8337 /* Skip optional reduction region. */
8338 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8339 {
8340 --i;
8341 --casei;
8342 continue;
8343 }
8344
8345 s_entry_bb = inner->entry;
8346 s_exit_bb = inner->exit;
8347
8348 t = gimple_block_label (s_entry_bb);
8349 u = build_int_cst (unsigned_type_node, casei);
8350 u = build_case_label (u, NULL, t);
8351 label_vec.quick_push (u);
8352
65f4b875 8353 si = gsi_last_nondebug_bb (s_entry_bb);
629b3d75
MJ
8354 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8355 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8356 gsi_remove (&si, true);
8357 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8358
8359 if (s_exit_bb == NULL)
8360 continue;
8361
65f4b875 8362 si = gsi_last_nondebug_bb (s_exit_bb);
629b3d75
MJ
8363 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8364 gsi_remove (&si, true);
8365
8366 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8367 }
8368
8369 /* Error handling code goes in DEFAULT_BB. */
8370 t = gimple_block_label (default_bb);
8371 u = build_case_label (NULL, NULL, t);
8372 make_edge (l0_bb, default_bb, 0);
8373 add_bb_to_loop (default_bb, current_loops->tree_root);
8374
8375 stmt = gimple_build_switch (vmain, u, label_vec);
8376 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8377 gsi_remove (&switch_si, true);
8378
8379 si = gsi_start_bb (default_bb);
8380 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8381 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8382
8383 if (exit_reachable)
8384 {
8385 tree bfn_decl;
8386
8387 /* Code to get the next section goes in L1_BB. */
65f4b875 8388 si = gsi_last_nondebug_bb (l1_bb);
629b3d75
MJ
8389 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8390
8391 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8392 stmt = gimple_build_call (bfn_decl, 0);
8393 gimple_call_set_lhs (stmt, vnext);
8e7757ba
JJ
8394 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8395 if (cond_var)
8396 {
8397 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8398 vnext, build_one_cst (TREE_TYPE (cond_var)));
8399 expand_omp_build_assign (&si, cond_var, t, false);
8400 }
629b3d75
MJ
8401 gsi_remove (&si, true);
8402
8403 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8404 }
8405
8406 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
65f4b875 8407 si = gsi_last_nondebug_bb (l2_bb);
629b3d75
MJ
8408 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8409 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8410 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8411 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8412 else
8413 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8414 stmt = gimple_build_call (t, 0);
8415 if (gimple_omp_return_lhs (gsi_stmt (si)))
8416 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8417 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8418 gsi_remove (&si, true);
8419
8420 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8421}
8422
e45483c7 8423/* Expand code for an OpenMP single or scope directive. We've already expanded
629b3d75
MJ
 8424   much of the code; here we simply place the GOMP_barrier call.  */
8425
8426static void
8427expand_omp_single (struct omp_region *region)
8428{
8429 basic_block entry_bb, exit_bb;
8430 gimple_stmt_iterator si;
8431
8432 entry_bb = region->entry;
8433 exit_bb = region->exit;
8434
65f4b875 8435 si = gsi_last_nondebug_bb (entry_bb);
e45483c7
JJ
8436 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8437 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SCOPE);
629b3d75
MJ
8438 gsi_remove (&si, true);
8439 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8440
65f4b875 8441 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
8442 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8443 {
8444 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8445 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8446 }
8447 gsi_remove (&si, true);
8448 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8449}
8450
 8451/* Generic expansion for OpenMP synchronization directives: master,
 8452   masked, taskgroup, ordered and critical.  All we need to do here
 8453   is remove the entry and exit markers for REGION.  */
8454
8455static void
8456expand_omp_synch (struct omp_region *region)
8457{
8458 basic_block entry_bb, exit_bb;
8459 gimple_stmt_iterator si;
8460
8461 entry_bb = region->entry;
8462 exit_bb = region->exit;
8463
65f4b875 8464 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
8465 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8466 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
d0befed7 8467 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
629b3d75
MJ
8468 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8469 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8470 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8471 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
28567c40
JJ
8472 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8473 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8474 {
8475 expand_omp_taskreg (region);
8476 return;
8477 }
629b3d75
MJ
8478 gsi_remove (&si, true);
8479 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8480
8481 if (exit_bb)
8482 {
65f4b875 8483 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
8484 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8485 gsi_remove (&si, true);
8486 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8487 }
8488}
8489
8122fbff
JJ
 8490/* Translate enum omp_memory_order to enum memmodel for the fail
 8491   clause embedded in it.  */
8492
8493static enum memmodel
8494omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8495{
8496 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8497 {
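    /* With no fail clause specified, derive the fail ordering from the
       success ordering, demoting release to relaxed and acq_rel to
       acquire: a failed compare-and-swap performs no store.  */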
8498 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8499 switch (mo & OMP_MEMORY_ORDER_MASK)
8500 {
8501 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8502 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8503 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8504 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8505 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8506 default: break;
8507 }
8508 gcc_unreachable ();
8509 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8510 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8511 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8512 default: gcc_unreachable ();
8513 }
8514}
8515
28567c40
JJ
 8516/* Translate enum omp_memory_order to enum memmodel.  The two enums
 8517   use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8122fbff 8518   is 0 and omp_memory_order also encodes the fail mode.  */
28567c40
JJ
8519
8520static enum memmodel
8521omp_memory_order_to_memmodel (enum omp_memory_order mo)
8522{
8122fbff
JJ
8523 enum memmodel ret, fail_ret;
8524 switch (mo & OMP_MEMORY_ORDER_MASK)
8525 {
8526 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8527 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8528 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8529 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8530 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
28567c40
JJ
8531 default: gcc_unreachable ();
8532 }
8122fbff
JJ
8533 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8534 we can just return ret here unconditionally. Otherwise, work around
8535 it here and make sure fail memmodel is not stronger. */
8536 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8537 return ret;
8538 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8539 if (fail_ret > ret)
8540 return fail_ret;
8541 return ret;
28567c40
JJ
8542}
8543
629b3d75
MJ
8544/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8545 operation as a normal volatile load. */
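
/* An illustrative sketch (not from the original sources): for a 4-byte
   int, "#pragma omp atomic read" of the form "v = *addr;" is expanded
   here into a call such as

     v = __atomic_load_4 (addr, mo);

   where MO is the memmodel derived by omp_memory_order_to_memmodel
   from the directive's memory-order clause.  */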
8546
8547static bool
8548expand_omp_atomic_load (basic_block load_bb, tree addr,
8549 tree loaded_val, int index)
8550{
8551 enum built_in_function tmpbase;
8552 gimple_stmt_iterator gsi;
8553 basic_block store_bb;
8554 location_t loc;
8555 gimple *stmt;
8556 tree decl, call, type, itype;
8557
65f4b875 8558 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
8559 stmt = gsi_stmt (gsi);
8560 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8561 loc = gimple_location (stmt);
8562
8563 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8564 is smaller than word size, then expand_atomic_load assumes that the load
8565 is atomic. We could avoid the builtin entirely in this case. */
8566
8567 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8568 decl = builtin_decl_explicit (tmpbase);
8569 if (decl == NULL_TREE)
8570 return false;
8571
8572 type = TREE_TYPE (loaded_val);
8573 itype = TREE_TYPE (TREE_TYPE (decl));
8574
28567c40
JJ
8575 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8576 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8577 call = build_call_expr_loc (loc, decl, 2, addr, mo);
629b3d75
MJ
8578 if (!useless_type_conversion_p (type, itype))
8579 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8580 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8581
8582 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8583 gsi_remove (&gsi, true);
8584
8585 store_bb = single_succ (load_bb);
65f4b875 8586 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
8587 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8588 gsi_remove (&gsi, true);
8589
8590 if (gimple_in_ssa_p (cfun))
8591 update_ssa (TODO_update_ssa_no_phi);
8592
8593 return true;
8594}
8595
8596/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8597 operation as a normal volatile store. */
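
/* An illustrative sketch (not from the original sources): for a 4-byte
   int, "#pragma omp atomic write" of the form "*addr = x;" becomes

     __atomic_store_4 (addr, x, mo);

   and when the old value is needed as well (an atomic capture), an
   __atomic_exchange_4 call is emitted instead.  */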
8598
8599static bool
8600expand_omp_atomic_store (basic_block load_bb, tree addr,
8601 tree loaded_val, tree stored_val, int index)
8602{
8603 enum built_in_function tmpbase;
8604 gimple_stmt_iterator gsi;
8605 basic_block store_bb = single_succ (load_bb);
8606 location_t loc;
8607 gimple *stmt;
8608 tree decl, call, type, itype;
8609 machine_mode imode;
8610 bool exchange;
8611
65f4b875 8612 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
8613 stmt = gsi_stmt (gsi);
8614 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8615
8616 /* If the load value is needed, then this isn't a store but an exchange. */
8617 exchange = gimple_omp_atomic_need_value_p (stmt);
8618
65f4b875 8619 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
8620 stmt = gsi_stmt (gsi);
8621 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8622 loc = gimple_location (stmt);
8623
8624 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8625 is smaller than word size, then expand_atomic_store assumes that the store
8626 is atomic. We could avoid the builtin entirely in this case. */
8627
8628 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8629 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8630 decl = builtin_decl_explicit (tmpbase);
8631 if (decl == NULL_TREE)
8632 return false;
8633
8634 type = TREE_TYPE (stored_val);
8635
8636 /* Dig out the type of the function's second argument. */
8637 itype = TREE_TYPE (decl);
8638 itype = TYPE_ARG_TYPES (itype);
8639 itype = TREE_CHAIN (itype);
8640 itype = TREE_VALUE (itype);
8641 imode = TYPE_MODE (itype);
8642
8643 if (exchange && !can_atomic_exchange_p (imode, true))
8644 return false;
8645
8646 if (!useless_type_conversion_p (itype, type))
8647 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
28567c40
JJ
8648 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8649 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8650 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
629b3d75
MJ
8651 if (exchange)
8652 {
8653 if (!useless_type_conversion_p (type, itype))
8654 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8655 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8656 }
8657
8658 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8659 gsi_remove (&gsi, true);
8660
8661 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
65f4b875 8662 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
8663 gsi_remove (&gsi, true);
8664
8665 if (gimple_in_ssa_p (cfun))
8666 update_ssa (TODO_update_ssa_no_phi);
8667
8668 return true;
8669}
8670
8671/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8672 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8673 size of the data type, and thus usable to find the index of the builtin
8674 decl. Returns false if the expression is not of the proper form. */
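
/* An illustrative sketch (not from the original sources): for a 4-byte
   int, "#pragma omp atomic" applied to "x = x + 1;" matches the
   PLUS_EXPR case below and is expanded into

     __atomic_fetch_add_4 (&x, 1, mo);

   with the __atomic_add_fetch_4 variant used instead when the updated
   value is needed.  */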
8675
8676static bool
8677expand_omp_atomic_fetch_op (basic_block load_bb,
8678 tree addr, tree loaded_val,
8679 tree stored_val, int index)
8680{
8681 enum built_in_function oldbase, newbase, tmpbase;
8682 tree decl, itype, call;
8683 tree lhs, rhs;
8684 basic_block store_bb = single_succ (load_bb);
8685 gimple_stmt_iterator gsi;
8686 gimple *stmt;
8687 location_t loc;
8688 enum tree_code code;
8689 bool need_old, need_new;
8690 machine_mode imode;
629b3d75
MJ
8691
8692 /* We expect to find the following sequences:
8693
8694 load_bb:
8695 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8696
8697 store_bb:
8698 val = tmp OP something; (or: something OP tmp)
 8699	 GIMPLE_OMP_ATOMIC_STORE (val)
8700
8701 ???FIXME: Allow a more flexible sequence.
8702 Perhaps use data flow to pick the statements.
8703
8704 */
8705
8706 gsi = gsi_after_labels (store_bb);
8707 stmt = gsi_stmt (gsi);
65f4b875
AO
8708 if (is_gimple_debug (stmt))
8709 {
8710 gsi_next_nondebug (&gsi);
8711 if (gsi_end_p (gsi))
8712 return false;
8713 stmt = gsi_stmt (gsi);
8714 }
629b3d75
MJ
8715 loc = gimple_location (stmt);
8716 if (!is_gimple_assign (stmt))
8717 return false;
65f4b875 8718 gsi_next_nondebug (&gsi);
629b3d75
MJ
8719 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8720 return false;
8721 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8722 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
28567c40
JJ
8723 enum omp_memory_order omo
8724 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8725 enum memmodel mo = omp_memory_order_to_memmodel (omo);
629b3d75
MJ
8726 gcc_checking_assert (!need_old || !need_new);
8727
8728 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8729 return false;
8730
8731 /* Check for one of the supported fetch-op operations. */
8732 code = gimple_assign_rhs_code (stmt);
8733 switch (code)
8734 {
8735 case PLUS_EXPR:
8736 case POINTER_PLUS_EXPR:
8737 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8738 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8739 break;
8740 case MINUS_EXPR:
8741 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8742 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8743 break;
8744 case BIT_AND_EXPR:
8745 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8746 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8747 break;
8748 case BIT_IOR_EXPR:
8749 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8750 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8751 break;
8752 case BIT_XOR_EXPR:
8753 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8754 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8755 break;
8756 default:
8757 return false;
8758 }
8759
8760 /* Make sure the expression is of the proper form. */
8761 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8762 rhs = gimple_assign_rhs2 (stmt);
8763 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8764 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8765 rhs = gimple_assign_rhs1 (stmt);
8766 else
8767 return false;
8768
8769 tmpbase = ((enum built_in_function)
8770 ((need_new ? newbase : oldbase) + index + 1));
8771 decl = builtin_decl_explicit (tmpbase);
8772 if (decl == NULL_TREE)
8773 return false;
8774 itype = TREE_TYPE (TREE_TYPE (decl));
8775 imode = TYPE_MODE (itype);
8776
8777 /* We could test all of the various optabs involved, but the fact of the
8778 matter is that (with the exception of i486 vs i586 and xadd) all targets
 8779     that support any atomic operation optab also implement compare-and-swap.
8780 Let optabs.c take care of expanding any compare-and-swap loop. */
dc06356a 8781 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
629b3d75
MJ
8782 return false;
8783
65f4b875 8784 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
8785 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8786
8787 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8788 It only requires that the operation happen atomically. Thus we can
8789 use the RELAXED memory model. */
8790 call = build_call_expr_loc (loc, decl, 3, addr,
8791 fold_convert_loc (loc, itype, rhs),
28567c40 8792 build_int_cst (NULL, mo));
629b3d75
MJ
8793
8794 if (need_old || need_new)
8795 {
8796 lhs = need_old ? loaded_val : stored_val;
8797 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8798 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8799 }
8800 else
8801 call = fold_convert_loc (loc, void_type_node, call);
8802 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8803 gsi_remove (&gsi, true);
8804
65f4b875 8805 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
8806 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8807 gsi_remove (&gsi, true);
65f4b875 8808 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
8809 stmt = gsi_stmt (gsi);
8810 gsi_remove (&gsi, true);
8811
8812 if (gimple_in_ssa_p (cfun))
8813 {
8814 release_defs (stmt);
8815 update_ssa (TODO_update_ssa_no_phi);
8816 }
8817
8818 return true;
8819}
8820
8122fbff
JJ
8821/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8822 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8823 Returns false if the expression is not of the proper form. */
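
/* An illustrative sketch (not from the original sources): this matches
   the expansion of a construct such as

     #pragma omp atomic compare
     if (x == e) { x = d; }

   collapsing the load/compare/store region into a single call of the
   IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */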
8824
8825static bool
8826expand_omp_atomic_cas (basic_block load_bb, tree addr,
8827 tree loaded_val, tree stored_val, int index)
8828{
8829 /* We expect to find the following sequences:
8830
8831 load_bb:
8832 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8833
8834 store_bb:
8835 val = tmp == e ? d : tmp;
8836 GIMPLE_OMP_ATOMIC_STORE (val)
8837
8838 or in store_bb instead:
8839 tmp2 = tmp == e;
8840 val = tmp2 ? d : tmp;
8841 GIMPLE_OMP_ATOMIC_STORE (val)
8842
8843 or:
8844 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8845 val = e == tmp3 ? d : tmp;
8846 GIMPLE_OMP_ATOMIC_STORE (val)
8847
8848 etc. */
8849
8850
8851 basic_block store_bb = single_succ (load_bb);
8852 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8853 gimple *store_stmt = gsi_stmt (gsi);
8854 if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8855 return false;
8856 gsi_prev_nondebug (&gsi);
8857 if (gsi_end_p (gsi))
8858 return false;
8859 gimple *condexpr_stmt = gsi_stmt (gsi);
8860 if (!is_gimple_assign (condexpr_stmt)
8861 || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8862 return false;
8863 if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8864 return false;
8865 gimple *cond_stmt = NULL;
8866 gimple *vce_stmt = NULL;
8867 gsi_prev_nondebug (&gsi);
8868 if (!gsi_end_p (gsi))
8869 {
8870 cond_stmt = gsi_stmt (gsi);
8871 if (!is_gimple_assign (cond_stmt))
8872 return false;
8873 if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8874 {
8875 gsi_prev_nondebug (&gsi);
8876 if (!gsi_end_p (gsi))
8877 {
8878 vce_stmt = gsi_stmt (gsi);
8879 if (!is_gimple_assign (vce_stmt)
8880 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8881 return false;
8882 }
8883 }
8884 else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8885 std::swap (vce_stmt, cond_stmt);
8886 else
8887 return false;
8888 if (vce_stmt)
8889 {
8890 tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8891 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8892 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8893 return false;
8894 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8895 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8896 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8897 TYPE_SIZE (TREE_TYPE (loaded_val))))
8898 return false;
8899 gsi_prev_nondebug (&gsi);
8900 if (!gsi_end_p (gsi))
8901 return false;
8902 }
8903 }
8904 tree cond = gimple_assign_rhs1 (condexpr_stmt);
8905 tree cond_op1, cond_op2;
8906 if (cond_stmt)
8907 {
8908 if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
8909 return false;
8910 cond_op1 = gimple_assign_rhs1 (cond_stmt);
8911 cond_op2 = gimple_assign_rhs2 (cond_stmt);
8912 }
8913 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
8914 return false;
8915 else
8916 {
8917 cond_op1 = TREE_OPERAND (cond, 0);
8918 cond_op2 = TREE_OPERAND (cond, 1);
8919 }
8920 tree d;
8921 if (TREE_CODE (cond) == NE_EXPR)
8922 {
8923 if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
8924 return false;
8925 d = gimple_assign_rhs3 (condexpr_stmt);
8926 }
8927 else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
8928 return false;
8929 else
8930 d = gimple_assign_rhs2 (condexpr_stmt);
8931 tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
8932 if (operand_equal_p (e, cond_op1))
8933 e = cond_op2;
8934 else if (operand_equal_p (e, cond_op2))
8935 e = cond_op1;
8936 else
8937 return false;
8938
8939 location_t loc = gimple_location (store_stmt);
8940 gimple *load_stmt = last_stmt (load_bb);
8941 bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
8942 bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
8943 bool weak = gimple_omp_atomic_weak_p (load_stmt);
8944 enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
8945 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8946 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
8947 gcc_checking_assert (!need_old || !need_new);
8948
8949 enum built_in_function fncode
8950 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8951 + index + 1);
8952 tree cmpxchg = builtin_decl_explicit (fncode);
8953 if (cmpxchg == NULL_TREE)
8954 return false;
8955 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8956
8957 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8958 || !can_atomic_load_p (TYPE_MODE (itype)))
8959 return false;
8960
8961 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8962 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
8963 return false;
8964
8965 gsi = gsi_for_stmt (store_stmt);
8966 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
8967 {
8968 tree ne = create_tmp_reg (itype);
8969 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
8970 gimple_set_location (g, loc);
8971 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8972 e = ne;
8973 }
8974 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
8975 {
8976 tree nd = create_tmp_reg (itype);
8977 enum tree_code code;
8978 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
8979 {
8980 code = VIEW_CONVERT_EXPR;
8981 d = build1 (VIEW_CONVERT_EXPR, itype, d);
8982 }
8983 else
8984 code = NOP_EXPR;
8985 gimple *g = gimple_build_assign (nd, code, d);
8986 gimple_set_location (g, loc);
8987 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8988 d = nd;
8989 }
8990
8991 tree ctype = build_complex_type (itype);
8992 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
8993 gimple *g
8994 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
8995 build_int_cst (integer_type_node, flag),
8996 mo, fmo);
8997 tree cres = create_tmp_reg (ctype);
8998 gimple_call_set_lhs (g, cres);
8999 gimple_set_location (g, loc);
9000 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9001
9002 if (cond_stmt || need_old || need_new)
9003 {
9004 tree im = create_tmp_reg (itype);
9005 g = gimple_build_assign (im, IMAGPART_EXPR,
9006 build1 (IMAGPART_EXPR, itype, cres));
9007 gimple_set_location (g, loc);
9008 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9009
9010 tree re = NULL_TREE;
9011 if (need_old || need_new)
9012 {
9013 re = create_tmp_reg (itype);
9014 g = gimple_build_assign (re, REALPART_EXPR,
9015 build1 (REALPART_EXPR, itype, cres));
9016 gimple_set_location (g, loc);
9017 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9018 }
9019
9020 if (cond_stmt)
9021 {
9022 g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9023 NOP_EXPR, im);
9024 gimple_set_location (g, loc);
9025 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9026 }
9027 else if (need_new)
9028 {
9029 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9030 build2 (NE_EXPR, boolean_type_node,
9031 im, build_zero_cst (itype)),
9032 d, re);
9033 gimple_set_location (g, loc);
9034 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9035 re = gimple_assign_lhs (g);
9036 }
9037
9038 if (need_old || need_new)
9039 {
9040 tree v = need_old ? loaded_val : stored_val;
9041 enum tree_code code;
9042 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9043 {
9044 code = VIEW_CONVERT_EXPR;
9045 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9046 }
9047 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9048 code = NOP_EXPR;
9049 else
9050 code = TREE_CODE (re);
9051 g = gimple_build_assign (v, code, re);
9052 gimple_set_location (g, loc);
9053 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9054 }
9055 }
9056
9057 gsi_remove (&gsi, true);
9058 gsi = gsi_for_stmt (load_stmt);
9059 gsi_remove (&gsi, true);
9060 gsi = gsi_for_stmt (condexpr_stmt);
9061 gsi_remove (&gsi, true);
9062 if (cond_stmt)
9063 {
9064 gsi = gsi_for_stmt (cond_stmt);
9065 gsi_remove (&gsi, true);
9066 }
9067 if (vce_stmt)
9068 {
9069 gsi = gsi_for_stmt (vce_stmt);
9070 gsi_remove (&gsi, true);
9071 }
9072
9073 return true;
9074}
9075
629b3d75
MJ
9076/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9077
9078 oldval = *addr;
9079 repeat:
01914336 9080 newval = rhs; // with oldval replacing *addr in rhs
629b3d75
MJ
 9081      actual = __sync_val_compare_and_swap (addr, oldval, newval);
 9082      if (actual != oldval)
 9083	 { oldval = actual; goto repeat; }
9084
9085 INDEX is log2 of the size of the data type, and thus usable to find the
9086 index of the builtin decl. */
9087
9088static bool
9089expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9090 tree addr, tree loaded_val, tree stored_val,
9091 int index)
9092{
9093 tree loadedi, storedi, initial, new_storedi, old_vali;
b4e47472 9094 tree type, itype, cmpxchg, iaddr, atype;
629b3d75
MJ
9095 gimple_stmt_iterator si;
9096 basic_block loop_header = single_succ (load_bb);
9097 gimple *phi, *stmt;
9098 edge e;
9099 enum built_in_function fncode;
9100
629b3d75
MJ
9101 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9102 + index + 1);
9103 cmpxchg = builtin_decl_explicit (fncode);
9104 if (cmpxchg == NULL_TREE)
9105 return false;
b4e47472
JJ
9106 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9107 atype = type;
629b3d75
MJ
9108 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9109
dc06356a
JJ
9110 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9111 || !can_atomic_load_p (TYPE_MODE (itype)))
629b3d75
MJ
9112 return false;
9113
9114 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
65f4b875 9115 si = gsi_last_nondebug_bb (load_bb);
629b3d75 9116 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
090f0d78
JJ
9117 location_t loc = gimple_location (gsi_stmt (si));
9118 enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
8122fbff
JJ
9119 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9120 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
629b3d75
MJ
9121
9122 /* For floating-point values, we'll need to view-convert them to integers
9123 so that we can perform the atomic compare and swap. Simplify the
9124 following code by always setting up the "i"ntegral variables. */
9125 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9126 {
9127 tree iaddr_val;
9128
9129 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9130 true));
b4e47472 9131 atype = itype;
629b3d75
MJ
9132 iaddr_val
9133 = force_gimple_operand_gsi (&si,
9134 fold_convert (TREE_TYPE (iaddr), addr),
9135 false, NULL_TREE, true, GSI_SAME_STMT);
9136 stmt = gimple_build_assign (iaddr, iaddr_val);
9137 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9138 loadedi = create_tmp_var (itype);
9139 if (gimple_in_ssa_p (cfun))
9140 loadedi = make_ssa_name (loadedi);
9141 }
9142 else
9143 {
9144 iaddr = addr;
9145 loadedi = loaded_val;
9146 }
9147
9148 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9149 tree loaddecl = builtin_decl_explicit (fncode);
9150 if (loaddecl)
9151 initial
b4e47472 9152 = fold_convert (atype,
629b3d75
MJ
9153 build_call_expr (loaddecl, 2, iaddr,
9154 build_int_cst (NULL_TREE,
9155 MEMMODEL_RELAXED)));
9156 else
b4e47472
JJ
9157 {
9158 tree off
9159 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9160 true), 0);
9161 initial = build2 (MEM_REF, atype, iaddr, off);
9162 }
629b3d75
MJ
9163
9164 initial
9165 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9166 GSI_SAME_STMT);
9167
9168 /* Move the value to the LOADEDI temporary. */
9169 if (gimple_in_ssa_p (cfun))
9170 {
9171 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9172 phi = create_phi_node (loadedi, loop_header);
9173 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9174 initial);
9175 }
9176 else
9177 gsi_insert_before (&si,
9178 gimple_build_assign (loadedi, initial),
9179 GSI_SAME_STMT);
9180 if (loadedi != loaded_val)
9181 {
9182 gimple_stmt_iterator gsi2;
9183 tree x;
9184
9185 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9186 gsi2 = gsi_start_bb (loop_header);
9187 if (gimple_in_ssa_p (cfun))
9188 {
9189 gassign *stmt;
9190 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9191 true, GSI_SAME_STMT);
9192 stmt = gimple_build_assign (loaded_val, x);
9193 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9194 }
9195 else
9196 {
9197 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9198 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9199 true, GSI_SAME_STMT);
9200 }
9201 }
9202 gsi_remove (&si, true);
9203
65f4b875 9204 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
9205 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9206
9207 if (iaddr == addr)
9208 storedi = stored_val;
9209 else
01914336
MJ
9210 storedi
9211 = force_gimple_operand_gsi (&si,
9212 build1 (VIEW_CONVERT_EXPR, itype,
9213 stored_val), true, NULL_TREE, true,
9214 GSI_SAME_STMT);
629b3d75
MJ
9215
9216 /* Build the compare&swap statement. */
090f0d78
JJ
9217 tree ctype = build_complex_type (itype);
9218 int flag = int_size_in_bytes (itype);
9219 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9220 ctype, 6, iaddr, loadedi,
9221 storedi,
9222 build_int_cst (integer_type_node,
9223 flag),
9224 mo, fmo);
9225 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
629b3d75
MJ
9226 new_storedi = force_gimple_operand_gsi (&si,
9227 fold_convert (TREE_TYPE (loadedi),
9228 new_storedi),
9229 true, NULL_TREE,
9230 true, GSI_SAME_STMT);
9231
9232 if (gimple_in_ssa_p (cfun))
9233 old_vali = loadedi;
9234 else
9235 {
9236 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9237 stmt = gimple_build_assign (old_vali, loadedi);
9238 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9239
9240 stmt = gimple_build_assign (loadedi, new_storedi);
9241 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9242 }
9243
9244 /* Note that we always perform the comparison as an integer, even for
9245 floating point. This allows the atomic operation to properly
9246 succeed even with NaNs and -0.0. */
01914336
MJ
9247 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9248 stmt = gimple_build_cond_empty (ne);
629b3d75
MJ
9249 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9250
9251 /* Update cfg. */
9252 e = single_succ_edge (store_bb);
9253 e->flags &= ~EDGE_FALLTHRU;
9254 e->flags |= EDGE_FALSE_VALUE;
357067f2
JH
9255 /* Expect no looping. */
9256 e->probability = profile_probability::guessed_always ();
629b3d75
MJ
9257
9258 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
357067f2 9259 e->probability = profile_probability::guessed_never ();
629b3d75
MJ
9260
9261 /* Copy the new value to loadedi (we already did that before the condition
9262 if we are not in SSA). */
9263 if (gimple_in_ssa_p (cfun))
9264 {
9265 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9266 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9267 }
9268
9269 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9270 gsi_remove (&si, true);
9271
99b1c316 9272 class loop *loop = alloc_loop ();
629b3d75
MJ
9273 loop->header = loop_header;
9274 loop->latch = store_bb;
9275 add_loop (loop, loop_header->loop_father);
9276
9277 if (gimple_in_ssa_p (cfun))
9278 update_ssa (TODO_update_ssa_no_phi);
9279
9280 return true;
9281}
9282
9283/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9284
01914336
MJ
9285 GOMP_atomic_start ();
9286 *addr = rhs;
9287 GOMP_atomic_end ();
629b3d75
MJ
9288
9289 The result is not globally atomic, but works so long as all parallel
9290 references are within #pragma omp atomic directives. According to
 9291   responses received from omp@openmp.org, this appears to be within
 9292   spec, which makes sense given that several other compilers handle
 9293   this situation the same way.
9294 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9295 expanding. STORED_VAL is the operand of the matching
9296 GIMPLE_OMP_ATOMIC_STORE.
9297
9298 We replace
9299 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9300 loaded_val = *addr;
9301
9302 and replace
9303 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9304 *addr = stored_val;
9305*/
9306
9307static bool
9308expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9309 tree addr, tree loaded_val, tree stored_val)
9310{
9311 gimple_stmt_iterator si;
9312 gassign *stmt;
9313 tree t;
9314
65f4b875 9315 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
9316 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9317
9318 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9319 t = build_call_expr (t, 0);
9320 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9321
b4e47472
JJ
9322 tree mem = build_simple_mem_ref (addr);
9323 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9324 TREE_OPERAND (mem, 1)
9325 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9326 true),
9327 TREE_OPERAND (mem, 1));
9328 stmt = gimple_build_assign (loaded_val, mem);
629b3d75
MJ
9329 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9330 gsi_remove (&si, true);
9331
65f4b875 9332 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
9333 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9334
b4e47472 9335 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
629b3d75
MJ
9336 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9337
9338 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9339 t = build_call_expr (t, 0);
9340 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9341 gsi_remove (&si, true);
9342
9343 if (gimple_in_ssa_p (cfun))
9344 update_ssa (TODO_update_ssa_no_phi);
9345 return true;
9346}
9347
 9348/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
01914336 9349   using expand_omp_atomic_fetch_op.  If that fails, we try to
629b3d75
MJ
9350 call expand_omp_atomic_pipeline, and if it fails too, the
9351 ultimate fallback is wrapping the operation in a mutex
9352 (expand_omp_atomic_mutex). REGION is the atomic region built
9353 by build_omp_regions_1(). */
9354
9355static void
9356expand_omp_atomic (struct omp_region *region)
9357{
9358 basic_block load_bb = region->entry, store_bb = region->exit;
9359 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9360 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9361 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9362 tree addr = gimple_omp_atomic_load_rhs (load);
9363 tree stored_val = gimple_omp_atomic_store_val (store);
b4e47472 9364 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
629b3d75
MJ
9365 HOST_WIDE_INT index;
9366
9367 /* Make sure the type is one of the supported sizes. */
9368 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9369 index = exact_log2 (index);
9370 if (index >= 0 && index <= 4)
9371 {
9372 unsigned int align = TYPE_ALIGN_UNIT (type);
9373
9374 /* __sync builtins require strict data alignment. */
9375 if (exact_log2 (align) >= index)
9376 {
9377 /* Atomic load. */
3bd8f481 9378 scalar_mode smode;
629b3d75 9379 if (loaded_val == stored_val
3bd8f481
RS
9380 && (is_int_mode (TYPE_MODE (type), &smode)
9381 || is_float_mode (TYPE_MODE (type), &smode))
9382 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
9383 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9384 return;
9385
9386 /* Atomic store. */
3bd8f481
RS
9387 if ((is_int_mode (TYPE_MODE (type), &smode)
9388 || is_float_mode (TYPE_MODE (type), &smode))
9389 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
9390 && store_bb == single_succ (load_bb)
9391 && first_stmt (store_bb) == store
9392 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9393 stored_val, index))
9394 return;
9395
9396 /* When possible, use specialized atomic update functions. */
9397 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9398 && store_bb == single_succ (load_bb)
9399 && expand_omp_atomic_fetch_op (load_bb, addr,
9400 loaded_val, stored_val, index))
9401 return;
9402
8122fbff
JJ
9403 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9404 if (store_bb == single_succ (load_bb)
9405 && !gimple_in_ssa_p (cfun)
9406 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9407 index))
9408 return;
9409
629b3d75
MJ
 9410	  /* If we don't have specialized __sync builtins, try to implement
9411 as a compare and swap loop. */
9412 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9413 loaded_val, stored_val, index))
9414 return;
9415 }
9416 }
9417
9418 /* The ultimate fallback is wrapping the operation in a mutex. */
9419 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9420}
9421
9422/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9423 at REGION_EXIT. */
9424
9425static void
9426mark_loops_in_oacc_kernels_region (basic_block region_entry,
9427 basic_block region_exit)
9428{
99b1c316 9429 class loop *outer = region_entry->loop_father;
629b3d75
MJ
9430 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9431
9432 /* Don't parallelize the kernels region if it contains more than one outer
9433 loop. */
9434 unsigned int nr_outer_loops = 0;
99b1c316
MS
9435 class loop *single_outer = NULL;
9436 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
629b3d75
MJ
9437 {
9438 gcc_assert (loop_outer (loop) == outer);
9439
9440 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9441 continue;
9442
9443 if (region_exit != NULL
9444 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9445 continue;
9446
9447 nr_outer_loops++;
9448 single_outer = loop;
9449 }
9450 if (nr_outer_loops != 1)
9451 return;
9452
99b1c316 9453 for (class loop *loop = single_outer->inner;
01914336
MJ
9454 loop != NULL;
9455 loop = loop->inner)
629b3d75
MJ
9456 if (loop->next)
9457 return;
9458
9459 /* Mark the loops in the region. */
99b1c316 9460 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
629b3d75
MJ
9461 loop->in_oacc_kernels_region = true;
9462}
9463
629b3d75
MJ
9464/* Build target argument identifier from the DEVICE identifier, value
9465 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9466
9467static tree
 9468get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9469{
9470 tree t = build_int_cst (integer_type_node, device);
 9471  if (subsequent_param)
9472 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9473 build_int_cst (integer_type_node,
9474 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9475 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9476 build_int_cst (integer_type_node, id));
9477 return t;
9478}
9479
 9480/* Like above but return it in a type that can be directly stored as
 9481   an element of the argument array.  */
9482
9483static tree
 9484get_target_argument_identifier (int device, bool subsequent_param, int id)
9485{
 9486  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9487 return fold_convert (ptr_type_node, t);
9488}
9489
9490/* Return a target argument consisting of DEVICE identifier, value identifier
9491 ID, and the actual VALUE. */
9492
9493static tree
9494get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9495 tree value)
9496{
9497 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9498 fold_convert (integer_type_node, value),
9499 build_int_cst (unsigned_type_node,
9500 GOMP_TARGET_ARG_VALUE_SHIFT));
9501 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9502 get_target_argument_identifier_1 (device, false, id));
9503 t = fold_convert (ptr_type_node, t);
9504 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9505}
9506
9507 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9508 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9509 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9510 separate arguments. */
9511
9512static void
9513push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9514 int id, tree value, vec <tree> *args)
9515{
9516 if (tree_fits_shwi_p (value)
9517 && tree_to_shwi (value) > -(1 << 15)
9518 && tree_to_shwi (value) < (1 << 15))
9519 args->quick_push (get_target_argument_value (gsi, device, id, value));
9520 else
9521 {
9522 args->quick_push (get_target_argument_identifier (device, true, id));
9523 value = fold_convert (ptr_type_node, value);
9524 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9525 GSI_SAME_STMT);
9526 args->quick_push (value);
9527 }
9528}
9529
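/* Editorial illustration, not part of omp-expand.c: the word layout that the
   helpers above fold into trees, restated as plain C against the
   GOMP_TARGET_ARG_* constants from gomp-constants.h.  Low 7 bits: device;
   bit 7: the value follows as a separate argument; bits 8-15: value
   identifier; bits 16 and up: the value itself, when it fits the signed
   16-bit range checked above.  encode_target_arg is a made-up helper.  */
#if 0 /* illustrative sketch, not compiled */
#include <stdint.h>
#include "gomp-constants.h"

static intptr_t
encode_target_arg (int device, int id, int value)
{
  /* Mirrors get_target_argument_value for an in-range VALUE.  */
  return device
         | (id << GOMP_TARGET_ARG_ID_SHIFT)
         | ((intptr_t) value << GOMP_TARGET_ARG_VALUE_SHIFT);
}

/* E.g. num_teams(4) becomes one pointer-sized word:
   encode_target_arg (GOMP_TARGET_ARG_DEVICE_ALL,
                      GOMP_TARGET_ARG_NUM_TEAMS, 4);  */
#endif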
9530 /* Create an array of arguments that is then passed to GOMP_target. */
9531
9532static tree
9533get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9534{
9535 auto_vec <tree, 6> args;
9536 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9537 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9538 if (c)
9539 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9540 else
9541 t = integer_minus_one_node;
9542 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9543 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9544
9545 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9546 if (c)
9547 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9548 else
9549 t = integer_minus_one_node;
9550 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9551 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9552 &args);
9553
9554 /* Produce more, perhaps device specific, arguments here. */
9555
9556 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9557 args.length () + 1),
9558 ".omp_target_args");
9559 for (unsigned i = 0; i < args.length (); i++)
9560 {
9561 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9562 build_int_cst (integer_type_node, i),
9563 NULL_TREE, NULL_TREE);
9564 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9565 GSI_SAME_STMT);
9566 }
9567 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9568 build_int_cst (integer_type_node, args.length ()),
9569 NULL_TREE, NULL_TREE);
9570 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9571 GSI_SAME_STMT);
9572 TREE_ADDRESSABLE (argarray) = 1;
9573 return build_fold_addr_expr (argarray);
9574}
9575
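/* Editorial illustration, not part of omp-expand.c: for
   "#pragma omp target num_teams(4) thread_limit(64)" the function above
   builds, in the encoding sketched earlier, roughly

     .omp_target_args[0] = <DEVICE_ALL, NUM_TEAMS, 4>
     .omp_target_args[1] = <DEVICE_ALL, THREAD_LIMIT, 64>
     .omp_target_args[2] = NULL      (the terminator stored above)

   and returns the address of that NULL-terminated vector.  */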
9576/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9577
9578static void
9579expand_omp_target (struct omp_region *region)
9580{
9581 basic_block entry_bb, exit_bb, new_bb;
9582 struct function *child_cfun;
9583 tree child_fn, block, t;
9584 gimple_stmt_iterator gsi;
9585 gomp_target *entry_stmt;
9586 gimple *stmt;
9587 edge e;
9588 bool offloaded;
9589 int target_kind;
9590
9591 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9592 target_kind = gimple_omp_target_kind (entry_stmt);
9593 new_bb = region->entry;
9594
9595 offloaded = is_gimple_omp_offloaded (entry_stmt);
9596 switch (target_kind)
9597 {
9598 case GF_OMP_TARGET_KIND_REGION:
9599 case GF_OMP_TARGET_KIND_UPDATE:
9600 case GF_OMP_TARGET_KIND_ENTER_DATA:
9601 case GF_OMP_TARGET_KIND_EXIT_DATA:
9602 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9603 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9604 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9605 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9606 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9607 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9608 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9609 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9610 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9611 case GF_OMP_TARGET_KIND_DATA:
9612 case GF_OMP_TARGET_KIND_OACC_DATA:
9613 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9614 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9615 break;
9616 default:
9617 gcc_unreachable ();
9618 }
9619
9620 child_fn = NULL_TREE;
9621 child_cfun = NULL;
9622 if (offloaded)
9623 {
9624 child_fn = gimple_omp_target_child_fn (entry_stmt);
9625 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9626 }
9627
9628 /* Supported by expand_omp_taskreg, but not here. */
9629 if (child_cfun != NULL)
9630 gcc_checking_assert (!child_cfun->cfg);
9631 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9632
9633 entry_bb = region->entry;
9634 exit_bb = region->exit;
9635
9636 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9637 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9638
9639 /* From here on, all OpenACC compute constructs are mapped to
9640 'BUILT_IN_GOACC_PARALLEL' and get their compute regions outlined.
9641 To distinguish between them, we attach attributes. */
9642 switch (target_kind)
9643 {
9644 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9645 DECL_ATTRIBUTES (child_fn)
9646 = tree_cons (get_identifier ("oacc parallel"),
9647 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9648 break;
9649 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9650 DECL_ATTRIBUTES (child_fn)
9651 = tree_cons (get_identifier ("oacc kernels"),
9652 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9653 break;
9654 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9655 DECL_ATTRIBUTES (child_fn)
9656 = tree_cons (get_identifier ("oacc serial"),
9657 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9658 break;
9659 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9660 DECL_ATTRIBUTES (child_fn)
9661 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9662 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9663 break;
9664 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9665 DECL_ATTRIBUTES (child_fn)
9666 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9667 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9668 break;
9669 default:
9670 /* Make sure we don't miss any. */
9671 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9672 && is_gimple_omp_offloaded (entry_stmt)));
62aee289 9673 break;
9674 }
9675
9676 if (offloaded)
9677 {
9678 unsigned srcidx, dstidx, num;
9679
9680 /* If the offloading region needs data sent from the parent
9681 function, then the very first statement (except possible
9682 tree profile counter updates) of the offloading body
9683 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9684 &.OMP_DATA_O is passed as an argument to the child function,
9685 we need to replace it with the argument as seen by the child
9686 function.
9687
9688 In most cases, this will end up being the identity assignment
9689 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9690 a function call that has been inlined, the original PARM_DECL
9691 .OMP_DATA_I may have been converted into a different local
9692 variable, in which case we need to keep the assignment. */
9693 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9694 if (data_arg)
9695 {
9696 basic_block entry_succ_bb = single_succ (entry_bb);
9697 gimple_stmt_iterator gsi;
9698 tree arg;
9699 gimple *tgtcopy_stmt = NULL;
9700 tree sender = TREE_VEC_ELT (data_arg, 0);
9701
9702 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9703 {
9704 gcc_assert (!gsi_end_p (gsi));
9705 stmt = gsi_stmt (gsi);
9706 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9707 continue;
9708
9709 if (gimple_num_ops (stmt) == 2)
9710 {
9711 tree arg = gimple_assign_rhs1 (stmt);
9712
9713 /* We're ignoring the subcode because we're
9714 effectively doing a STRIP_NOPS. */
9715
9716 if (TREE_CODE (arg) == ADDR_EXPR
9717 && TREE_OPERAND (arg, 0) == sender)
9718 {
9719 tgtcopy_stmt = stmt;
9720 break;
9721 }
9722 }
9723 }
9724
9725 gcc_assert (tgtcopy_stmt != NULL);
9726 arg = DECL_ARGUMENTS (child_fn);
9727
9728 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9729 gsi_remove (&gsi, true);
9730 }
9731
9732 /* Declare local variables needed in CHILD_CFUN. */
9733 block = DECL_INITIAL (child_fn);
9734 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9735 /* The gimplifier could record temporaries in the offloading block
9736 rather than in containing function's local_decls chain,
9737 which would mean cgraph missed finalizing them. Do it now. */
9738 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9739 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9740 varpool_node::finalize_decl (t);
9741 DECL_SAVED_TREE (child_fn) = NULL;
9742 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9743 gimple_set_body (child_fn, NULL);
9744 TREE_USED (block) = 1;
9745
9746 /* Reset DECL_CONTEXT on function arguments. */
9747 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9748 DECL_CONTEXT (t) = child_fn;
9749
9750 /* Split ENTRY_BB at GIMPLE_*,
9751 so that it can be moved to the child function. */
9752 gsi = gsi_last_nondebug_bb (entry_bb);
9753 stmt = gsi_stmt (gsi);
9754 gcc_assert (stmt
9755 && gimple_code (stmt) == gimple_code (entry_stmt));
9756 e = split_block (entry_bb, stmt);
9757 gsi_remove (&gsi, true);
9758 entry_bb = e->dest;
9759 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9760
9761 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9762 if (exit_bb)
9763 {
9764 gsi = gsi_last_nondebug_bb (exit_bb);
9765 gcc_assert (!gsi_end_p (gsi)
9766 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9767 stmt = gimple_build_return (NULL);
9768 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9769 gsi_remove (&gsi, true);
9770 }
9771
9772 /* Move the offloading region into CHILD_CFUN. */
9773
9774 block = gimple_block (entry_stmt);
9775
9776 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9777 if (exit_bb)
9778 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9779 /* When the OMP expansion process cannot guarantee an up-to-date
9780 loop tree, arrange for the child function to fix up loops. */
9781 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9782 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9783
9784 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9785 num = vec_safe_length (child_cfun->local_decls);
9786 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9787 {
9788 t = (*child_cfun->local_decls)[srcidx];
9789 if (DECL_CONTEXT (t) == cfun->decl)
9790 continue;
9791 if (srcidx != dstidx)
9792 (*child_cfun->local_decls)[dstidx] = t;
9793 dstidx++;
9794 }
9795 if (dstidx != num)
9796 vec_safe_truncate (child_cfun->local_decls, dstidx);
9797
9798 /* Inform the callgraph about the new function. */
9799 child_cfun->curr_properties = cfun->curr_properties;
9800 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9801 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9802 cgraph_node *node = cgraph_node::get_create (child_fn);
9803 node->parallelized_function = 1;
9804 cgraph_node::add_new_function (child_fn, true);
9805
9806 /* Add the new function to the offload table. */
9807 if (ENABLE_OFFLOADING)
9808 {
9809 if (in_lto_p)
9810 DECL_PRESERVE_P (child_fn) = 1;
9811 vec_safe_push (offload_funcs, child_fn);
9812 }
9813
9814 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9815 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9816
9817 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9818 fixed in a following pass. */
9819 push_cfun (child_cfun);
9820 if (need_asm)
9821 assign_assembler_name_if_needed (child_fn);
9822 cgraph_edge::rebuild_edges ();
9823
9824 /* Some EH regions might become dead, see PR34608. If
9825 pass_cleanup_cfg isn't the first pass to happen with the
9826 new child, these dead EH edges might cause problems.
9827 Clean them up now. */
9828 if (flag_exceptions)
9829 {
9830 basic_block bb;
9831 bool changed = false;
9832
9833 FOR_EACH_BB_FN (bb, cfun)
9834 changed |= gimple_purge_dead_eh_edges (bb);
9835 if (changed)
9836 cleanup_tree_cfg ();
9837 }
9838 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9839 verify_loop_structure ();
9840 pop_cfun ();
9841
9842 if (dump_file && !gimple_in_ssa_p (cfun))
9843 {
9844 omp_any_child_fn_dumped = true;
9845 dump_function_header (dump_file, child_fn, dump_flags);
9846 dump_function_to_file (child_fn, dump_file, dump_flags);
9847 }
9848
9849 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9850 }
9851
9852 /* Emit a library call to launch the offloading region, or do data
9853 transfers. */
9854 tree t1, t2, t3, t4, depend, c, clauses;
9855 enum built_in_function start_ix;
9856 unsigned int flags_i = 0;
9857
9858 switch (gimple_omp_target_kind (entry_stmt))
9859 {
9860 case GF_OMP_TARGET_KIND_REGION:
9861 start_ix = BUILT_IN_GOMP_TARGET;
9862 break;
9863 case GF_OMP_TARGET_KIND_DATA:
9864 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9865 break;
9866 case GF_OMP_TARGET_KIND_UPDATE:
9867 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9868 break;
9869 case GF_OMP_TARGET_KIND_ENTER_DATA:
9870 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9871 break;
9872 case GF_OMP_TARGET_KIND_EXIT_DATA:
9873 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9874 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9875 break;
9876 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9877 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9878 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9879 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9880 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9881 start_ix = BUILT_IN_GOACC_PARALLEL;
9882 break;
9883 case GF_OMP_TARGET_KIND_OACC_DATA:
9884 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9885 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9886 start_ix = BUILT_IN_GOACC_DATA_START;
9887 break;
9888 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9889 start_ix = BUILT_IN_GOACC_UPDATE;
9890 break;
9891 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9892 start_ix = BUILT_IN_GOACC_ENTER_DATA;
9893 break;
9894 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9895 start_ix = BUILT_IN_GOACC_EXIT_DATA;
9896 break;
9897 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9898 start_ix = BUILT_IN_GOACC_DECLARE;
9899 break;
9900 default:
9901 gcc_unreachable ();
9902 }
9903
9904 clauses = gimple_omp_target_clauses (entry_stmt);
9905
9906 tree device = NULL_TREE;
9907 location_t device_loc = UNKNOWN_LOCATION;
9908 tree goacc_flags = NULL_TREE;
9909 if (is_gimple_omp_oacc (entry_stmt))
9910 {
9911 /* By default, no GOACC_FLAGs are set. */
9912 goacc_flags = integer_zero_node;
9913 }
9914 else
9915 {
9916 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9917 if (c)
9918 {
9919 device = OMP_CLAUSE_DEVICE_ID (c);
9920 device_loc = OMP_CLAUSE_LOCATION (c);
9921 if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
9922 sorry_at (device_loc, "%<ancestor%> not yet supported");
9923 }
9924 else
9925 {
9926 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9927 library choose). */
9928 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9929 device_loc = gimple_location (entry_stmt);
9930 }
9931
9932 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9933 /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
9934 nowait doesn't appear. */
9935 if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
9936 c = NULL;
9937 if (c)
9938 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9939 }
9940
9941 /* By default, there is no conditional. */
9942 tree cond = NULL_TREE;
9943 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9944 if (c)
9945 cond = OMP_CLAUSE_IF_EXPR (c);
9946 /* If we found the clause 'if (cond)', build:
9947 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9948 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9949 if (cond)
9950 {
9951 tree *tp;
9952 if (is_gimple_omp_oacc (entry_stmt))
9953 tp = &goacc_flags;
9954 else
9955 {
9956 /* Ensure 'device' is of the correct type. */
9957 device = fold_convert_loc (device_loc, integer_type_node, device);
9958
9959 tp = &device;
9960 }
9961
9962 cond = gimple_boolify (cond);
9963
9964 basic_block cond_bb, then_bb, else_bb;
9965 edge e;
9966 tree tmp_var;
9967
9968 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9969 if (offloaded)
9970 e = split_block_after_labels (new_bb);
9971 else
9972 {
9973 gsi = gsi_last_nondebug_bb (new_bb);
9974 gsi_prev (&gsi);
9975 e = split_block (new_bb, gsi_stmt (gsi));
9976 }
9977 cond_bb = e->src;
9978 new_bb = e->dest;
9979 remove_edge (e);
9980
9981 then_bb = create_empty_bb (cond_bb);
9982 else_bb = create_empty_bb (then_bb);
9983 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9984 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9985
9986 stmt = gimple_build_cond_empty (cond);
9987 gsi = gsi_last_bb (cond_bb);
9988 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9989
9990 gsi = gsi_start_bb (then_bb);
9991 stmt = gimple_build_assign (tmp_var, *tp);
9992 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9993
9994 gsi = gsi_start_bb (else_bb);
9995 if (is_gimple_omp_oacc (entry_stmt))
9996 stmt = gimple_build_assign (tmp_var,
9997 BIT_IOR_EXPR,
9998 *tp,
9999 build_int_cst (integer_type_node,
10000 GOACC_FLAG_HOST_FALLBACK));
10001 else
10002 stmt = gimple_build_assign (tmp_var,
10003 build_int_cst (integer_type_node,
10004 GOMP_DEVICE_HOST_FALLBACK));
10005 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10006
10007 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10008 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10009 add_bb_to_loop (then_bb, cond_bb->loop_father);
10010 add_bb_to_loop (else_bb, cond_bb->loop_father);
10011 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10012 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10013
10014 *tp = tmp_var;
10015
10016 gsi = gsi_last_nondebug_bb (new_bb);
10017 }
10018 else
10019 {
10020 gsi = gsi_last_nondebug_bb (new_bb);
10021
10022 if (device != NULL_TREE)
10023 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10024 true, GSI_SAME_STMT);
10025 }
10026
10027 t = gimple_omp_target_data_arg (entry_stmt);
10028 if (t == NULL)
10029 {
10030 t1 = size_zero_node;
10031 t2 = build_zero_cst (ptr_type_node);
10032 t3 = t2;
10033 t4 = t2;
10034 }
10035 else
10036 {
10037 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10038 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10039 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10040 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10041 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10042 }
10043
10044 gimple *g;
10045 bool tagging = false;
10046 /* Eleven is the maximum number of arguments used by any start_ix, not counting varargs. */
10047 auto_vec<tree, 11> args;
10048 if (is_gimple_omp_oacc (entry_stmt))
10049 {
10050 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10051 TREE_TYPE (goacc_flags), goacc_flags);
10052 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10053 NULL_TREE, true,
10054 GSI_SAME_STMT);
10055 args.quick_push (goacc_flags_m);
10056 }
10057 else
10058 args.quick_push (device);
10059 if (offloaded)
10060 args.quick_push (build_fold_addr_expr (child_fn));
10061 args.quick_push (t1);
10062 args.quick_push (t2);
10063 args.quick_push (t3);
10064 args.quick_push (t4);
10065 switch (start_ix)
10066 {
10067 case BUILT_IN_GOACC_DATA_START:
10068 case BUILT_IN_GOACC_DECLARE:
10069 case BUILT_IN_GOMP_TARGET_DATA:
10070 break;
10071 case BUILT_IN_GOMP_TARGET:
10072 case BUILT_IN_GOMP_TARGET_UPDATE:
10073 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10074 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10075 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10076 if (c)
10077 depend = OMP_CLAUSE_DECL (c);
10078 else
10079 depend = build_int_cst (ptr_type_node, 0);
10080 args.quick_push (depend);
10081 if (start_ix == BUILT_IN_GOMP_TARGET)
10082 args.quick_push (get_target_arguments (&gsi, entry_stmt));
10083 break;
10084 case BUILT_IN_GOACC_PARALLEL:
10085 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10086 {
10087 tree dims = NULL_TREE;
10088 unsigned int ix;
10089
10090 /* For serial constructs we set all dimensions to 1. */
10091 for (ix = GOMP_DIM_MAX; ix--;)
10092 dims = tree_cons (NULL_TREE, integer_one_node, dims);
10093 oacc_replace_fn_attrib (child_fn, dims);
10094 }
10095 else
10096 oacc_set_fn_attrib (child_fn, clauses, &args);
10097 tagging = true;
10098 /* FALLTHRU */
10099 case BUILT_IN_GOACC_ENTER_DATA:
10100 case BUILT_IN_GOACC_EXIT_DATA:
10101 case BUILT_IN_GOACC_UPDATE:
10102 {
10103 tree t_async = NULL_TREE;
10104
10105 /* If present, use the value specified by the respective
10106 clause, making sure that is of the correct type. */
10107 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10108 if (c)
10109 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10110 integer_type_node,
10111 OMP_CLAUSE_ASYNC_EXPR (c));
10112 else if (!tagging)
10113 /* Default values for t_async. */
10114 t_async = fold_convert_loc (gimple_location (entry_stmt),
10115 integer_type_node,
10116 build_int_cst (integer_type_node,
10117 GOMP_ASYNC_SYNC));
10118 if (tagging && t_async)
10119 {
10120 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10121
10122 if (TREE_CODE (t_async) == INTEGER_CST)
10123 {
10124 /* See if we can pack the async arg in to the tag's
10125 operand. */
10126 i_async = TREE_INT_CST_LOW (t_async);
10127 if (i_async < GOMP_LAUNCH_OP_MAX)
10128 t_async = NULL_TREE;
10129 else
10130 i_async = GOMP_LAUNCH_OP_MAX;
10131 }
10132 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10133 i_async));
10134 }
10135 if (t_async)
10136 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10137 NULL_TREE, true,
10138 GSI_SAME_STMT));
10139
10140 /* Save the argument index, and ... */
10141 unsigned t_wait_idx = args.length ();
10142 unsigned num_waits = 0;
10143 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10144 if (!tagging || c)
10145 /* ... push a placeholder. */
10146 args.safe_push (integer_zero_node);
10147
10148 for (; c; c = OMP_CLAUSE_CHAIN (c))
10149 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10150 {
10151 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10152 integer_type_node,
10153 OMP_CLAUSE_WAIT_EXPR (c));
10154 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10155 GSI_SAME_STMT);
10156 args.safe_push (arg);
10157 num_waits++;
10158 }
10159
10160 if (!tagging || num_waits)
10161 {
10162 tree len;
10163
10164 /* Now that we know the number, update the placeholder. */
10165 if (tagging)
10166 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10167 else
10168 len = build_int_cst (integer_type_node, num_waits);
10169 len = fold_convert_loc (gimple_location (entry_stmt),
10170 unsigned_type_node, len);
10171 args[t_wait_idx] = len;
10172 }
10173 }
10174 break;
10175 default:
10176 gcc_unreachable ();
10177 }
10178 if (tagging)
10179 /* Push terminal marker - zero. */
10180 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10181
10182 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10183 gimple_set_location (g, gimple_location (entry_stmt));
10184 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10185 if (!offloaded)
10186 {
10187 g = gsi_stmt (gsi);
10188 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10189 gsi_remove (&gsi, true);
10190 }
10191}
10192
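/* Editorial illustration, not part of omp-expand.c: the shape of the launch
   call emitted above when start_ix is BUILT_IN_GOMP_TARGET.  t1..t4 come
   from the sender record (map count plus the hostaddrs, sizes and kinds
   arrays); the last operand is the NULL-terminated vector built by
   get_target_arguments.  This assumes the usual libgomp prototype.  */
#if 0 /* illustrative sketch, not compiled */
#include <stddef.h>

extern void GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                             void **hostaddrs, size_t *sizes,
                             unsigned short *kinds, unsigned int flags,
                             void **depend, void **args);

/* A "#pragma omp target" region then launches roughly as:
   GOMP_target_ext (device, child_fn, t1, t2, t3, t4,
                    flags_i, depend, .omp_target_args);  */
#endif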
10193/* Expand the parallel region tree rooted at REGION. Expansion
10194 proceeds in depth-first order. Innermost regions are expanded
10195 first. This way, parallel regions that require a new function to
10196 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10197 internal dependencies in their body. */
10198
10199static void
10200expand_omp (struct omp_region *region)
10201{
10202 omp_any_child_fn_dumped = false;
10203 while (region)
10204 {
10205 location_t saved_location;
10206 gimple *inner_stmt = NULL;
10207
10208 /* First, determine whether this is a combined parallel+workshare
10209 region. */
10210 if (region->type == GIMPLE_OMP_PARALLEL)
10211 determine_parallel_type (region);
10212
10213 if (region->type == GIMPLE_OMP_FOR
10214 && gimple_omp_for_combined_p (last_stmt (region->entry)))
10215 inner_stmt = last_stmt (region->inner->entry);
10216
10217 if (region->inner)
10218 expand_omp (region->inner);
10219
10220 saved_location = input_location;
10221 if (gimple_has_location (last_stmt (region->entry)))
10222 input_location = gimple_location (last_stmt (region->entry));
10223
10224 switch (region->type)
10225 {
10226 case GIMPLE_OMP_PARALLEL:
10227 case GIMPLE_OMP_TASK:
10228 expand_omp_taskreg (region);
10229 break;
10230
10231 case GIMPLE_OMP_FOR:
10232 expand_omp_for (region, inner_stmt);
10233 break;
10234
10235 case GIMPLE_OMP_SECTIONS:
10236 expand_omp_sections (region);
10237 break;
10238
10239 case GIMPLE_OMP_SECTION:
10240 /* Individual omp sections are handled together with their
10241 parent GIMPLE_OMP_SECTIONS region. */
10242 break;
10243
10244 case GIMPLE_OMP_SINGLE:
10245 case GIMPLE_OMP_SCOPE:
10246 expand_omp_single (region);
10247 break;
10248
10249 case GIMPLE_OMP_ORDERED:
10250 {
10251 gomp_ordered *ord_stmt
10252 = as_a <gomp_ordered *> (last_stmt (region->entry));
10253 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10254 OMP_CLAUSE_DEPEND))
10255 {
10256 /* We'll expand these when expanding corresponding
10257 worksharing region with ordered(n) clause. */
10258 gcc_assert (region->outer
10259 && region->outer->type == GIMPLE_OMP_FOR);
10260 region->ord_stmt = ord_stmt;
10261 break;
10262 }
10263 }
10264 /* FALLTHRU */
10265 case GIMPLE_OMP_MASTER:
10266 case GIMPLE_OMP_MASKED:
10267 case GIMPLE_OMP_TASKGROUP:
10268 case GIMPLE_OMP_CRITICAL:
10269 case GIMPLE_OMP_TEAMS:
10270 expand_omp_synch (region);
10271 break;
10272
10273 case GIMPLE_OMP_ATOMIC_LOAD:
10274 expand_omp_atomic (region);
10275 break;
10276
10277 case GIMPLE_OMP_TARGET:
10278 expand_omp_target (region);
10279 break;
10280
10281 default:
10282 gcc_unreachable ();
10283 }
10284
10285 input_location = saved_location;
10286 region = region->next;
10287 }
10288 if (omp_any_child_fn_dumped)
10289 {
10290 if (dump_file)
10291 dump_function_header (dump_file, current_function_decl, dump_flags);
10292 omp_any_child_fn_dumped = false;
10293 }
10294}
10295
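/* Editorial note, not part of omp-expand.c: for a nest such as
   "#pragma omp parallel" containing "#pragma omp for", the recursion above
   expands the inner GIMPLE_OMP_FOR region first, so by the time the
   enclosing GIMPLE_OMP_PARALLEL is outlined into a child function its body
   contains no unexpanded OMP directives.  */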
10296 /* Helper for build_omp_regions. Scan the dominator tree starting at
10297 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10298 true, the function ends once a single tree is built (otherwise, a whole
10299 forest of OMP constructs may be built). */
10300
10301static void
10302build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10303 bool single_tree)
10304{
10305 gimple_stmt_iterator gsi;
10306 gimple *stmt;
10307 basic_block son;
10308
10309 gsi = gsi_last_nondebug_bb (bb);
10310 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10311 {
10312 struct omp_region *region;
10313 enum gimple_code code;
10314
10315 stmt = gsi_stmt (gsi);
10316 code = gimple_code (stmt);
10317 if (code == GIMPLE_OMP_RETURN)
10318 {
10319 /* STMT is the return point out of region PARENT. Mark it
10320 as the exit point and make PARENT the immediately
10321 enclosing region. */
10322 gcc_assert (parent);
10323 region = parent;
10324 region->exit = bb;
10325 parent = parent->outer;
10326 }
10327 else if (code == GIMPLE_OMP_ATOMIC_STORE)
10328 {
10329 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10330 GIMPLE_OMP_RETURN, but matches with
10331 GIMPLE_OMP_ATOMIC_LOAD. */
10332 gcc_assert (parent);
10333 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10334 region = parent;
10335 region->exit = bb;
10336 parent = parent->outer;
10337 }
10338 else if (code == GIMPLE_OMP_CONTINUE)
10339 {
10340 gcc_assert (parent);
10341 parent->cont = bb;
10342 }
10343 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10344 {
10345 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10346 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10347 }
10348 else
10349 {
10350 region = new_omp_region (bb, code, parent);
10351 /* Otherwise... */
10352 if (code == GIMPLE_OMP_TARGET)
10353 {
10354 switch (gimple_omp_target_kind (stmt))
10355 {
10356 case GF_OMP_TARGET_KIND_REGION:
10357 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10358 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10359 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10360 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10361 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10362 break;
10363 case GF_OMP_TARGET_KIND_UPDATE:
10364 case GF_OMP_TARGET_KIND_ENTER_DATA:
10365 case GF_OMP_TARGET_KIND_EXIT_DATA:
10366 case GF_OMP_TARGET_KIND_DATA:
10367 case GF_OMP_TARGET_KIND_OACC_DATA:
10368 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10369 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10370 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10371 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10372 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10373 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10374 /* ..., other than for those stand-alone directives... */
10375 region = NULL;
10376 break;
10377 default:
10378 gcc_unreachable ();
10379 }
10380 }
10381 else if (code == GIMPLE_OMP_ORDERED
10382 && omp_find_clause (gimple_omp_ordered_clauses
10383 (as_a <gomp_ordered *> (stmt)),
10384 OMP_CLAUSE_DEPEND))
10385 /* #pragma omp ordered depend is also just a stand-alone
10386 directive. */
10387 region = NULL;
10388 else if (code == GIMPLE_OMP_TASK
10389 && gimple_omp_task_taskwait_p (stmt))
10390 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10391 region = NULL;
10392 /* ..., this directive becomes the parent for a new region. */
10393 if (region)
10394 parent = region;
10395 }
10396 }
10397
10398 if (single_tree && !parent)
10399 return;
10400
10401 for (son = first_dom_son (CDI_DOMINATORS, bb);
10402 son;
10403 son = next_dom_son (CDI_DOMINATORS, son))
10404 build_omp_regions_1 (son, parent, single_tree);
10405}
10406
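/* Editorial illustration, not part of omp-expand.c: for a function body
   such as

     #pragma omp parallel
       #pragma omp for
         ...
     #pragma omp target
       ...

   the walk above hangs a forest off root_omp_region:

     GIMPLE_OMP_PARALLEL        first region
       GIMPLE_OMP_FOR           its ->inner child
     GIMPLE_OMP_TARGET          its ->next peer

   with each region's exit block filled in when the matching
   GIMPLE_OMP_RETURN is reached.  */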
10407/* Builds the tree of OMP regions rooted at ROOT, storing it to
10408 root_omp_region. */
10409
10410static void
10411build_omp_regions_root (basic_block root)
10412{
10413 gcc_assert (root_omp_region == NULL);
10414 build_omp_regions_1 (root, NULL, true);
10415 gcc_assert (root_omp_region != NULL);
10416}
10417
10418/* Expands omp construct (and its subconstructs) starting in HEAD. */
10419
10420void
10421omp_expand_local (basic_block head)
10422{
10423 build_omp_regions_root (head);
10424 if (dump_file && (dump_flags & TDF_DETAILS))
10425 {
10426 fprintf (dump_file, "\nOMP region tree\n\n");
10427 dump_omp_region (dump_file, root_omp_region, 0);
10428 fprintf (dump_file, "\n");
10429 }
10430
10431 remove_exit_barriers (root_omp_region);
10432 expand_omp (root_omp_region);
10433
10434 omp_free_regions ();
10435}
10436
10437 /* Scan the CFG and build a tree of OMP regions, storing it to
10438 root_omp_region. */
10439
10440static void
10441build_omp_regions (void)
10442{
10443 gcc_assert (root_omp_region == NULL);
10444 calculate_dominance_info (CDI_DOMINATORS);
10445 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10446}
10447
10448/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10449
10450static unsigned int
10451execute_expand_omp (void)
10452{
10453 build_omp_regions ();
10454
10455 if (!root_omp_region)
10456 return 0;
10457
10458 if (dump_file)
10459 {
10460 fprintf (dump_file, "\nOMP region tree\n\n");
10461 dump_omp_region (dump_file, root_omp_region, 0);
10462 fprintf (dump_file, "\n");
10463 }
10464
10465 remove_exit_barriers (root_omp_region);
10466
10467 expand_omp (root_omp_region);
10468
10469 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10470 verify_loop_structure ();
10471 cleanup_tree_cfg ();
10472
10473 omp_free_regions ();
10474
10475 return 0;
10476}
10477
10478/* OMP expansion -- the default pass, run before creation of SSA form. */
10479
10480namespace {
10481
10482const pass_data pass_data_expand_omp =
10483{
10484 GIMPLE_PASS, /* type */
10485 "ompexp", /* name */
10486 OPTGROUP_OMP, /* optinfo_flags */
10487 TV_NONE, /* tv_id */
10488 PROP_gimple_any, /* properties_required */
10489 PROP_gimple_eomp, /* properties_provided */
10490 0, /* properties_destroyed */
10491 0, /* todo_flags_start */
10492 0, /* todo_flags_finish */
10493};
10494
10495class pass_expand_omp : public gimple_opt_pass
10496{
10497public:
10498 pass_expand_omp (gcc::context *ctxt)
10499 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10500 {}
10501
10502 /* opt_pass methods: */
10503 virtual unsigned int execute (function *)
10504 {
10505 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10506 || flag_openmp_simd != 0)
10507 && !seen_error ());
10508
10509 /* This pass always runs, to provide PROP_gimple_eomp.
10510 But often, there is nothing to do. */
10511 if (!gate)
10512 return 0;
10513
10514 return execute_expand_omp ();
10515 }
10516
10517}; // class pass_expand_omp
10518
10519} // anon namespace
10520
10521gimple_opt_pass *
10522make_pass_expand_omp (gcc::context *ctxt)
10523{
10524 return new pass_expand_omp (ctxt);
10525}
10526
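/* Editorial note, not part of omp-expand.c: make_pass_expand_omp above (and
   make_pass_expand_omp_ssa below) are referenced from GCC's pass pipeline;
   passes.def schedules the default pass early, before SSA construction,
   roughly as "NEXT_PASS (pass_expand_omp);".  */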
10527namespace {
10528
10529const pass_data pass_data_expand_omp_ssa =
10530{
10531 GIMPLE_PASS, /* type */
10532 "ompexpssa", /* name */
10533 OPTGROUP_OMP, /* optinfo_flags */
10534 TV_NONE, /* tv_id */
10535 PROP_cfg | PROP_ssa, /* properties_required */
10536 PROP_gimple_eomp, /* properties_provided */
10537 0, /* properties_destroyed */
10538 0, /* todo_flags_start */
10539 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10540};
10541
10542class pass_expand_omp_ssa : public gimple_opt_pass
10543{
10544public:
10545 pass_expand_omp_ssa (gcc::context *ctxt)
10546 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10547 {}
10548
10549 /* opt_pass methods: */
10550 virtual bool gate (function *fun)
10551 {
10552 return !(fun->curr_properties & PROP_gimple_eomp);
10553 }
10554 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10555 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10556
10557}; // class pass_expand_omp_ssa
10558
10559} // anon namespace
10560
10561gimple_opt_pass *
10562make_pass_expand_omp_ssa (gcc::context *ctxt)
10563{
10564 return new pass_expand_omp_ssa (ctxt);
10565}
10566
10567/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10568 GIMPLE_* codes. */
10569
10570bool
10571omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10572 int *region_idx)
10573{
10574 gimple *last = last_stmt (bb);
10575 enum gimple_code code = gimple_code (last);
10576 struct omp_region *cur_region = *region;
10577 bool fallthru = false;
10578
10579 switch (code)
10580 {
10581 case GIMPLE_OMP_PARALLEL:
10582 case GIMPLE_OMP_FOR:
10583 case GIMPLE_OMP_SINGLE:
10584 case GIMPLE_OMP_TEAMS:
10585 case GIMPLE_OMP_MASTER:
10586 case GIMPLE_OMP_MASKED:
10587 case GIMPLE_OMP_SCOPE:
10588 case GIMPLE_OMP_TASKGROUP:
10589 case GIMPLE_OMP_CRITICAL:
10590 case GIMPLE_OMP_SECTION:
10591 cur_region = new_omp_region (bb, code, cur_region);
10592 fallthru = true;
10593 break;
10594
10595 case GIMPLE_OMP_TASK:
10596 cur_region = new_omp_region (bb, code, cur_region);
10597 fallthru = true;
10598 if (gimple_omp_task_taskwait_p (last))
10599 cur_region = cur_region->outer;
10600 break;
10601
10602 case GIMPLE_OMP_ORDERED:
10603 cur_region = new_omp_region (bb, code, cur_region);
10604 fallthru = true;
10605 if (omp_find_clause (gimple_omp_ordered_clauses
10606 (as_a <gomp_ordered *> (last)),
10607 OMP_CLAUSE_DEPEND))
10608 cur_region = cur_region->outer;
10609 break;
10610
10611 case GIMPLE_OMP_TARGET:
10612 cur_region = new_omp_region (bb, code, cur_region);
10613 fallthru = true;
10614 switch (gimple_omp_target_kind (last))
10615 {
10616 case GF_OMP_TARGET_KIND_REGION:
10617 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10618 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10619 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10620 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10621 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10622 break;
10623 case GF_OMP_TARGET_KIND_UPDATE:
10624 case GF_OMP_TARGET_KIND_ENTER_DATA:
10625 case GF_OMP_TARGET_KIND_EXIT_DATA:
10626 case GF_OMP_TARGET_KIND_DATA:
10627 case GF_OMP_TARGET_KIND_OACC_DATA:
10628 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10629 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10630 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10631 case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10632 case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10633 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10634 cur_region = cur_region->outer;
10635 break;
10636 default:
10637 gcc_unreachable ();
10638 }
10639 break;
10640
10641 case GIMPLE_OMP_SECTIONS:
10642 cur_region = new_omp_region (bb, code, cur_region);
10643 fallthru = true;
10644 break;
10645
10646 case GIMPLE_OMP_SECTIONS_SWITCH:
10647 fallthru = false;
10648 break;
10649
10650 case GIMPLE_OMP_ATOMIC_LOAD:
10651 case GIMPLE_OMP_ATOMIC_STORE:
10652 fallthru = true;
10653 break;
10654
10655 case GIMPLE_OMP_RETURN:
10656 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10657 somewhere other than the next block. This will be
10658 created later. */
10659 cur_region->exit = bb;
10660 if (cur_region->type == GIMPLE_OMP_TASK)
10661 /* Add an edge corresponding to not scheduling the task
10662 immediately. */
10663 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10664 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10665 cur_region = cur_region->outer;
10666 break;
10667
10668 case GIMPLE_OMP_CONTINUE:
10669 cur_region->cont = bb;
10670 switch (cur_region->type)
10671 {
10672 case GIMPLE_OMP_FOR:
10673 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10674 succs edges as abnormal to prevent splitting
10675 them. */
10676 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10677 /* Make the loopback edge. */
10678 make_edge (bb, single_succ (cur_region->entry),
10679 EDGE_ABNORMAL);
10680
10681 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10682 corresponds to the case that the body of the loop
10683 is not executed at all. */
10684 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10685 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10686 fallthru = false;
10687 break;
10688
10689 case GIMPLE_OMP_SECTIONS:
10690 /* Wire up the edges into and out of the nested sections. */
10691 {
10692 basic_block switch_bb = single_succ (cur_region->entry);
10693
10694 struct omp_region *i;
10695 for (i = cur_region->inner; i ; i = i->next)
10696 {
10697 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10698 make_edge (switch_bb, i->entry, 0);
10699 make_edge (i->exit, bb, EDGE_FALLTHRU);
10700 }
10701
10702 /* Make the loopback edge to the block with
10703 GIMPLE_OMP_SECTIONS_SWITCH. */
10704 make_edge (bb, switch_bb, 0);
10705
10706 /* Make the edge from the switch to exit. */
10707 make_edge (switch_bb, bb->next_bb, 0);
10708 fallthru = false;
10709 }
10710 break;
10711
10712 case GIMPLE_OMP_TASK:
10713 fallthru = true;
10714 break;
10715
10716 default:
10717 gcc_unreachable ();
10718 }
10719 break;
10720
10721 default:
10722 gcc_unreachable ();
10723 }
10724
10725 if (*region != cur_region)
10726 {
10727 *region = cur_region;
10728 if (cur_region)
10729 *region_idx = cur_region->entry->index;
10730 else
10731 *region_idx = 0;
10732 }
10733
10734 return fallthru;
10735}
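/* Editorial note, not part of omp-expand.c: for a GIMPLE_OMP_FOR region the
   GIMPLE_OMP_CONTINUE case above adds three edges, all flagged abnormal so
   later passes cannot split them: a loopback from the continue block to the
   first body block, an edge from the OMP_FOR entry to the block after the
   continue block (the body-never-executed case), and a fallthru from the
   continue block to that same successor.  */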