]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/omp-expand.c
omp-low.c (lower_rec_input_clauses): For lastprivate conditional references...
[thirdparty/gcc.git] / gcc / omp-expand.c
CommitLineData
629b3d75
MJ
1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth
4
a5544970 5Copyright (C) 2005-2019 Free Software Foundation, Inc.
629b3d75
MJ
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
629b3d75
MJ
56#include "gomp-constants.h"
57#include "gimple-pretty-print.h"
13293add 58#include "hsa-common.h"
314e6352
ML
59#include "stringpool.h"
60#include "attribs.h"
629b3d75
MJ
61
62/* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66struct omp_region
67{
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106};
107
108static struct omp_region *root_omp_region;
109static bool omp_any_child_fn_dumped;
110
111static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113static gphi *find_phi_with_arg_on_edge (tree, edge);
114static void expand_omp (struct omp_region *region);
115
116/* Return true if REGION is a combined parallel+workshare region. */
117
118static inline bool
119is_combined_parallel (struct omp_region *region)
120{
121 return region->is_combined_parallel;
122}
123
124/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
01914336 140 # BLOCK 2 (PAR_ENTRY_BB)
629b3d75
MJ
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166static bool
167workshare_safe_to_combine_p (basic_block ws_entry_bb)
168{
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
28567c40
JJ
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
629b3d75
MJ
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198}
199
200/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
202
203static tree
204omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205{
28567c40 206 if (!simd_schedule || integer_zerop (chunk_size))
629b3d75
MJ
207 return chunk_size;
208
9d2f08ab
RS
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
629b3d75
MJ
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218}
219
220/* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
223
224static vec<tree, va_gc> *
225get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226{
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286}
287
288/* Discover whether REGION is a combined parallel+workshare region. */
289
290static void
291determine_parallel_type (struct omp_region *region)
292{
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
28567c40
JJ
314 /* Give up for task reductions on the parallel, while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
320
629b3d75
MJ
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
327 {
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
330
331 if (region->inner->type == GIMPLE_OMP_FOR)
332 {
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
28567c40 347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
6c7ae8c5 348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
8221c30b
JJ
349 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
350 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
28567c40 351 return;
629b3d75 352 }
28567c40 353 else if (region->inner->type == GIMPLE_OMP_SECTIONS
8221c30b
JJ
354 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
355 OMP_CLAUSE__REDUCTEMP_)
356 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
357 OMP_CLAUSE__CONDTEMP_)))
28567c40 358 return;
629b3d75
MJ
359
360 region->is_combined_parallel = true;
361 region->inner->is_combined_parallel = true;
362 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
363 }
364}
365
366/* Debugging dumps for parallel regions. */
367void dump_omp_region (FILE *, struct omp_region *, int);
368void debug_omp_region (struct omp_region *);
369void debug_all_omp_regions (void);
370
371/* Dump the parallel region tree rooted at REGION. */
372
373void
374dump_omp_region (FILE *file, struct omp_region *region, int indent)
375{
376 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
377 gimple_code_name[region->type]);
378
379 if (region->inner)
380 dump_omp_region (file, region->inner, indent + 4);
381
382 if (region->cont)
383 {
384 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
385 region->cont->index);
386 }
387
388 if (region->exit)
389 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
390 region->exit->index);
391 else
392 fprintf (file, "%*s[no exit marker]\n", indent, "");
393
394 if (region->next)
395 dump_omp_region (file, region->next, indent);
396}
397
398DEBUG_FUNCTION void
399debug_omp_region (struct omp_region *region)
400{
401 dump_omp_region (stderr, region, 0);
402}
403
404DEBUG_FUNCTION void
405debug_all_omp_regions (void)
406{
407 dump_omp_region (stderr, root_omp_region, 0);
408}
409
410/* Create a new parallel region starting at STMT inside region PARENT. */
411
412static struct omp_region *
413new_omp_region (basic_block bb, enum gimple_code type,
414 struct omp_region *parent)
415{
416 struct omp_region *region = XCNEW (struct omp_region);
417
418 region->outer = parent;
419 region->entry = bb;
420 region->type = type;
421
422 if (parent)
423 {
424 /* This is a nested region. Add it to the list of inner
425 regions in PARENT. */
426 region->next = parent->inner;
427 parent->inner = region;
428 }
429 else
430 {
431 /* This is a toplevel region. Add it to the list of toplevel
432 regions in ROOT_OMP_REGION. */
433 region->next = root_omp_region;
434 root_omp_region = region;
435 }
436
437 return region;
438}
439
440/* Release the memory associated with the region tree rooted at REGION. */
441
442static void
443free_omp_region_1 (struct omp_region *region)
444{
445 struct omp_region *i, *n;
446
447 for (i = region->inner; i ; i = n)
448 {
449 n = i->next;
450 free_omp_region_1 (i);
451 }
452
453 free (region);
454}
455
456/* Release the memory for the entire omp region tree. */
457
458void
459omp_free_regions (void)
460{
461 struct omp_region *r, *n;
462 for (r = root_omp_region; r ; r = n)
463 {
464 n = r->next;
465 free_omp_region_1 (r);
466 }
467 root_omp_region = NULL;
468}
469
470/* A convenience function to build an empty GIMPLE_COND with just the
471 condition. */
472
473static gcond *
474gimple_build_cond_empty (tree cond)
475{
476 enum tree_code pred_code;
477 tree lhs, rhs;
478
479 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
480 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
481}
482
483/* Return true if a parallel REGION is within a declare target function or
484 within a target region and is not a part of a gridified target. */
485
486static bool
487parallel_needs_hsa_kernel_p (struct omp_region *region)
488{
489 bool indirect = false;
490 for (region = region->outer; region; region = region->outer)
491 {
492 if (region->type == GIMPLE_OMP_PARALLEL)
493 indirect = true;
494 else if (region->type == GIMPLE_OMP_TARGET)
495 {
496 gomp_target *tgt_stmt
497 = as_a <gomp_target *> (last_stmt (region->entry));
498
499 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
500 OMP_CLAUSE__GRIDDIM_))
501 return indirect;
502 else
503 return true;
504 }
505 }
506
507 if (lookup_attribute ("omp declare target",
508 DECL_ATTRIBUTES (current_function_decl)))
509 return true;
510
511 return false;
512}
513
623c6df5
KB
514/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
515 Add CHILD_FNDECL to decl chain of the supercontext of the block
516 ENTRY_BLOCK - this is the block which originally contained the
517 code from which CHILD_FNDECL was created.
518
519 Together, these actions ensure that the debug info for the outlined
520 function will be emitted with the correct lexical scope. */
521
522static void
4ccc4e30
JJ
523adjust_context_and_scope (struct omp_region *region, tree entry_block,
524 tree child_fndecl)
623c6df5 525{
4ccc4e30
JJ
526 tree parent_fndecl = NULL_TREE;
527 gimple *entry_stmt;
528 /* OMP expansion expands inner regions before outer ones, so if
529 we e.g. have explicit task region nested in parallel region, when
530 expanding the task region current_function_decl will be the original
531 source function, but we actually want to use as context the child
532 function of the parallel. */
533 for (region = region->outer;
534 region && parent_fndecl == NULL_TREE; region = region->outer)
535 switch (region->type)
536 {
537 case GIMPLE_OMP_PARALLEL:
538 case GIMPLE_OMP_TASK:
539 case GIMPLE_OMP_TEAMS:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
542 break;
543 case GIMPLE_OMP_TARGET:
544 entry_stmt = last_stmt (region->entry);
545 parent_fndecl
546 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
547 break;
548 default:
549 break;
550 }
551
552 if (parent_fndecl == NULL_TREE)
553 parent_fndecl = current_function_decl;
554 DECL_CONTEXT (child_fndecl) = parent_fndecl;
555
623c6df5
KB
556 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
557 {
558 tree b = BLOCK_SUPERCONTEXT (entry_block);
623c6df5
KB
559 if (TREE_CODE (b) == BLOCK)
560 {
623c6df5
KB
561 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
562 BLOCK_VARS (b) = child_fndecl;
563 }
564 }
565}
566
28567c40 567/* Build the function calls to GOMP_parallel etc to actually
629b3d75
MJ
568 generate the parallel operation. REGION is the parallel region
569 being expanded. BB is the block where to insert the code. WS_ARGS
570 will be set if this is a call to a combined parallel+workshare
571 construct, it contains the list of additional arguments needed by
572 the workshare construct. */
573
574static void
575expand_parallel_call (struct omp_region *region, basic_block bb,
576 gomp_parallel *entry_stmt,
577 vec<tree, va_gc> *ws_args)
578{
579 tree t, t1, t2, val, cond, c, clauses, flags;
580 gimple_stmt_iterator gsi;
581 gimple *stmt;
582 enum built_in_function start_ix;
583 int start_ix2;
584 location_t clause_loc;
585 vec<tree, va_gc> *args;
586
587 clauses = gimple_omp_parallel_clauses (entry_stmt);
588
589 /* Determine what flavor of GOMP_parallel we will be
590 emitting. */
591 start_ix = BUILT_IN_GOMP_PARALLEL;
28567c40
JJ
592 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
593 if (rtmp)
594 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
595 else if (is_combined_parallel (region))
629b3d75
MJ
596 {
597 switch (region->inner->type)
598 {
599 case GIMPLE_OMP_FOR:
600 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
601 switch (region->inner->sched_kind)
602 {
603 case OMP_CLAUSE_SCHEDULE_RUNTIME:
28567c40
JJ
604 if ((region->inner->sched_modifiers
605 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
606 start_ix2 = 6;
607 else if ((region->inner->sched_modifiers
608 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
609 start_ix2 = 7;
610 else
611 start_ix2 = 3;
629b3d75
MJ
612 break;
613 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
614 case OMP_CLAUSE_SCHEDULE_GUIDED:
28567c40
JJ
615 if ((region->inner->sched_modifiers
616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
629b3d75
MJ
617 {
618 start_ix2 = 3 + region->inner->sched_kind;
619 break;
620 }
621 /* FALLTHRU */
622 default:
623 start_ix2 = region->inner->sched_kind;
624 break;
625 }
626 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
627 start_ix = (enum built_in_function) start_ix2;
628 break;
629 case GIMPLE_OMP_SECTIONS:
630 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
631 break;
632 default:
633 gcc_unreachable ();
634 }
635 }
636
637 /* By default, the value of NUM_THREADS is zero (selected at run time)
638 and there is no conditional. */
639 cond = NULL_TREE;
640 val = build_int_cst (unsigned_type_node, 0);
641 flags = build_int_cst (unsigned_type_node, 0);
642
643 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
644 if (c)
645 cond = OMP_CLAUSE_IF_EXPR (c);
646
647 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
648 if (c)
649 {
650 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
651 clause_loc = OMP_CLAUSE_LOCATION (c);
652 }
653 else
654 clause_loc = gimple_location (entry_stmt);
655
656 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
657 if (c)
658 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
659
660 /* Ensure 'val' is of the correct type. */
661 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
662
663 /* If we found the clause 'if (cond)', build either
664 (cond != 0) or (cond ? val : 1u). */
665 if (cond)
666 {
667 cond = gimple_boolify (cond);
668
669 if (integer_zerop (val))
670 val = fold_build2_loc (clause_loc,
671 EQ_EXPR, unsigned_type_node, cond,
672 build_int_cst (TREE_TYPE (cond), 0));
673 else
674 {
675 basic_block cond_bb, then_bb, else_bb;
676 edge e, e_then, e_else;
677 tree tmp_then, tmp_else, tmp_join, tmp_var;
678
679 tmp_var = create_tmp_var (TREE_TYPE (val));
680 if (gimple_in_ssa_p (cfun))
681 {
682 tmp_then = make_ssa_name (tmp_var);
683 tmp_else = make_ssa_name (tmp_var);
684 tmp_join = make_ssa_name (tmp_var);
685 }
686 else
687 {
688 tmp_then = tmp_var;
689 tmp_else = tmp_var;
690 tmp_join = tmp_var;
691 }
692
693 e = split_block_after_labels (bb);
694 cond_bb = e->src;
695 bb = e->dest;
696 remove_edge (e);
697
698 then_bb = create_empty_bb (cond_bb);
699 else_bb = create_empty_bb (then_bb);
700 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
701 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
702
703 stmt = gimple_build_cond_empty (cond);
704 gsi = gsi_start_bb (cond_bb);
705 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
706
707 gsi = gsi_start_bb (then_bb);
708 expand_omp_build_assign (&gsi, tmp_then, val, true);
709
710 gsi = gsi_start_bb (else_bb);
711 expand_omp_build_assign (&gsi, tmp_else,
712 build_int_cst (unsigned_type_node, 1),
713 true);
714
715 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
716 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
717 add_bb_to_loop (then_bb, cond_bb->loop_father);
718 add_bb_to_loop (else_bb, cond_bb->loop_father);
719 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
720 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
721
722 if (gimple_in_ssa_p (cfun))
723 {
724 gphi *phi = create_phi_node (tmp_join, bb);
725 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
726 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
727 }
728
729 val = tmp_join;
730 }
731
732 gsi = gsi_start_bb (bb);
733 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
734 false, GSI_CONTINUE_LINKING);
735 }
736
65f4b875 737 gsi = gsi_last_nondebug_bb (bb);
629b3d75
MJ
738 t = gimple_omp_parallel_data_arg (entry_stmt);
739 if (t == NULL)
740 t1 = null_pointer_node;
741 else
742 t1 = build_fold_addr_expr (t);
743 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
744 t2 = build_fold_addr_expr (child_fndecl);
745
746 vec_alloc (args, 4 + vec_safe_length (ws_args));
747 args->quick_push (t2);
748 args->quick_push (t1);
749 args->quick_push (val);
750 if (ws_args)
751 args->splice (*ws_args);
752 args->quick_push (flags);
753
754 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
755 builtin_decl_explicit (start_ix), args);
756
28567c40
JJ
757 if (rtmp)
758 {
759 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
760 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
761 fold_convert (type,
762 fold_convert (pointer_sized_int_node, t)));
763 }
629b3d75
MJ
764 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
765 false, GSI_CONTINUE_LINKING);
766
767 if (hsa_gen_requested_p ()
768 && parallel_needs_hsa_kernel_p (region))
769 {
770 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
771 hsa_register_kernel (child_cnode);
772 }
773}
774
629b3d75
MJ
775/* Build the function call to GOMP_task to actually
776 generate the task operation. BB is the block where to insert the code. */
777
778static void
779expand_task_call (struct omp_region *region, basic_block bb,
780 gomp_task *entry_stmt)
781{
782 tree t1, t2, t3;
783 gimple_stmt_iterator gsi;
784 location_t loc = gimple_location (entry_stmt);
785
786 tree clauses = gimple_omp_task_clauses (entry_stmt);
787
788 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
789 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
790 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
791 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
792 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
793 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
794
795 unsigned int iflags
796 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
797 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
798 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
799
800 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
801 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
802 tree num_tasks = NULL_TREE;
803 bool ull = false;
804 if (taskloop_p)
805 {
806 gimple *g = last_stmt (region->outer->entry);
807 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
808 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
809 struct omp_for_data fd;
810 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
811 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
812 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
813 OMP_CLAUSE__LOOPTEMP_);
814 startvar = OMP_CLAUSE_DECL (startvar);
815 endvar = OMP_CLAUSE_DECL (endvar);
816 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
817 if (fd.loop.cond_code == LT_EXPR)
818 iflags |= GOMP_TASK_FLAG_UP;
819 tree tclauses = gimple_omp_for_clauses (g);
820 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
821 if (num_tasks)
822 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
823 else
824 {
825 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
826 if (num_tasks)
827 {
828 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
829 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
830 }
831 else
832 num_tasks = integer_zero_node;
833 }
834 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
835 if (ifc == NULL_TREE)
836 iflags |= GOMP_TASK_FLAG_IF;
837 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
838 iflags |= GOMP_TASK_FLAG_NOGROUP;
839 ull = fd.iter_type == long_long_unsigned_type_node;
28567c40
JJ
840 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
841 iflags |= GOMP_TASK_FLAG_REDUCTION;
629b3d75
MJ
842 }
843 else if (priority)
844 iflags |= GOMP_TASK_FLAG_PRIORITY;
845
846 tree flags = build_int_cst (unsigned_type_node, iflags);
847
848 tree cond = boolean_true_node;
849 if (ifc)
850 {
851 if (taskloop_p)
852 {
853 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
854 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
855 build_int_cst (unsigned_type_node,
856 GOMP_TASK_FLAG_IF),
857 build_int_cst (unsigned_type_node, 0));
858 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
859 flags, t);
860 }
861 else
862 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
863 }
864
865 if (finalc)
866 {
867 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
868 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
869 build_int_cst (unsigned_type_node,
870 GOMP_TASK_FLAG_FINAL),
871 build_int_cst (unsigned_type_node, 0));
872 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
873 }
874 if (depend)
875 depend = OMP_CLAUSE_DECL (depend);
876 else
877 depend = build_int_cst (ptr_type_node, 0);
878 if (priority)
879 priority = fold_convert (integer_type_node,
880 OMP_CLAUSE_PRIORITY_EXPR (priority));
881 else
882 priority = integer_zero_node;
883
65f4b875 884 gsi = gsi_last_nondebug_bb (bb);
629b3d75
MJ
885 tree t = gimple_omp_task_data_arg (entry_stmt);
886 if (t == NULL)
887 t2 = null_pointer_node;
888 else
889 t2 = build_fold_addr_expr_loc (loc, t);
890 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
891 t = gimple_omp_task_copy_fn (entry_stmt);
892 if (t == NULL)
893 t3 = null_pointer_node;
894 else
895 t3 = build_fold_addr_expr_loc (loc, t);
896
897 if (taskloop_p)
898 t = build_call_expr (ull
899 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
900 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
901 11, t1, t2, t3,
902 gimple_omp_task_arg_size (entry_stmt),
903 gimple_omp_task_arg_align (entry_stmt), flags,
904 num_tasks, priority, startvar, endvar, step);
905 else
906 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
907 9, t1, t2, t3,
908 gimple_omp_task_arg_size (entry_stmt),
909 gimple_omp_task_arg_align (entry_stmt), cond, flags,
910 depend, priority);
911
912 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
913 false, GSI_CONTINUE_LINKING);
914}
915
28567c40
JJ
916/* Build the function call to GOMP_taskwait_depend to actually
917 generate the taskwait operation. BB is the block where to insert the
918 code. */
919
920static void
921expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
922{
923 tree clauses = gimple_omp_task_clauses (entry_stmt);
924 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
925 if (depend == NULL_TREE)
926 return;
927
928 depend = OMP_CLAUSE_DECL (depend);
929
930 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
931 tree t
932 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
933 1, depend);
934
935 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
936 false, GSI_CONTINUE_LINKING);
937}
938
939/* Build the function call to GOMP_teams_reg to actually
940 generate the host teams operation. REGION is the teams region
941 being expanded. BB is the block where to insert the code. */
942
943static void
944expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
945{
946 tree clauses = gimple_omp_teams_clauses (entry_stmt);
947 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
948 if (num_teams == NULL_TREE)
949 num_teams = build_int_cst (unsigned_type_node, 0);
950 else
951 {
952 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
953 num_teams = fold_convert (unsigned_type_node, num_teams);
954 }
955 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
956 if (thread_limit == NULL_TREE)
957 thread_limit = build_int_cst (unsigned_type_node, 0);
958 else
959 {
960 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
961 thread_limit = fold_convert (unsigned_type_node, thread_limit);
962 }
963
964 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
965 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
966 if (t == NULL)
967 t1 = null_pointer_node;
968 else
969 t1 = build_fold_addr_expr (t);
970 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
971 tree t2 = build_fold_addr_expr (child_fndecl);
972
28567c40
JJ
973 vec<tree, va_gc> *args;
974 vec_alloc (args, 5);
975 args->quick_push (t2);
976 args->quick_push (t1);
977 args->quick_push (num_teams);
978 args->quick_push (thread_limit);
979 /* For future extensibility. */
980 args->quick_push (build_zero_cst (unsigned_type_node));
981
982 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
983 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
984 args);
985
986 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
987 false, GSI_CONTINUE_LINKING);
988}
989
629b3d75
MJ
990/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
991
992static tree
993vec2chain (vec<tree, va_gc> *v)
994{
995 tree chain = NULL_TREE, t;
996 unsigned ix;
997
998 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
999 {
1000 DECL_CHAIN (t) = chain;
1001 chain = t;
1002 }
1003
1004 return chain;
1005}
1006
1007/* Remove barriers in REGION->EXIT's block. Note that this is only
1008 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1009 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1010 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1011 removed. */
1012
1013static void
1014remove_exit_barrier (struct omp_region *region)
1015{
1016 gimple_stmt_iterator gsi;
1017 basic_block exit_bb;
1018 edge_iterator ei;
1019 edge e;
1020 gimple *stmt;
1021 int any_addressable_vars = -1;
1022
1023 exit_bb = region->exit;
1024
1025 /* If the parallel region doesn't return, we don't have REGION->EXIT
1026 block at all. */
1027 if (! exit_bb)
1028 return;
1029
1030 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1031 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1032 statements that can appear in between are extremely limited -- no
1033 memory operations at all. Here, we allow nothing at all, so the
1034 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
65f4b875 1035 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75 1036 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
65f4b875 1037 gsi_prev_nondebug (&gsi);
629b3d75
MJ
1038 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1039 return;
1040
1041 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1042 {
65f4b875 1043 gsi = gsi_last_nondebug_bb (e->src);
629b3d75
MJ
1044 if (gsi_end_p (gsi))
1045 continue;
1046 stmt = gsi_stmt (gsi);
1047 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1048 && !gimple_omp_return_nowait_p (stmt))
1049 {
1050 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1051 in many cases. If there could be tasks queued, the barrier
1052 might be needed to let the tasks run before some local
1053 variable of the parallel that the task uses as shared
1054 runs out of scope. The task can be spawned either
1055 from within current function (this would be easy to check)
1056 or from some function it calls and gets passed an address
1057 of such a variable. */
1058 if (any_addressable_vars < 0)
1059 {
1060 gomp_parallel *parallel_stmt
1061 = as_a <gomp_parallel *> (last_stmt (region->entry));
1062 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1063 tree local_decls, block, decl;
1064 unsigned ix;
1065
1066 any_addressable_vars = 0;
1067 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1068 if (TREE_ADDRESSABLE (decl))
1069 {
1070 any_addressable_vars = 1;
1071 break;
1072 }
1073 for (block = gimple_block (stmt);
1074 !any_addressable_vars
1075 && block
1076 && TREE_CODE (block) == BLOCK;
1077 block = BLOCK_SUPERCONTEXT (block))
1078 {
1079 for (local_decls = BLOCK_VARS (block);
1080 local_decls;
1081 local_decls = DECL_CHAIN (local_decls))
1082 if (TREE_ADDRESSABLE (local_decls))
1083 {
1084 any_addressable_vars = 1;
1085 break;
1086 }
1087 if (block == gimple_block (parallel_stmt))
1088 break;
1089 }
1090 }
1091 if (!any_addressable_vars)
1092 gimple_omp_return_set_nowait (stmt);
1093 }
1094 }
1095}
1096
1097static void
1098remove_exit_barriers (struct omp_region *region)
1099{
1100 if (region->type == GIMPLE_OMP_PARALLEL)
1101 remove_exit_barrier (region);
1102
1103 if (region->inner)
1104 {
1105 region = region->inner;
1106 remove_exit_barriers (region);
1107 while (region->next)
1108 {
1109 region = region->next;
1110 remove_exit_barriers (region);
1111 }
1112 }
1113}
1114
1115/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1116 calls. These can't be declared as const functions, but
1117 within one parallel body they are constant, so they can be
1118 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1119 which are declared const. Similarly for task body, except
1120 that in untied task omp_get_thread_num () can change at any task
1121 scheduling point. */
1122
1123static void
1124optimize_omp_library_calls (gimple *entry_stmt)
1125{
1126 basic_block bb;
1127 gimple_stmt_iterator gsi;
1128 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1129 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1130 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1132 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1133 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1134 OMP_CLAUSE_UNTIED) != NULL);
1135
1136 FOR_EACH_BB_FN (bb, cfun)
1137 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1138 {
1139 gimple *call = gsi_stmt (gsi);
1140 tree decl;
1141
1142 if (is_gimple_call (call)
1143 && (decl = gimple_call_fndecl (call))
1144 && DECL_EXTERNAL (decl)
1145 && TREE_PUBLIC (decl)
1146 && DECL_INITIAL (decl) == NULL)
1147 {
1148 tree built_in;
1149
1150 if (DECL_NAME (decl) == thr_num_id)
1151 {
1152 /* In #pragma omp task untied omp_get_thread_num () can change
1153 during the execution of the task region. */
1154 if (untied_task)
1155 continue;
1156 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1157 }
1158 else if (DECL_NAME (decl) == num_thr_id)
1159 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1160 else
1161 continue;
1162
1163 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1164 || gimple_call_num_args (call) != 0)
1165 continue;
1166
1167 if (flag_exceptions && !TREE_NOTHROW (decl))
1168 continue;
1169
1170 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1171 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1172 TREE_TYPE (TREE_TYPE (built_in))))
1173 continue;
1174
1175 gimple_call_set_fndecl (call, built_in);
1176 }
1177 }
1178}
1179
1180/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1181 regimplified. */
1182
1183static tree
1184expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1185{
1186 tree t = *tp;
1187
1188 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1189 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1190 return t;
1191
1192 if (TREE_CODE (t) == ADDR_EXPR)
1193 recompute_tree_invariant_for_addr_expr (t);
1194
1195 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1196 return NULL_TREE;
1197}
1198
1199/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1200
1201static void
1202expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1203 bool after)
1204{
1205 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1206 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1207 !after, after ? GSI_CONTINUE_LINKING
1208 : GSI_SAME_STMT);
1209 gimple *stmt = gimple_build_assign (to, from);
1210 if (after)
1211 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1212 else
1213 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1214 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1215 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1216 {
1217 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1218 gimple_regimplify_operands (stmt, &gsi);
1219 }
1220}
1221
1222/* Expand the OpenMP parallel or task directive starting at REGION. */
1223
1224static void
1225expand_omp_taskreg (struct omp_region *region)
1226{
1227 basic_block entry_bb, exit_bb, new_bb;
1228 struct function *child_cfun;
1229 tree child_fn, block, t;
1230 gimple_stmt_iterator gsi;
1231 gimple *entry_stmt, *stmt;
1232 edge e;
1233 vec<tree, va_gc> *ws_args;
1234
1235 entry_stmt = last_stmt (region->entry);
28567c40
JJ
1236 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1237 && gimple_omp_task_taskwait_p (entry_stmt))
1238 {
1239 new_bb = region->entry;
1240 gsi = gsi_last_nondebug_bb (region->entry);
1241 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1242 gsi_remove (&gsi, true);
1243 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1244 return;
1245 }
1246
629b3d75
MJ
1247 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1248 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1249
1250 entry_bb = region->entry;
1251 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1252 exit_bb = region->cont;
1253 else
1254 exit_bb = region->exit;
1255
5e9d6aa4 1256 if (is_combined_parallel (region))
629b3d75
MJ
1257 ws_args = region->ws_args;
1258 else
1259 ws_args = NULL;
1260
1261 if (child_cfun->cfg)
1262 {
1263 /* Due to inlining, it may happen that we have already outlined
1264 the region, in which case all we need to do is make the
1265 sub-graph unreachable and emit the parallel call. */
1266 edge entry_succ_e, exit_succ_e;
1267
1268 entry_succ_e = single_succ_edge (entry_bb);
1269
65f4b875 1270 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 1271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
28567c40
JJ
1272 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1273 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
629b3d75
MJ
1274 gsi_remove (&gsi, true);
1275
1276 new_bb = entry_bb;
1277 if (exit_bb)
1278 {
1279 exit_succ_e = single_succ_edge (exit_bb);
1280 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1281 }
1282 remove_edge_and_dominated_blocks (entry_succ_e);
1283 }
1284 else
1285 {
1286 unsigned srcidx, dstidx, num;
1287
1288 /* If the parallel region needs data sent from the parent
1289 function, then the very first statement (except possible
1290 tree profile counter updates) of the parallel body
1291 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1292 &.OMP_DATA_O is passed as an argument to the child function,
1293 we need to replace it with the argument as seen by the child
1294 function.
1295
1296 In most cases, this will end up being the identity assignment
1297 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1298 a function call that has been inlined, the original PARM_DECL
1299 .OMP_DATA_I may have been converted into a different local
1300 variable. In which case, we need to keep the assignment. */
1301 if (gimple_omp_taskreg_data_arg (entry_stmt))
1302 {
1303 basic_block entry_succ_bb
1304 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1305 : FALLTHRU_EDGE (entry_bb)->dest;
1306 tree arg;
1307 gimple *parcopy_stmt = NULL;
1308
1309 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1310 {
1311 gimple *stmt;
1312
1313 gcc_assert (!gsi_end_p (gsi));
1314 stmt = gsi_stmt (gsi);
1315 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1316 continue;
1317
1318 if (gimple_num_ops (stmt) == 2)
1319 {
1320 tree arg = gimple_assign_rhs1 (stmt);
1321
1322 /* We're ignore the subcode because we're
1323 effectively doing a STRIP_NOPS. */
1324
1325 if (TREE_CODE (arg) == ADDR_EXPR
28567c40
JJ
1326 && (TREE_OPERAND (arg, 0)
1327 == gimple_omp_taskreg_data_arg (entry_stmt)))
629b3d75
MJ
1328 {
1329 parcopy_stmt = stmt;
1330 break;
1331 }
1332 }
1333 }
1334
1335 gcc_assert (parcopy_stmt != NULL);
1336 arg = DECL_ARGUMENTS (child_fn);
1337
1338 if (!gimple_in_ssa_p (cfun))
1339 {
1340 if (gimple_assign_lhs (parcopy_stmt) == arg)
1341 gsi_remove (&gsi, true);
1342 else
1343 {
01914336 1344 /* ?? Is setting the subcode really necessary ?? */
629b3d75
MJ
1345 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1346 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1347 }
1348 }
1349 else
1350 {
1351 tree lhs = gimple_assign_lhs (parcopy_stmt);
1352 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1353 /* We'd like to set the rhs to the default def in the child_fn,
1354 but it's too early to create ssa names in the child_fn.
1355 Instead, we set the rhs to the parm. In
1356 move_sese_region_to_fn, we introduce a default def for the
1357 parm, map the parm to it's default def, and once we encounter
1358 this stmt, replace the parm with the default def. */
1359 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1360 update_stmt (parcopy_stmt);
1361 }
1362 }
1363
1364 /* Declare local variables needed in CHILD_CFUN. */
1365 block = DECL_INITIAL (child_fn);
1366 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1367 /* The gimplifier could record temporaries in parallel/task block
1368 rather than in containing function's local_decls chain,
1369 which would mean cgraph missed finalizing them. Do it now. */
1370 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1371 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1372 varpool_node::finalize_decl (t);
1373 DECL_SAVED_TREE (child_fn) = NULL;
1374 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1375 gimple_set_body (child_fn, NULL);
1376 TREE_USED (block) = 1;
1377
1378 /* Reset DECL_CONTEXT on function arguments. */
1379 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1380 DECL_CONTEXT (t) = child_fn;
1381
1382 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1383 so that it can be moved to the child function. */
65f4b875 1384 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
1385 stmt = gsi_stmt (gsi);
1386 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
28567c40
JJ
1387 || gimple_code (stmt) == GIMPLE_OMP_TASK
1388 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
629b3d75
MJ
1389 e = split_block (entry_bb, stmt);
1390 gsi_remove (&gsi, true);
1391 entry_bb = e->dest;
1392 edge e2 = NULL;
28567c40 1393 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
629b3d75
MJ
1394 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1395 else
1396 {
1397 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1398 gcc_assert (e2->dest == region->exit);
1399 remove_edge (BRANCH_EDGE (entry_bb));
1400 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
65f4b875 1401 gsi = gsi_last_nondebug_bb (region->exit);
629b3d75
MJ
1402 gcc_assert (!gsi_end_p (gsi)
1403 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1404 gsi_remove (&gsi, true);
1405 }
1406
1407 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1408 if (exit_bb)
1409 {
65f4b875 1410 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
1411 gcc_assert (!gsi_end_p (gsi)
1412 && (gimple_code (gsi_stmt (gsi))
1413 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1414 stmt = gimple_build_return (NULL);
1415 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1416 gsi_remove (&gsi, true);
1417 }
1418
1419 /* Move the parallel region into CHILD_CFUN. */
1420
1421 if (gimple_in_ssa_p (cfun))
1422 {
1423 init_tree_ssa (child_cfun);
1424 init_ssa_operands (child_cfun);
1425 child_cfun->gimple_df->in_ssa_p = true;
1426 block = NULL_TREE;
1427 }
1428 else
1429 block = gimple_block (entry_stmt);
1430
1431 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1432 if (exit_bb)
1433 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1434 if (e2)
1435 {
1436 basic_block dest_bb = e2->dest;
1437 if (!exit_bb)
1438 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1439 remove_edge (e2);
1440 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1441 }
1442 /* When the OMP expansion process cannot guarantee an up-to-date
01914336 1443 loop tree arrange for the child function to fixup loops. */
629b3d75
MJ
1444 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1445 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1446
1447 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1448 num = vec_safe_length (child_cfun->local_decls);
1449 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1450 {
1451 t = (*child_cfun->local_decls)[srcidx];
1452 if (DECL_CONTEXT (t) == cfun->decl)
1453 continue;
1454 if (srcidx != dstidx)
1455 (*child_cfun->local_decls)[dstidx] = t;
1456 dstidx++;
1457 }
1458 if (dstidx != num)
1459 vec_safe_truncate (child_cfun->local_decls, dstidx);
1460
1461 /* Inform the callgraph about the new function. */
1462 child_cfun->curr_properties = cfun->curr_properties;
1463 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1464 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1465 cgraph_node *node = cgraph_node::get_create (child_fn);
1466 node->parallelized_function = 1;
1467 cgraph_node::add_new_function (child_fn, true);
1468
1469 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1470 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1471
1472 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1473 fixed in a following pass. */
1474 push_cfun (child_cfun);
1475 if (need_asm)
9579db35 1476 assign_assembler_name_if_needed (child_fn);
629b3d75
MJ
1477
1478 if (optimize)
1479 optimize_omp_library_calls (entry_stmt);
fc06ae0d 1480 update_max_bb_count ();
629b3d75
MJ
1481 cgraph_edge::rebuild_edges ();
1482
1483 /* Some EH regions might become dead, see PR34608. If
1484 pass_cleanup_cfg isn't the first pass to happen with the
1485 new child, these dead EH edges might cause problems.
1486 Clean them up now. */
1487 if (flag_exceptions)
1488 {
1489 basic_block bb;
1490 bool changed = false;
1491
1492 FOR_EACH_BB_FN (bb, cfun)
1493 changed |= gimple_purge_dead_eh_edges (bb);
1494 if (changed)
1495 cleanup_tree_cfg ();
1496 }
1497 if (gimple_in_ssa_p (cfun))
1498 update_ssa (TODO_update_ssa);
1499 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1500 verify_loop_structure ();
1501 pop_cfun ();
1502
1503 if (dump_file && !gimple_in_ssa_p (cfun))
1504 {
1505 omp_any_child_fn_dumped = true;
1506 dump_function_header (dump_file, child_fn, dump_flags);
1507 dump_function_to_file (child_fn, dump_file, dump_flags);
1508 }
1509 }
1510
4ccc4e30
JJ
1511 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1512
5e9d6aa4 1513 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
629b3d75
MJ
1514 expand_parallel_call (region, new_bb,
1515 as_a <gomp_parallel *> (entry_stmt), ws_args);
28567c40
JJ
1516 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1517 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
629b3d75
MJ
1518 else
1519 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1520 if (gimple_in_ssa_p (cfun))
1521 update_ssa (TODO_update_ssa_only_virtuals);
1522}
1523
1524/* Information about members of an OpenACC collapsed loop nest. */
1525
1526struct oacc_collapse
1527{
01914336 1528 tree base; /* Base value. */
629b3d75 1529 tree iters; /* Number of steps. */
02889d23
CLT
1530 tree step; /* Step size. */
1531 tree tile; /* Tile increment (if tiled). */
1532 tree outer; /* Tile iterator var. */
629b3d75
MJ
1533};
1534
1535/* Helper for expand_oacc_for. Determine collapsed loop information.
1536 Fill in COUNTS array. Emit any initialization code before GSI.
1537 Return the calculated outer loop bound of BOUND_TYPE. */
1538
1539static tree
1540expand_oacc_collapse_init (const struct omp_for_data *fd,
1541 gimple_stmt_iterator *gsi,
02889d23
CLT
1542 oacc_collapse *counts, tree bound_type,
1543 location_t loc)
629b3d75 1544{
02889d23 1545 tree tiling = fd->tiling;
629b3d75
MJ
1546 tree total = build_int_cst (bound_type, 1);
1547 int ix;
1548
1549 gcc_assert (integer_onep (fd->loop.step));
1550 gcc_assert (integer_zerop (fd->loop.n1));
1551
02889d23
CLT
1552 /* When tiling, the first operand of the tile clause applies to the
1553 innermost loop, and we work outwards from there. Seems
1554 backwards, but whatever. */
1555 for (ix = fd->collapse; ix--;)
629b3d75
MJ
1556 {
1557 const omp_for_data_loop *loop = &fd->loops[ix];
1558
1559 tree iter_type = TREE_TYPE (loop->v);
1560 tree diff_type = iter_type;
1561 tree plus_type = iter_type;
1562
1563 gcc_assert (loop->cond_code == fd->loop.cond_code);
1564
1565 if (POINTER_TYPE_P (iter_type))
1566 plus_type = sizetype;
1567 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1568 diff_type = signed_type_for (diff_type);
c31bc4ac
TV
1569 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1570 diff_type = integer_type_node;
629b3d75 1571
02889d23
CLT
1572 if (tiling)
1573 {
1574 tree num = build_int_cst (integer_type_node, fd->collapse);
1575 tree loop_no = build_int_cst (integer_type_node, ix);
1576 tree tile = TREE_VALUE (tiling);
1577 gcall *call
1578 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1579 /* gwv-outer=*/integer_zero_node,
1580 /* gwv-inner=*/integer_zero_node);
1581
1582 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1583 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1584 gimple_call_set_lhs (call, counts[ix].tile);
1585 gimple_set_location (call, loc);
1586 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1587
1588 tiling = TREE_CHAIN (tiling);
1589 }
1590 else
1591 {
1592 counts[ix].tile = NULL;
1593 counts[ix].outer = loop->v;
1594 }
1595
629b3d75
MJ
1596 tree b = loop->n1;
1597 tree e = loop->n2;
1598 tree s = loop->step;
1599 bool up = loop->cond_code == LT_EXPR;
1600 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1601 bool negating;
1602 tree expr;
1603
1604 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1605 true, GSI_SAME_STMT);
1606 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1608
01914336 1609 /* Convert the step, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
1610 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1611 if (negating)
1612 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1613 s = fold_convert (diff_type, s);
1614 if (negating)
1615 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1616 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1617 true, GSI_SAME_STMT);
1618
01914336 1619 /* Determine the range, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
1620 negating = !up && TYPE_UNSIGNED (iter_type);
1621 expr = fold_build2 (MINUS_EXPR, plus_type,
1622 fold_convert (plus_type, negating ? b : e),
1623 fold_convert (plus_type, negating ? e : b));
1624 expr = fold_convert (diff_type, expr);
1625 if (negating)
1626 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1627 tree range = force_gimple_operand_gsi
1628 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1629
1630 /* Determine number of iterations. */
1631 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1632 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1633 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1634
1635 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1636 true, GSI_SAME_STMT);
1637
1638 counts[ix].base = b;
1639 counts[ix].iters = iters;
1640 counts[ix].step = s;
1641
1642 total = fold_build2 (MULT_EXPR, bound_type, total,
1643 fold_convert (bound_type, iters));
1644 }
1645
1646 return total;
1647}
1648
02889d23
CLT
1649/* Emit initializers for collapsed loop members. INNER is true if
1650 this is for the element loop of a TILE. IVAR is the outer
629b3d75
MJ
1651 loop iteration variable, from which collapsed loop iteration values
1652 are calculated. COUNTS array has been initialized by
1653 expand_oacc_collapse_inits. */
1654
1655static void
02889d23 1656expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
629b3d75
MJ
1657 gimple_stmt_iterator *gsi,
1658 const oacc_collapse *counts, tree ivar)
1659{
1660 tree ivar_type = TREE_TYPE (ivar);
1661
1662 /* The most rapidly changing iteration variable is the innermost
1663 one. */
1664 for (int ix = fd->collapse; ix--;)
1665 {
1666 const omp_for_data_loop *loop = &fd->loops[ix];
1667 const oacc_collapse *collapse = &counts[ix];
02889d23
CLT
1668 tree v = inner ? loop->v : collapse->outer;
1669 tree iter_type = TREE_TYPE (v);
629b3d75
MJ
1670 tree diff_type = TREE_TYPE (collapse->step);
1671 tree plus_type = iter_type;
1672 enum tree_code plus_code = PLUS_EXPR;
1673 tree expr;
1674
1675 if (POINTER_TYPE_P (iter_type))
1676 {
1677 plus_code = POINTER_PLUS_EXPR;
1678 plus_type = sizetype;
1679 }
1680
02889d23
CLT
1681 expr = ivar;
1682 if (ix)
1683 {
1684 tree mod = fold_convert (ivar_type, collapse->iters);
1685 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1686 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1687 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1688 true, GSI_SAME_STMT);
1689 }
1690
629b3d75
MJ
1691 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1692 collapse->step);
02889d23
CLT
1693 expr = fold_build2 (plus_code, iter_type,
1694 inner ? collapse->outer : collapse->base,
629b3d75
MJ
1695 fold_convert (plus_type, expr));
1696 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1697 true, GSI_SAME_STMT);
02889d23 1698 gassign *ass = gimple_build_assign (v, expr);
629b3d75 1699 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
629b3d75
MJ
1700 }
1701}
1702
1703/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1704 of the combined collapse > 1 loop constructs, generate code like:
1705 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1706 if (cond3 is <)
1707 adj = STEP3 - 1;
1708 else
1709 adj = STEP3 + 1;
1710 count3 = (adj + N32 - N31) / STEP3;
1711 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1712 if (cond2 is <)
1713 adj = STEP2 - 1;
1714 else
1715 adj = STEP2 + 1;
1716 count2 = (adj + N22 - N21) / STEP2;
1717 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1718 if (cond1 is <)
1719 adj = STEP1 - 1;
1720 else
1721 adj = STEP1 + 1;
1722 count1 = (adj + N12 - N11) / STEP1;
1723 count = count1 * count2 * count3;
1724 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1725 count = 0;
1726 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1727 of the combined loop constructs, just initialize COUNTS array
1728 from the _looptemp_ clauses. */
1729
1730/* NOTE: It *could* be better to moosh all of the BBs together,
1731 creating one larger BB with all the computation and the unexpected
1732 jump at the end. I.e.
1733
1734 bool zero3, zero2, zero1, zero;
1735
1736 zero3 = N32 c3 N31;
1737 count3 = (N32 - N31) /[cl] STEP3;
1738 zero2 = N22 c2 N21;
1739 count2 = (N22 - N21) /[cl] STEP2;
1740 zero1 = N12 c1 N11;
1741 count1 = (N12 - N11) /[cl] STEP1;
1742 zero = zero3 || zero2 || zero1;
1743 count = count1 * count2 * count3;
1744 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1745
1746 After all, we expect the zero=false, and thus we expect to have to
1747 evaluate all of the comparison expressions, so short-circuiting
1748 oughtn't be a win. Since the condition isn't protecting a
1749 denominator, we're not concerned about divide-by-zero, so we can
1750 fully evaluate count even if a numerator turned out to be wrong.
1751
1752 It seems like putting this all together would create much better
1753 scheduling opportunities, and less pressure on the chip's branch
1754 predictor. */
1755
1756static void
1757expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1758 basic_block &entry_bb, tree *counts,
1759 basic_block &zero_iter1_bb, int &first_zero_iter1,
1760 basic_block &zero_iter2_bb, int &first_zero_iter2,
1761 basic_block &l2_dom_bb)
1762{
1763 tree t, type = TREE_TYPE (fd->loop.v);
1764 edge e, ne;
1765 int i;
1766
1767 /* Collapsed loops need work for expansion into SSA form. */
1768 gcc_assert (!gimple_in_ssa_p (cfun));
1769
1770 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1771 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1772 {
1773 gcc_assert (fd->ordered == 0);
1774 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1775 isn't supposed to be handled, as the inner loop doesn't
1776 use it. */
1777 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1778 OMP_CLAUSE__LOOPTEMP_);
1779 gcc_assert (innerc);
1780 for (i = 0; i < fd->collapse; i++)
1781 {
1782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1783 OMP_CLAUSE__LOOPTEMP_);
1784 gcc_assert (innerc);
1785 if (i)
1786 counts[i] = OMP_CLAUSE_DECL (innerc);
1787 else
1788 counts[0] = NULL_TREE;
1789 }
1790 return;
1791 }
1792
1793 for (i = fd->collapse; i < fd->ordered; i++)
1794 {
1795 tree itype = TREE_TYPE (fd->loops[i].v);
1796 counts[i] = NULL_TREE;
1797 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1798 fold_convert (itype, fd->loops[i].n1),
1799 fold_convert (itype, fd->loops[i].n2));
1800 if (t && integer_zerop (t))
1801 {
1802 for (i = fd->collapse; i < fd->ordered; i++)
1803 counts[i] = build_int_cst (type, 0);
1804 break;
1805 }
1806 }
1807 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1808 {
1809 tree itype = TREE_TYPE (fd->loops[i].v);
1810
1811 if (i >= fd->collapse && counts[i])
1812 continue;
1813 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1814 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1815 fold_convert (itype, fd->loops[i].n1),
1816 fold_convert (itype, fd->loops[i].n2)))
1817 == NULL_TREE || !integer_onep (t)))
1818 {
1819 gcond *cond_stmt;
1820 tree n1, n2;
1821 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1822 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1823 true, GSI_SAME_STMT);
1824 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1825 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1826 true, GSI_SAME_STMT);
1827 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1828 NULL_TREE, NULL_TREE);
1829 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1830 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1831 expand_omp_regimplify_p, NULL, NULL)
1832 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1833 expand_omp_regimplify_p, NULL, NULL))
1834 {
1835 *gsi = gsi_for_stmt (cond_stmt);
1836 gimple_regimplify_operands (cond_stmt, gsi);
1837 }
1838 e = split_block (entry_bb, cond_stmt);
1839 basic_block &zero_iter_bb
1840 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1841 int &first_zero_iter
1842 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1843 if (zero_iter_bb == NULL)
1844 {
1845 gassign *assign_stmt;
1846 first_zero_iter = i;
1847 zero_iter_bb = create_empty_bb (entry_bb);
1848 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1849 *gsi = gsi_after_labels (zero_iter_bb);
1850 if (i < fd->collapse)
1851 assign_stmt = gimple_build_assign (fd->loop.n2,
1852 build_zero_cst (type));
1853 else
1854 {
1855 counts[i] = create_tmp_reg (type, ".count");
1856 assign_stmt
1857 = gimple_build_assign (counts[i], build_zero_cst (type));
1858 }
1859 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1860 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1861 entry_bb);
1862 }
1863 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
357067f2 1864 ne->probability = profile_probability::very_unlikely ();
629b3d75 1865 e->flags = EDGE_TRUE_VALUE;
357067f2 1866 e->probability = ne->probability.invert ();
629b3d75
MJ
1867 if (l2_dom_bb == NULL)
1868 l2_dom_bb = entry_bb;
1869 entry_bb = e->dest;
65f4b875 1870 *gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
1871 }
1872
1873 if (POINTER_TYPE_P (itype))
1874 itype = signed_type_for (itype);
1875 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1876 ? -1 : 1));
1877 t = fold_build2 (PLUS_EXPR, itype,
1878 fold_convert (itype, fd->loops[i].step), t);
1879 t = fold_build2 (PLUS_EXPR, itype, t,
1880 fold_convert (itype, fd->loops[i].n2));
1881 t = fold_build2 (MINUS_EXPR, itype, t,
1882 fold_convert (itype, fd->loops[i].n1));
1883 /* ?? We could probably use CEIL_DIV_EXPR instead of
1884 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1885 generate the same code in the end because generically we
1886 don't know that the values involved must be negative for
1887 GT?? */
1888 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1889 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1890 fold_build1 (NEGATE_EXPR, itype, t),
1891 fold_build1 (NEGATE_EXPR, itype,
1892 fold_convert (itype,
1893 fd->loops[i].step)));
1894 else
1895 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].step));
1897 t = fold_convert (type, t);
1898 if (TREE_CODE (t) == INTEGER_CST)
1899 counts[i] = t;
1900 else
1901 {
1902 if (i < fd->collapse || i != first_zero_iter2)
1903 counts[i] = create_tmp_reg (type, ".count");
1904 expand_omp_build_assign (gsi, counts[i], t);
1905 }
1906 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1907 {
1908 if (i == 0)
1909 t = counts[0];
1910 else
1911 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1912 expand_omp_build_assign (gsi, fd->loop.n2, t);
1913 }
1914 }
1915}
1916
1917/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1918 T = V;
1919 V3 = N31 + (T % count3) * STEP3;
1920 T = T / count3;
1921 V2 = N21 + (T % count2) * STEP2;
1922 T = T / count2;
1923 V1 = N11 + T * STEP1;
1924 if this loop doesn't have an inner loop construct combined with it.
1925 If it does have an inner loop construct combined with it and the
1926 iteration count isn't known constant, store values from counts array
1927 into its _looptemp_ temporaries instead. */
1928
1929static void
1930expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1931 tree *counts, gimple *inner_stmt, tree startvar)
1932{
1933 int i;
1934 if (gimple_omp_for_combined_p (fd->for_stmt))
1935 {
1936 /* If fd->loop.n2 is constant, then no propagation of the counts
1937 is needed, they are constant. */
1938 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1939 return;
1940
1941 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1942 ? gimple_omp_taskreg_clauses (inner_stmt)
1943 : gimple_omp_for_clauses (inner_stmt);
1944 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1945 isn't supposed to be handled, as the inner loop doesn't
1946 use it. */
1947 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1948 gcc_assert (innerc);
1949 for (i = 0; i < fd->collapse; i++)
1950 {
1951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1952 OMP_CLAUSE__LOOPTEMP_);
1953 gcc_assert (innerc);
1954 if (i)
1955 {
1956 tree tem = OMP_CLAUSE_DECL (innerc);
1957 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1958 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1959 false, GSI_CONTINUE_LINKING);
1960 gassign *stmt = gimple_build_assign (tem, t);
1961 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1962 }
1963 }
1964 return;
1965 }
1966
1967 tree type = TREE_TYPE (fd->loop.v);
1968 tree tem = create_tmp_reg (type, ".tem");
1969 gassign *stmt = gimple_build_assign (tem, startvar);
1970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1971
1972 for (i = fd->collapse - 1; i >= 0; i--)
1973 {
1974 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1975 itype = vtype;
1976 if (POINTER_TYPE_P (vtype))
1977 itype = signed_type_for (vtype);
1978 if (i != 0)
1979 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1980 else
1981 t = tem;
1982 t = fold_convert (itype, t);
1983 t = fold_build2 (MULT_EXPR, itype, t,
1984 fold_convert (itype, fd->loops[i].step));
1985 if (POINTER_TYPE_P (vtype))
1986 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1987 else
1988 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1989 t = force_gimple_operand_gsi (gsi, t,
1990 DECL_P (fd->loops[i].v)
1991 && TREE_ADDRESSABLE (fd->loops[i].v),
1992 NULL_TREE, false,
1993 GSI_CONTINUE_LINKING);
1994 stmt = gimple_build_assign (fd->loops[i].v, t);
1995 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1996 if (i != 0)
1997 {
1998 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1999 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2000 false, GSI_CONTINUE_LINKING);
2001 stmt = gimple_build_assign (tem, t);
2002 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2003 }
2004 }
2005}
2006
2007/* Helper function for expand_omp_for_*. Generate code like:
2008 L10:
2009 V3 += STEP3;
2010 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2011 L11:
2012 V3 = N31;
2013 V2 += STEP2;
2014 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2015 L12:
2016 V2 = N21;
2017 V1 += STEP1;
2018 goto BODY_BB; */
2019
2020static basic_block
2021extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2022 basic_block body_bb)
2023{
2024 basic_block last_bb, bb, collapse_bb = NULL;
2025 int i;
2026 gimple_stmt_iterator gsi;
2027 edge e;
2028 tree t;
2029 gimple *stmt;
2030
2031 last_bb = cont_bb;
2032 for (i = fd->collapse - 1; i >= 0; i--)
2033 {
2034 tree vtype = TREE_TYPE (fd->loops[i].v);
2035
2036 bb = create_empty_bb (last_bb);
2037 add_bb_to_loop (bb, last_bb->loop_father);
2038 gsi = gsi_start_bb (bb);
2039
2040 if (i < fd->collapse - 1)
2041 {
2042 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
357067f2 2043 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75
MJ
2044
2045 t = fd->loops[i + 1].n1;
2046 t = force_gimple_operand_gsi (&gsi, t,
2047 DECL_P (fd->loops[i + 1].v)
2048 && TREE_ADDRESSABLE (fd->loops[i
2049 + 1].v),
2050 NULL_TREE, false,
2051 GSI_CONTINUE_LINKING);
2052 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2053 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2054 }
2055 else
2056 collapse_bb = bb;
2057
2058 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2059
2060 if (POINTER_TYPE_P (vtype))
2061 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2062 else
2063 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2064 t = force_gimple_operand_gsi (&gsi, t,
2065 DECL_P (fd->loops[i].v)
2066 && TREE_ADDRESSABLE (fd->loops[i].v),
2067 NULL_TREE, false, GSI_CONTINUE_LINKING);
2068 stmt = gimple_build_assign (fd->loops[i].v, t);
2069 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2070
2071 if (i > 0)
2072 {
2073 t = fd->loops[i].n2;
2074 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 tree v = fd->loops[i].v;
2077 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2078 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2079 false, GSI_CONTINUE_LINKING);
2080 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2081 stmt = gimple_build_cond_empty (t);
2082 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
d1ffbd43
JJ
2083 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2084 expand_omp_regimplify_p, NULL, NULL)
2085 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2086 expand_omp_regimplify_p, NULL, NULL))
2087 gimple_regimplify_operands (stmt, &gsi);
629b3d75 2088 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
357067f2 2089 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
629b3d75
MJ
2090 }
2091 else
2092 make_edge (bb, body_bb, EDGE_FALLTHRU);
2093 last_bb = bb;
2094 }
2095
2096 return collapse_bb;
2097}
2098
2099/* Expand #pragma omp ordered depend(source). */
2100
2101static void
2102expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2103 tree *counts, location_t loc)
2104{
2105 enum built_in_function source_ix
2106 = fd->iter_type == long_integer_type_node
2107 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2108 gimple *g
2109 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2110 build_fold_addr_expr (counts[fd->ordered]));
2111 gimple_set_location (g, loc);
2112 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2113}
2114
2115/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2116
2117static void
2118expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2119 tree *counts, tree c, location_t loc)
2120{
2121 auto_vec<tree, 10> args;
2122 enum built_in_function sink_ix
2123 = fd->iter_type == long_integer_type_node
2124 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2125 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2126 int i;
2127 gimple_stmt_iterator gsi2 = *gsi;
2128 bool warned_step = false;
2129
2130 for (i = 0; i < fd->ordered; i++)
2131 {
2132 tree step = NULL_TREE;
2133 off = TREE_PURPOSE (deps);
2134 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2135 {
2136 step = TREE_OPERAND (off, 1);
2137 off = TREE_OPERAND (off, 0);
2138 }
2139 if (!integer_zerop (off))
2140 {
2141 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2142 || fd->loops[i].cond_code == GT_EXPR);
2143 bool forward = fd->loops[i].cond_code == LT_EXPR;
2144 if (step)
2145 {
2146 /* Non-simple Fortran DO loops. If step is variable,
2147 we don't know at compile even the direction, so can't
2148 warn. */
2149 if (TREE_CODE (step) != INTEGER_CST)
2150 break;
2151 forward = tree_int_cst_sgn (step) != -1;
2152 }
2153 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
90a0bf4e
JJ
2154 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2155 "waiting for lexically later iteration");
629b3d75
MJ
2156 break;
2157 }
2158 deps = TREE_CHAIN (deps);
2159 }
2160 /* If all offsets corresponding to the collapsed loops are zero,
2161 this depend clause can be ignored. FIXME: but there is still a
2162 flush needed. We need to emit one __sync_synchronize () for it
2163 though (perhaps conditionally)? Solve this together with the
2164 conservative dependence folding optimization.
2165 if (i >= fd->collapse)
2166 return; */
2167
2168 deps = OMP_CLAUSE_DECL (c);
2169 gsi_prev (&gsi2);
2170 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2171 edge e2 = split_block_after_labels (e1->dest);
2172
2173 gsi2 = gsi_after_labels (e1->dest);
2174 *gsi = gsi_last_bb (e1->src);
2175 for (i = 0; i < fd->ordered; i++)
2176 {
2177 tree itype = TREE_TYPE (fd->loops[i].v);
2178 tree step = NULL_TREE;
2179 tree orig_off = NULL_TREE;
2180 if (POINTER_TYPE_P (itype))
2181 itype = sizetype;
2182 if (i)
2183 deps = TREE_CHAIN (deps);
2184 off = TREE_PURPOSE (deps);
2185 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2186 {
2187 step = TREE_OPERAND (off, 1);
2188 off = TREE_OPERAND (off, 0);
2189 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2190 && integer_onep (fd->loops[i].step)
2191 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2192 }
2193 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2194 if (step)
2195 {
2196 off = fold_convert_loc (loc, itype, off);
2197 orig_off = off;
2198 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2199 }
2200
2201 if (integer_zerop (off))
2202 t = boolean_true_node;
2203 else
2204 {
2205 tree a;
2206 tree co = fold_convert_loc (loc, itype, off);
2207 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2208 {
2209 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2210 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2211 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2212 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2213 co);
2214 }
2215 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2216 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2217 fd->loops[i].v, co);
2218 else
2219 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2220 fd->loops[i].v, co);
2221 if (step)
2222 {
2223 tree t1, t2;
2224 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2225 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2226 fd->loops[i].n1);
2227 else
2228 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2229 fd->loops[i].n2);
2230 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2231 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2232 fd->loops[i].n2);
2233 else
2234 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2235 fd->loops[i].n1);
2236 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2237 step, build_int_cst (TREE_TYPE (step), 0));
2238 if (TREE_CODE (step) != INTEGER_CST)
2239 {
2240 t1 = unshare_expr (t1);
2241 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2242 false, GSI_CONTINUE_LINKING);
2243 t2 = unshare_expr (t2);
2244 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2245 false, GSI_CONTINUE_LINKING);
2246 }
2247 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2248 t, t2, t1);
2249 }
2250 else if (fd->loops[i].cond_code == LT_EXPR)
2251 {
2252 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2253 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2254 fd->loops[i].n1);
2255 else
2256 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2258 }
2259 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2260 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2261 fd->loops[i].n2);
2262 else
2263 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2264 fd->loops[i].n1);
2265 }
2266 if (cond)
2267 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2268 else
2269 cond = t;
2270
2271 off = fold_convert_loc (loc, itype, off);
2272
2273 if (step
2274 || (fd->loops[i].cond_code == LT_EXPR
2275 ? !integer_onep (fd->loops[i].step)
2276 : !integer_minus_onep (fd->loops[i].step)))
2277 {
2278 if (step == NULL_TREE
2279 && TYPE_UNSIGNED (itype)
2280 && fd->loops[i].cond_code == GT_EXPR)
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2282 fold_build1_loc (loc, NEGATE_EXPR, itype,
2283 s));
2284 else
2285 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2286 orig_off ? orig_off : off, s);
2287 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2288 build_int_cst (itype, 0));
2289 if (integer_zerop (t) && !warned_step)
2290 {
90a0bf4e
JJ
2291 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2292 "refers to iteration never in the iteration "
2293 "space");
629b3d75
MJ
2294 warned_step = true;
2295 }
2296 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2297 cond, t);
2298 }
2299
2300 if (i <= fd->collapse - 1 && fd->collapse > 1)
2301 t = fd->loop.v;
2302 else if (counts[i])
2303 t = counts[i];
2304 else
2305 {
2306 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2307 fd->loops[i].v, fd->loops[i].n1);
2308 t = fold_convert_loc (loc, fd->iter_type, t);
2309 }
2310 if (step)
2311 /* We have divided off by step already earlier. */;
2312 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2313 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2314 fold_build1_loc (loc, NEGATE_EXPR, itype,
2315 s));
2316 else
2317 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2318 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2319 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2320 off = fold_convert_loc (loc, fd->iter_type, off);
2321 if (i <= fd->collapse - 1 && fd->collapse > 1)
2322 {
2323 if (i)
2324 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2325 off);
2326 if (i < fd->collapse - 1)
2327 {
2328 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2329 counts[i]);
2330 continue;
2331 }
2332 }
2333 off = unshare_expr (off);
2334 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2335 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2336 true, GSI_SAME_STMT);
2337 args.safe_push (t);
2338 }
2339 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2340 gimple_set_location (g, loc);
2341 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2342
2343 cond = unshare_expr (cond);
2344 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2345 GSI_CONTINUE_LINKING);
2346 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2347 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
357067f2
JH
2348 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2349 e1->probability = e3->probability.invert ();
629b3d75
MJ
2350 e1->flags = EDGE_TRUE_VALUE;
2351 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2352
2353 *gsi = gsi_after_labels (e2->dest);
2354}
2355
2356/* Expand all #pragma omp ordered depend(source) and
2357 #pragma omp ordered depend(sink:...) constructs in the current
2358 #pragma omp for ordered(n) region. */
2359
2360static void
2361expand_omp_ordered_source_sink (struct omp_region *region,
2362 struct omp_for_data *fd, tree *counts,
2363 basic_block cont_bb)
2364{
2365 struct omp_region *inner;
2366 int i;
2367 for (i = fd->collapse - 1; i < fd->ordered; i++)
2368 if (i == fd->collapse - 1 && fd->collapse > 1)
2369 counts[i] = NULL_TREE;
2370 else if (i >= fd->collapse && !cont_bb)
2371 counts[i] = build_zero_cst (fd->iter_type);
2372 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2373 && integer_onep (fd->loops[i].step))
2374 counts[i] = NULL_TREE;
2375 else
2376 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2377 tree atype
2378 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2379 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2380 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2381
2382 for (inner = region->inner; inner; inner = inner->next)
2383 if (inner->type == GIMPLE_OMP_ORDERED)
2384 {
2385 gomp_ordered *ord_stmt = inner->ord_stmt;
2386 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2387 location_t loc = gimple_location (ord_stmt);
2388 tree c;
2389 for (c = gimple_omp_ordered_clauses (ord_stmt);
2390 c; c = OMP_CLAUSE_CHAIN (c))
2391 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2392 break;
2393 if (c)
2394 expand_omp_ordered_source (&gsi, fd, counts, loc);
2395 for (c = gimple_omp_ordered_clauses (ord_stmt);
2396 c; c = OMP_CLAUSE_CHAIN (c))
2397 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2398 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2399 gsi_remove (&gsi, true);
2400 }
2401}
2402
2403/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2404 collapsed. */
2405
2406static basic_block
2407expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2408 basic_block cont_bb, basic_block body_bb,
2409 bool ordered_lastprivate)
2410{
2411 if (fd->ordered == fd->collapse)
2412 return cont_bb;
2413
2414 if (!cont_bb)
2415 {
2416 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2417 for (int i = fd->collapse; i < fd->ordered; i++)
2418 {
2419 tree type = TREE_TYPE (fd->loops[i].v);
2420 tree n1 = fold_convert (type, fd->loops[i].n1);
2421 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2422 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2423 size_int (i - fd->collapse + 1),
2424 NULL_TREE, NULL_TREE);
2425 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2426 }
2427 return NULL;
2428 }
2429
2430 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2431 {
2432 tree t, type = TREE_TYPE (fd->loops[i].v);
2433 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2434 expand_omp_build_assign (&gsi, fd->loops[i].v,
2435 fold_convert (type, fd->loops[i].n1));
2436 if (counts[i])
2437 expand_omp_build_assign (&gsi, counts[i],
2438 build_zero_cst (fd->iter_type));
2439 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2440 size_int (i - fd->collapse + 1),
2441 NULL_TREE, NULL_TREE);
2442 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2443 if (!gsi_end_p (gsi))
2444 gsi_prev (&gsi);
2445 else
2446 gsi = gsi_last_bb (body_bb);
2447 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2448 basic_block new_body = e1->dest;
2449 if (body_bb == cont_bb)
2450 cont_bb = new_body;
2451 edge e2 = NULL;
2452 basic_block new_header;
2453 if (EDGE_COUNT (cont_bb->preds) > 0)
2454 {
2455 gsi = gsi_last_bb (cont_bb);
2456 if (POINTER_TYPE_P (type))
2457 t = fold_build_pointer_plus (fd->loops[i].v,
2458 fold_convert (sizetype,
2459 fd->loops[i].step));
2460 else
2461 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2462 fold_convert (type, fd->loops[i].step));
2463 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2464 if (counts[i])
2465 {
2466 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2467 build_int_cst (fd->iter_type, 1));
2468 expand_omp_build_assign (&gsi, counts[i], t);
2469 t = counts[i];
2470 }
2471 else
2472 {
2473 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2474 fd->loops[i].v, fd->loops[i].n1);
2475 t = fold_convert (fd->iter_type, t);
2476 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2477 true, GSI_SAME_STMT);
2478 }
2479 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2480 size_int (i - fd->collapse + 1),
2481 NULL_TREE, NULL_TREE);
2482 expand_omp_build_assign (&gsi, aref, t);
2483 gsi_prev (&gsi);
2484 e2 = split_block (cont_bb, gsi_stmt (gsi));
2485 new_header = e2->dest;
2486 }
2487 else
2488 new_header = cont_bb;
2489 gsi = gsi_after_labels (new_header);
2490 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2491 true, GSI_SAME_STMT);
2492 tree n2
2493 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2494 true, NULL_TREE, true, GSI_SAME_STMT);
2495 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2496 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2497 edge e3 = split_block (new_header, gsi_stmt (gsi));
2498 cont_bb = e3->dest;
2499 remove_edge (e1);
2500 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2501 e3->flags = EDGE_FALSE_VALUE;
357067f2 2502 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75 2503 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
357067f2 2504 e1->probability = e3->probability.invert ();
629b3d75
MJ
2505
2506 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2507 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2508
2509 if (e2)
2510 {
2511 struct loop *loop = alloc_loop ();
2512 loop->header = new_header;
2513 loop->latch = e2->src;
2514 add_loop (loop, body_bb->loop_father);
2515 }
2516 }
2517
2518 /* If there are any lastprivate clauses and it is possible some loops
2519 might have zero iterations, ensure all the decls are initialized,
2520 otherwise we could crash evaluating C++ class iterators with lastprivate
2521 clauses. */
2522 bool need_inits = false;
2523 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2524 if (need_inits)
2525 {
2526 tree type = TREE_TYPE (fd->loops[i].v);
2527 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2528 expand_omp_build_assign (&gsi, fd->loops[i].v,
2529 fold_convert (type, fd->loops[i].n1));
2530 }
2531 else
2532 {
2533 tree type = TREE_TYPE (fd->loops[i].v);
2534 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2535 boolean_type_node,
2536 fold_convert (type, fd->loops[i].n1),
2537 fold_convert (type, fd->loops[i].n2));
2538 if (!integer_onep (this_cond))
2539 need_inits = true;
2540 }
2541
2542 return cont_bb;
2543}
2544
2545/* A subroutine of expand_omp_for. Generate code for a parallel
2546 loop with any schedule. Given parameters:
2547
2548 for (V = N1; V cond N2; V += STEP) BODY;
2549
2550 where COND is "<" or ">", we generate pseudocode
2551
2552 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2553 if (more) goto L0; else goto L3;
2554 L0:
2555 V = istart0;
2556 iend = iend0;
2557 L1:
2558 BODY;
2559 V += STEP;
2560 if (V cond iend) goto L1; else goto L2;
2561 L2:
2562 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2563 L3:
2564
2565 If this is a combined omp parallel loop, instead of the call to
2566 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2567 If this is gimple_omp_for_combined_p loop, then instead of assigning
2568 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2569 inner GIMPLE_OMP_FOR and V += STEP; and
2570 if (V cond iend) goto L1; else goto L2; are removed.
2571
2572 For collapsed loops, given parameters:
2573 collapse(3)
2574 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2575 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2576 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2577 BODY;
2578
2579 we generate pseudocode
2580
2581 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2582 if (cond3 is <)
2583 adj = STEP3 - 1;
2584 else
2585 adj = STEP3 + 1;
2586 count3 = (adj + N32 - N31) / STEP3;
2587 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2588 if (cond2 is <)
2589 adj = STEP2 - 1;
2590 else
2591 adj = STEP2 + 1;
2592 count2 = (adj + N22 - N21) / STEP2;
2593 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2594 if (cond1 is <)
2595 adj = STEP1 - 1;
2596 else
2597 adj = STEP1 + 1;
2598 count1 = (adj + N12 - N11) / STEP1;
2599 count = count1 * count2 * count3;
2600 goto Z1;
2601 Z0:
2602 count = 0;
2603 Z1:
2604 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2605 if (more) goto L0; else goto L3;
2606 L0:
2607 V = istart0;
2608 T = V;
2609 V3 = N31 + (T % count3) * STEP3;
2610 T = T / count3;
2611 V2 = N21 + (T % count2) * STEP2;
2612 T = T / count2;
2613 V1 = N11 + T * STEP1;
2614 iend = iend0;
2615 L1:
2616 BODY;
2617 V += 1;
2618 if (V < iend) goto L10; else goto L2;
2619 L10:
2620 V3 += STEP3;
2621 if (V3 cond3 N32) goto L1; else goto L11;
2622 L11:
2623 V3 = N31;
2624 V2 += STEP2;
2625 if (V2 cond2 N22) goto L1; else goto L12;
2626 L12:
2627 V2 = N21;
2628 V1 += STEP1;
2629 goto L1;
2630 L2:
2631 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2632 L3:
2633
2634 */
2635
2636static void
2637expand_omp_for_generic (struct omp_region *region,
2638 struct omp_for_data *fd,
2639 enum built_in_function start_fn,
2640 enum built_in_function next_fn,
28567c40 2641 tree sched_arg,
629b3d75
MJ
2642 gimple *inner_stmt)
2643{
2644 tree type, istart0, iend0, iend;
2645 tree t, vmain, vback, bias = NULL_TREE;
2646 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2647 basic_block l2_bb = NULL, l3_bb = NULL;
2648 gimple_stmt_iterator gsi;
2649 gassign *assign_stmt;
2650 bool in_combined_parallel = is_combined_parallel (region);
2651 bool broken_loop = region->cont == NULL;
2652 edge e, ne;
2653 tree *counts = NULL;
2654 int i;
2655 bool ordered_lastprivate = false;
2656
2657 gcc_assert (!broken_loop || !in_combined_parallel);
2658 gcc_assert (fd->iter_type == long_integer_type_node
2659 || !in_combined_parallel);
2660
2661 entry_bb = region->entry;
2662 cont_bb = region->cont;
2663 collapse_bb = NULL;
2664 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2665 gcc_assert (broken_loop
2666 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2667 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2668 l1_bb = single_succ (l0_bb);
2669 if (!broken_loop)
2670 {
2671 l2_bb = create_empty_bb (cont_bb);
2672 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2673 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2674 == l1_bb));
2675 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2676 }
2677 else
2678 l2_bb = NULL;
2679 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2680 exit_bb = region->exit;
2681
65f4b875 2682 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2683
2684 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2685 if (fd->ordered
6c7ae8c5 2686 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
629b3d75
MJ
2687 OMP_CLAUSE_LASTPRIVATE))
2688 ordered_lastprivate = false;
28567c40 2689 tree reductions = NULL_TREE;
6c7ae8c5
JJ
2690 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2691 tree memv = NULL_TREE;
8221c30b
JJ
2692 if (fd->lastprivate_conditional)
2693 {
2694 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE__CONDTEMP_);
2696 if (fd->have_pointer_condtemp)
2697 condtemp = OMP_CLAUSE_DECL (c);
2698 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2699 cond_var = OMP_CLAUSE_DECL (c);
2700 }
28567c40
JJ
2701 if (sched_arg)
2702 {
2703 if (fd->have_reductemp)
2704 {
6c7ae8c5 2705 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
28567c40
JJ
2706 OMP_CLAUSE__REDUCTEMP_);
2707 reductions = OMP_CLAUSE_DECL (c);
2708 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2709 gimple *g = SSA_NAME_DEF_STMT (reductions);
2710 reductions = gimple_assign_rhs1 (g);
2711 OMP_CLAUSE_DECL (c) = reductions;
2712 entry_bb = gimple_bb (g);
2713 edge e = split_block (entry_bb, g);
2714 if (region->entry == entry_bb)
2715 region->entry = e->dest;
2716 gsi = gsi_last_bb (entry_bb);
2717 }
2718 else
2719 reductions = null_pointer_node;
8221c30b 2720 if (fd->have_pointer_condtemp)
6c7ae8c5 2721 {
6c7ae8c5
JJ
2722 tree type = TREE_TYPE (condtemp);
2723 memv = create_tmp_var (type);
2724 TREE_ADDRESSABLE (memv) = 1;
2725 unsigned HOST_WIDE_INT sz
2726 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2727 sz *= fd->lastprivate_conditional;
2728 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2729 false);
2730 mem = build_fold_addr_expr (memv);
2731 }
2732 else
2733 mem = null_pointer_node;
28567c40 2734 }
629b3d75
MJ
2735 if (fd->collapse > 1 || fd->ordered)
2736 {
2737 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2738 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2739
2740 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2741 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2742 zero_iter1_bb, first_zero_iter1,
2743 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2744
2745 if (zero_iter1_bb)
2746 {
2747 /* Some counts[i] vars might be uninitialized if
2748 some loop has zero iterations. But the body shouldn't
2749 be executed in that case, so just avoid uninit warnings. */
2750 for (i = first_zero_iter1;
2751 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2752 if (SSA_VAR_P (counts[i]))
2753 TREE_NO_WARNING (counts[i]) = 1;
2754 gsi_prev (&gsi);
2755 e = split_block (entry_bb, gsi_stmt (gsi));
2756 entry_bb = e->dest;
2757 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2758 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2759 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2760 get_immediate_dominator (CDI_DOMINATORS,
2761 zero_iter1_bb));
2762 }
2763 if (zero_iter2_bb)
2764 {
2765 /* Some counts[i] vars might be uninitialized if
2766 some loop has zero iterations. But the body shouldn't
2767 be executed in that case, so just avoid uninit warnings. */
2768 for (i = first_zero_iter2; i < fd->ordered; i++)
2769 if (SSA_VAR_P (counts[i]))
2770 TREE_NO_WARNING (counts[i]) = 1;
2771 if (zero_iter1_bb)
2772 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2773 else
2774 {
2775 gsi_prev (&gsi);
2776 e = split_block (entry_bb, gsi_stmt (gsi));
2777 entry_bb = e->dest;
2778 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2779 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
2780 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2781 get_immediate_dominator
2782 (CDI_DOMINATORS, zero_iter2_bb));
2783 }
2784 }
2785 if (fd->collapse == 1)
2786 {
2787 counts[0] = fd->loop.n2;
2788 fd->loop = fd->loops[0];
2789 }
2790 }
2791
2792 type = TREE_TYPE (fd->loop.v);
2793 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2794 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2795 TREE_ADDRESSABLE (istart0) = 1;
2796 TREE_ADDRESSABLE (iend0) = 1;
2797
2798 /* See if we need to bias by LLONG_MIN. */
2799 if (fd->iter_type == long_long_unsigned_type_node
2800 && TREE_CODE (type) == INTEGER_TYPE
2801 && !TYPE_UNSIGNED (type)
2802 && fd->ordered == 0)
2803 {
2804 tree n1, n2;
2805
2806 if (fd->loop.cond_code == LT_EXPR)
2807 {
2808 n1 = fd->loop.n1;
2809 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2810 }
2811 else
2812 {
2813 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2814 n2 = fd->loop.n1;
2815 }
2816 if (TREE_CODE (n1) != INTEGER_CST
2817 || TREE_CODE (n2) != INTEGER_CST
2818 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2819 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2820 }
2821
2822 gimple_stmt_iterator gsif = gsi;
2823 gsi_prev (&gsif);
2824
2825 tree arr = NULL_TREE;
2826 if (in_combined_parallel)
2827 {
2828 gcc_assert (fd->ordered == 0);
2829 /* In a combined parallel loop, emit a call to
2830 GOMP_loop_foo_next. */
2831 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2832 build_fold_addr_expr (istart0),
2833 build_fold_addr_expr (iend0));
2834 }
2835 else
2836 {
2837 tree t0, t1, t2, t3, t4;
2838 /* If this is not a combined parallel loop, emit a call to
2839 GOMP_loop_foo_start in ENTRY_BB. */
2840 t4 = build_fold_addr_expr (iend0);
2841 t3 = build_fold_addr_expr (istart0);
2842 if (fd->ordered)
2843 {
2844 t0 = build_int_cst (unsigned_type_node,
2845 fd->ordered - fd->collapse + 1);
2846 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2847 fd->ordered
2848 - fd->collapse + 1),
2849 ".omp_counts");
2850 DECL_NAMELESS (arr) = 1;
2851 TREE_ADDRESSABLE (arr) = 1;
2852 TREE_STATIC (arr) = 1;
2853 vec<constructor_elt, va_gc> *v;
2854 vec_alloc (v, fd->ordered - fd->collapse + 1);
2855 int idx;
2856
2857 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2858 {
2859 tree c;
2860 if (idx == 0 && fd->collapse > 1)
2861 c = fd->loop.n2;
2862 else
2863 c = counts[idx + fd->collapse - 1];
2864 tree purpose = size_int (idx);
2865 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2866 if (TREE_CODE (c) != INTEGER_CST)
2867 TREE_STATIC (arr) = 0;
2868 }
2869
2870 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2871 if (!TREE_STATIC (arr))
2872 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2873 void_type_node, arr),
2874 true, NULL_TREE, true, GSI_SAME_STMT);
2875 t1 = build_fold_addr_expr (arr);
2876 t2 = NULL_TREE;
2877 }
2878 else
2879 {
2880 t2 = fold_convert (fd->iter_type, fd->loop.step);
2881 t1 = fd->loop.n2;
2882 t0 = fd->loop.n1;
2883 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2884 {
2885 tree innerc
2886 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2887 OMP_CLAUSE__LOOPTEMP_);
2888 gcc_assert (innerc);
2889 t0 = OMP_CLAUSE_DECL (innerc);
2890 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2891 OMP_CLAUSE__LOOPTEMP_);
2892 gcc_assert (innerc);
2893 t1 = OMP_CLAUSE_DECL (innerc);
2894 }
2895 if (POINTER_TYPE_P (TREE_TYPE (t0))
2896 && TYPE_PRECISION (TREE_TYPE (t0))
2897 != TYPE_PRECISION (fd->iter_type))
2898 {
2899 /* Avoid casting pointers to integer of a different size. */
2900 tree itype = signed_type_for (type);
2901 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2902 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2903 }
2904 else
2905 {
2906 t1 = fold_convert (fd->iter_type, t1);
2907 t0 = fold_convert (fd->iter_type, t0);
2908 }
2909 if (bias)
2910 {
2911 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2912 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2913 }
2914 }
2915 if (fd->iter_type == long_integer_type_node || fd->ordered)
2916 {
2917 if (fd->chunk_size)
2918 {
2919 t = fold_convert (fd->iter_type, fd->chunk_size);
2920 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
2921 if (sched_arg)
2922 {
2923 if (fd->ordered)
2924 t = build_call_expr (builtin_decl_explicit (start_fn),
2925 8, t0, t1, sched_arg, t, t3, t4,
2926 reductions, mem);
2927 else
2928 t = build_call_expr (builtin_decl_explicit (start_fn),
2929 9, t0, t1, t2, sched_arg, t, t3, t4,
2930 reductions, mem);
2931 }
2932 else if (fd->ordered)
629b3d75
MJ
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 5, t0, t1, t, t3, t4);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 6, t0, t1, t2, t, t3, t4);
2938 }
2939 else if (fd->ordered)
2940 t = build_call_expr (builtin_decl_explicit (start_fn),
2941 4, t0, t1, t3, t4);
2942 else
2943 t = build_call_expr (builtin_decl_explicit (start_fn),
2944 5, t0, t1, t2, t3, t4);
2945 }
2946 else
2947 {
2948 tree t5;
2949 tree c_bool_type;
2950 tree bfn_decl;
2951
2952 /* The GOMP_loop_ull_*start functions have additional boolean
2953 argument, true for < loops and false for > loops.
2954 In Fortran, the C bool type can be different from
2955 boolean_type_node. */
2956 bfn_decl = builtin_decl_explicit (start_fn);
2957 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2958 t5 = build_int_cst (c_bool_type,
2959 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2960 if (fd->chunk_size)
2961 {
2962 tree bfn_decl = builtin_decl_explicit (start_fn);
2963 t = fold_convert (fd->iter_type, fd->chunk_size);
2964 t = omp_adjust_chunk_size (t, fd->simd_schedule);
28567c40
JJ
2965 if (sched_arg)
2966 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2967 t, t3, t4, reductions, mem);
2968 else
2969 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
629b3d75
MJ
2970 }
2971 else
2972 t = build_call_expr (builtin_decl_explicit (start_fn),
2973 6, t5, t0, t1, t2, t3, t4);
2974 }
2975 }
2976 if (TREE_TYPE (t) != boolean_type_node)
2977 t = fold_build2 (NE_EXPR, boolean_type_node,
2978 t, build_int_cst (TREE_TYPE (t), 0));
2979 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
01914336 2980 true, GSI_SAME_STMT);
629b3d75
MJ
2981 if (arr && !TREE_STATIC (arr))
2982 {
2983 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2984 TREE_THIS_VOLATILE (clobber) = 1;
2985 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2986 GSI_SAME_STMT);
2987 }
8221c30b 2988 if (fd->have_pointer_condtemp)
6c7ae8c5 2989 expand_omp_build_assign (&gsi, condtemp, memv, false);
28567c40
JJ
2990 if (fd->have_reductemp)
2991 {
2992 gimple *g = gsi_stmt (gsi);
2993 gsi_remove (&gsi, true);
2994 release_ssa_name (gimple_assign_lhs (g));
2995
2996 entry_bb = region->entry;
2997 gsi = gsi_last_nondebug_bb (entry_bb);
2998
2999 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3000 }
629b3d75
MJ
3001 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3002
3003 /* Remove the GIMPLE_OMP_FOR statement. */
3004 gsi_remove (&gsi, true);
3005
3006 if (gsi_end_p (gsif))
3007 gsif = gsi_after_labels (gsi_bb (gsif));
3008 gsi_next (&gsif);
3009
3010 /* Iteration setup for sequential loop goes in L0_BB. */
3011 tree startvar = fd->loop.v;
3012 tree endvar = NULL_TREE;
3013
3014 if (gimple_omp_for_combined_p (fd->for_stmt))
3015 {
3016 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3017 && gimple_omp_for_kind (inner_stmt)
3018 == GF_OMP_FOR_KIND_SIMD);
3019 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3020 OMP_CLAUSE__LOOPTEMP_);
3021 gcc_assert (innerc);
3022 startvar = OMP_CLAUSE_DECL (innerc);
3023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3024 OMP_CLAUSE__LOOPTEMP_);
3025 gcc_assert (innerc);
3026 endvar = OMP_CLAUSE_DECL (innerc);
3027 }
3028
3029 gsi = gsi_start_bb (l0_bb);
3030 t = istart0;
3031 if (fd->ordered && fd->collapse == 1)
3032 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3033 fold_convert (fd->iter_type, fd->loop.step));
3034 else if (bias)
3035 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3036 if (fd->ordered && fd->collapse == 1)
3037 {
3038 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3039 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3040 fd->loop.n1, fold_convert (sizetype, t));
3041 else
3042 {
3043 t = fold_convert (TREE_TYPE (startvar), t);
3044 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3045 fd->loop.n1, t);
3046 }
3047 }
3048 else
3049 {
3050 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3051 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3052 t = fold_convert (TREE_TYPE (startvar), t);
3053 }
3054 t = force_gimple_operand_gsi (&gsi, t,
3055 DECL_P (startvar)
3056 && TREE_ADDRESSABLE (startvar),
3057 NULL_TREE, false, GSI_CONTINUE_LINKING);
3058 assign_stmt = gimple_build_assign (startvar, t);
3059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6c7ae8c5
JJ
3060 if (cond_var)
3061 {
3062 tree itype = TREE_TYPE (cond_var);
3063 /* For lastprivate(conditional:) itervar, we need some iteration
3064 counter that starts at unsigned non-zero and increases.
3065 Prefer as few IVs as possible, so if we can use startvar
3066 itself, use that, or startvar + constant (those would be
3067 incremented with step), and as last resort use the s0 + 1
3068 incremented by 1. */
3069 if ((fd->ordered && fd->collapse == 1)
3070 || bias
3071 || POINTER_TYPE_P (type)
3072 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3073 || fd->loop.cond_code != LT_EXPR)
3074 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3075 build_int_cst (itype, 1));
3076 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3077 t = fold_convert (itype, t);
3078 else
3079 {
3080 tree c = fold_convert (itype, fd->loop.n1);
3081 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3083 }
3084 t = force_gimple_operand_gsi (&gsi, t, false,
3085 NULL_TREE, false, GSI_CONTINUE_LINKING);
3086 assign_stmt = gimple_build_assign (cond_var, t);
3087 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3088 }
629b3d75
MJ
3089
3090 t = iend0;
3091 if (fd->ordered && fd->collapse == 1)
3092 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3093 fold_convert (fd->iter_type, fd->loop.step));
3094 else if (bias)
3095 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3096 if (fd->ordered && fd->collapse == 1)
3097 {
3098 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3099 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3100 fd->loop.n1, fold_convert (sizetype, t));
3101 else
3102 {
3103 t = fold_convert (TREE_TYPE (startvar), t);
3104 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3105 fd->loop.n1, t);
3106 }
3107 }
3108 else
3109 {
3110 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3111 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3112 t = fold_convert (TREE_TYPE (startvar), t);
3113 }
3114 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3115 false, GSI_CONTINUE_LINKING);
3116 if (endvar)
3117 {
3118 assign_stmt = gimple_build_assign (endvar, iend);
3119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3120 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3121 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3122 else
3123 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3124 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3125 }
3126 /* Handle linear clause adjustments. */
3127 tree itercnt = NULL_TREE;
3128 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3129 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3130 c; c = OMP_CLAUSE_CHAIN (c))
3131 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3132 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3133 {
3134 tree d = OMP_CLAUSE_DECL (c);
3135 bool is_ref = omp_is_reference (d);
3136 tree t = d, a, dest;
3137 if (is_ref)
3138 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3139 tree type = TREE_TYPE (t);
3140 if (POINTER_TYPE_P (type))
3141 type = sizetype;
3142 dest = unshare_expr (t);
3143 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3144 expand_omp_build_assign (&gsif, v, t);
3145 if (itercnt == NULL_TREE)
3146 {
3147 itercnt = startvar;
3148 tree n1 = fd->loop.n1;
3149 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3150 {
3151 itercnt
3152 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3153 itercnt);
3154 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3155 }
3156 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3157 itercnt, n1);
3158 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3159 itercnt, fd->loop.step);
3160 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3161 NULL_TREE, false,
3162 GSI_CONTINUE_LINKING);
3163 }
3164 a = fold_build2 (MULT_EXPR, type,
3165 fold_convert (type, itercnt),
3166 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3167 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3168 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3169 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3170 false, GSI_CONTINUE_LINKING);
3171 assign_stmt = gimple_build_assign (dest, t);
3172 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3173 }
3174 if (fd->collapse > 1)
3175 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3176
3177 if (fd->ordered)
3178 {
3179 /* Until now, counts array contained number of iterations or
3180 variable containing it for ith loop. From now on, we need
3181 those counts only for collapsed loops, and only for the 2nd
3182 till the last collapsed one. Move those one element earlier,
3183 we'll use counts[fd->collapse - 1] for the first source/sink
3184 iteration counter and so on and counts[fd->ordered]
3185 as the array holding the current counter values for
3186 depend(source). */
3187 if (fd->collapse > 1)
3188 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3189 if (broken_loop)
3190 {
3191 int i;
3192 for (i = fd->collapse; i < fd->ordered; i++)
3193 {
3194 tree type = TREE_TYPE (fd->loops[i].v);
3195 tree this_cond
3196 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3197 fold_convert (type, fd->loops[i].n1),
3198 fold_convert (type, fd->loops[i].n2));
3199 if (!integer_onep (this_cond))
3200 break;
3201 }
3202 if (i < fd->ordered)
3203 {
3204 cont_bb
3205 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3206 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3207 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3208 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3209 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3210 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3211 make_edge (cont_bb, l1_bb, 0);
3212 l2_bb = create_empty_bb (cont_bb);
3213 broken_loop = false;
3214 }
3215 }
3216 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3217 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3218 ordered_lastprivate);
3219 if (counts[fd->collapse - 1])
3220 {
3221 gcc_assert (fd->collapse == 1);
3222 gsi = gsi_last_bb (l0_bb);
3223 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3224 istart0, true);
3225 gsi = gsi_last_bb (cont_bb);
3226 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3227 build_int_cst (fd->iter_type, 1));
3228 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3229 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3230 size_zero_node, NULL_TREE, NULL_TREE);
3231 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3232 t = counts[fd->collapse - 1];
3233 }
3234 else if (fd->collapse > 1)
3235 t = fd->loop.v;
3236 else
3237 {
3238 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3239 fd->loops[0].v, fd->loops[0].n1);
3240 t = fold_convert (fd->iter_type, t);
3241 }
3242 gsi = gsi_last_bb (l0_bb);
3243 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3244 size_zero_node, NULL_TREE, NULL_TREE);
3245 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3246 false, GSI_CONTINUE_LINKING);
3247 expand_omp_build_assign (&gsi, aref, t, true);
3248 }
3249
3250 if (!broken_loop)
3251 {
3252 /* Code to control the increment and predicate for the sequential
3253 loop goes in the CONT_BB. */
65f4b875 3254 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
3255 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3256 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3257 vmain = gimple_omp_continue_control_use (cont_stmt);
3258 vback = gimple_omp_continue_control_def (cont_stmt);
3259
7855700e
JJ
3260 if (cond_var)
3261 {
3262 tree itype = TREE_TYPE (cond_var);
3263 tree t2;
3264 if ((fd->ordered && fd->collapse == 1)
3265 || bias
3266 || POINTER_TYPE_P (type)
3267 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3268 || fd->loop.cond_code != LT_EXPR)
3269 t2 = build_int_cst (itype, 1);
3270 else
3271 t2 = fold_convert (itype, fd->loop.step);
3272 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3273 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3274 NULL_TREE, true, GSI_SAME_STMT);
3275 assign_stmt = gimple_build_assign (cond_var, t2);
3276 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3277 }
3278
629b3d75
MJ
3279 if (!gimple_omp_for_combined_p (fd->for_stmt))
3280 {
3281 if (POINTER_TYPE_P (type))
3282 t = fold_build_pointer_plus (vmain, fd->loop.step);
3283 else
3284 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3285 t = force_gimple_operand_gsi (&gsi, t,
3286 DECL_P (vback)
3287 && TREE_ADDRESSABLE (vback),
3288 NULL_TREE, true, GSI_SAME_STMT);
3289 assign_stmt = gimple_build_assign (vback, t);
3290 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3291
3292 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3293 {
d1ffbd43 3294 tree tem;
629b3d75 3295 if (fd->collapse > 1)
d1ffbd43 3296 tem = fd->loop.v;
629b3d75
MJ
3297 else
3298 {
d1ffbd43
JJ
3299 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3300 fd->loops[0].v, fd->loops[0].n1);
3301 tem = fold_convert (fd->iter_type, tem);
629b3d75
MJ
3302 }
3303 tree aref = build4 (ARRAY_REF, fd->iter_type,
3304 counts[fd->ordered], size_zero_node,
3305 NULL_TREE, NULL_TREE);
d1ffbd43
JJ
3306 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3307 true, GSI_SAME_STMT);
3308 expand_omp_build_assign (&gsi, aref, tem);
629b3d75
MJ
3309 }
3310
3311 t = build2 (fd->loop.cond_code, boolean_type_node,
3312 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3313 iend);
3314 gcond *cond_stmt = gimple_build_cond_empty (t);
3315 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3316 }
3317
3318 /* Remove GIMPLE_OMP_CONTINUE. */
3319 gsi_remove (&gsi, true);
3320
3321 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3322 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3323
3324 /* Emit code to get the next parallel iteration in L2_BB. */
3325 gsi = gsi_start_bb (l2_bb);
3326
3327 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3328 build_fold_addr_expr (istart0),
3329 build_fold_addr_expr (iend0));
3330 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3331 false, GSI_CONTINUE_LINKING);
3332 if (TREE_TYPE (t) != boolean_type_node)
3333 t = fold_build2 (NE_EXPR, boolean_type_node,
3334 t, build_int_cst (TREE_TYPE (t), 0));
3335 gcond *cond_stmt = gimple_build_cond_empty (t);
3336 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3337 }
3338
3339 /* Add the loop cleanup function. */
65f4b875 3340 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
3341 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3342 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3343 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3344 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3345 else
3346 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3347 gcall *call_stmt = gimple_build_call (t, 0);
629b3d75
MJ
3348 if (fd->ordered)
3349 {
3350 tree arr = counts[fd->ordered];
3351 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3352 TREE_THIS_VOLATILE (clobber) = 1;
3353 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3354 GSI_SAME_STMT);
3355 }
28567c40
JJ
3356 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3357 {
3358 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3359 if (fd->have_reductemp)
3360 {
3361 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3362 gimple_call_lhs (call_stmt));
3363 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3364 }
3365 }
3366 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
629b3d75
MJ
3367 gsi_remove (&gsi, true);
3368
3369 /* Connect the new blocks. */
3370 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3371 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3372
3373 if (!broken_loop)
3374 {
3375 gimple_seq phis;
3376
3377 e = find_edge (cont_bb, l3_bb);
3378 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3379
3380 phis = phi_nodes (l3_bb);
3381 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3382 {
3383 gimple *phi = gsi_stmt (gsi);
3384 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3385 PHI_ARG_DEF_FROM_EDGE (phi, e));
3386 }
3387 remove_edge (e);
3388
3389 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3390 e = find_edge (cont_bb, l1_bb);
3391 if (e == NULL)
3392 {
3393 e = BRANCH_EDGE (cont_bb);
3394 gcc_assert (single_succ (e->dest) == l1_bb);
3395 }
3396 if (gimple_omp_for_combined_p (fd->for_stmt))
3397 {
3398 remove_edge (e);
3399 e = NULL;
3400 }
3401 else if (fd->collapse > 1)
3402 {
3403 remove_edge (e);
3404 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3405 }
3406 else
3407 e->flags = EDGE_TRUE_VALUE;
3408 if (e)
3409 {
357067f2
JH
3410 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3411 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
629b3d75
MJ
3412 }
3413 else
3414 {
3415 e = find_edge (cont_bb, l2_bb);
3416 e->flags = EDGE_FALLTHRU;
3417 }
3418 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3419
3420 if (gimple_in_ssa_p (cfun))
3421 {
3422 /* Add phis to the outer loop that connect to the phis in the inner,
3423 original loop, and move the loop entry value of the inner phi to
3424 the loop entry value of the outer phi. */
3425 gphi_iterator psi;
3426 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3427 {
620e594b 3428 location_t locus;
629b3d75
MJ
3429 gphi *nphi;
3430 gphi *exit_phi = psi.phi ();
3431
164485b5
JJ
3432 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3433 continue;
3434
629b3d75
MJ
3435 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3436 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3437
3438 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3439 edge latch_to_l1 = find_edge (latch, l1_bb);
3440 gphi *inner_phi
3441 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3442
3443 tree t = gimple_phi_result (exit_phi);
3444 tree new_res = copy_ssa_name (t, NULL);
3445 nphi = create_phi_node (new_res, l0_bb);
3446
3447 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3448 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3449 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3450 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3451 add_phi_arg (nphi, t, entry_to_l0, locus);
3452
3453 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3454 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3455
3456 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
164485b5 3457 }
629b3d75
MJ
3458 }
3459
3460 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3461 recompute_dominator (CDI_DOMINATORS, l2_bb));
3462 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3463 recompute_dominator (CDI_DOMINATORS, l3_bb));
3464 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3465 recompute_dominator (CDI_DOMINATORS, l0_bb));
3466 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3467 recompute_dominator (CDI_DOMINATORS, l1_bb));
3468
3469 /* We enter expand_omp_for_generic with a loop. This original loop may
3470 have its own loop struct, or it may be part of an outer loop struct
3471 (which may be the fake loop). */
3472 struct loop *outer_loop = entry_bb->loop_father;
3473 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3474
3475 add_bb_to_loop (l2_bb, outer_loop);
3476
3477 /* We've added a new loop around the original loop. Allocate the
3478 corresponding loop struct. */
3479 struct loop *new_loop = alloc_loop ();
3480 new_loop->header = l0_bb;
3481 new_loop->latch = l2_bb;
3482 add_loop (new_loop, outer_loop);
3483
3484 /* Allocate a loop structure for the original loop unless we already
3485 had one. */
3486 if (!orig_loop_has_loop_struct
3487 && !gimple_omp_for_combined_p (fd->for_stmt))
3488 {
3489 struct loop *orig_loop = alloc_loop ();
3490 orig_loop->header = l1_bb;
3491 /* The loop may have multiple latches. */
3492 add_loop (orig_loop, new_loop);
3493 }
3494 }
3495}
3496
3497/* A subroutine of expand_omp_for. Generate code for a parallel
3498 loop with static schedule and no specified chunk size. Given
3499 parameters:
3500
3501 for (V = N1; V cond N2; V += STEP) BODY;
3502
3503 where COND is "<" or ">", we generate pseudocode
3504
3505 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3506 if (cond is <)
3507 adj = STEP - 1;
3508 else
3509 adj = STEP + 1;
3510 if ((__typeof (V)) -1 > 0 && cond is >)
3511 n = -(adj + N2 - N1) / -STEP;
3512 else
3513 n = (adj + N2 - N1) / STEP;
3514 q = n / nthreads;
3515 tt = n % nthreads;
3516 if (threadid < tt) goto L3; else goto L4;
3517 L3:
3518 tt = 0;
3519 q = q + 1;
3520 L4:
3521 s0 = q * threadid + tt;
3522 e0 = s0 + q;
3523 V = s0 * STEP + N1;
3524 if (s0 >= e0) goto L2; else goto L0;
3525 L0:
3526 e = e0 * STEP + N1;
3527 L1:
3528 BODY;
3529 V += STEP;
3530 if (V cond e) goto L1;
3531 L2:
3532*/
3533
3534static void
3535expand_omp_for_static_nochunk (struct omp_region *region,
3536 struct omp_for_data *fd,
3537 gimple *inner_stmt)
3538{
3539 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3540 tree type, itype, vmain, vback;
3541 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3542 basic_block body_bb, cont_bb, collapse_bb = NULL;
3543 basic_block fin_bb;
6c7ae8c5 3544 gimple_stmt_iterator gsi, gsip;
629b3d75
MJ
3545 edge ep;
3546 bool broken_loop = region->cont == NULL;
3547 tree *counts = NULL;
3548 tree n1, n2, step;
28567c40 3549 tree reductions = NULL_TREE;
8221c30b 3550 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
629b3d75
MJ
3551
3552 itype = type = TREE_TYPE (fd->loop.v);
3553 if (POINTER_TYPE_P (type))
3554 itype = signed_type_for (type);
3555
3556 entry_bb = region->entry;
3557 cont_bb = region->cont;
3558 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3559 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3560 gcc_assert (broken_loop
3561 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3562 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3563 body_bb = single_succ (seq_start_bb);
3564 if (!broken_loop)
3565 {
3566 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3567 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3568 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3569 }
3570 exit_bb = region->exit;
3571
3572 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 3573 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 3574 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6c7ae8c5
JJ
3575 gsip = gsi;
3576 gsi_prev (&gsip);
629b3d75
MJ
3577
3578 if (fd->collapse > 1)
3579 {
3580 int first_zero_iter = -1, dummy = -1;
3581 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3582
3583 counts = XALLOCAVEC (tree, fd->collapse);
3584 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3585 fin_bb, first_zero_iter,
3586 dummy_bb, dummy, l2_dom_bb);
3587 t = NULL_TREE;
3588 }
3589 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3590 t = integer_one_node;
3591 else
3592 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3593 fold_convert (type, fd->loop.n1),
3594 fold_convert (type, fd->loop.n2));
3595 if (fd->collapse == 1
3596 && TYPE_UNSIGNED (type)
3597 && (t == NULL_TREE || !integer_onep (t)))
3598 {
3599 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3600 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3601 true, GSI_SAME_STMT);
3602 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3603 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3604 true, GSI_SAME_STMT);
3605 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
6c7ae8c5 3606 NULL_TREE, NULL_TREE);
629b3d75
MJ
3607 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3608 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3609 expand_omp_regimplify_p, NULL, NULL)
3610 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3611 expand_omp_regimplify_p, NULL, NULL))
3612 {
3613 gsi = gsi_for_stmt (cond_stmt);
3614 gimple_regimplify_operands (cond_stmt, &gsi);
3615 }
3616 ep = split_block (entry_bb, cond_stmt);
3617 ep->flags = EDGE_TRUE_VALUE;
3618 entry_bb = ep->dest;
357067f2 3619 ep->probability = profile_probability::very_likely ();
629b3d75 3620 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 3621 ep->probability = profile_probability::very_unlikely ();
629b3d75
MJ
3622 if (gimple_in_ssa_p (cfun))
3623 {
3624 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3625 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3626 !gsi_end_p (gpi); gsi_next (&gpi))
3627 {
3628 gphi *phi = gpi.phi ();
3629 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3630 ep, UNKNOWN_LOCATION);
3631 }
3632 }
3633 gsi = gsi_last_bb (entry_bb);
3634 }
3635
8221c30b
JJ
3636 if (fd->lastprivate_conditional)
3637 {
3638 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3639 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3640 if (fd->have_pointer_condtemp)
3641 condtemp = OMP_CLAUSE_DECL (c);
3642 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3643 cond_var = OMP_CLAUSE_DECL (c);
3644 }
3645 if (fd->have_reductemp || fd->have_pointer_condtemp)
28567c40
JJ
3646 {
3647 tree t1 = build_int_cst (long_integer_type_node, 0);
3648 tree t2 = build_int_cst (long_integer_type_node, 1);
3649 tree t3 = build_int_cstu (long_integer_type_node,
3650 (HOST_WIDE_INT_1U << 31) + 1);
3651 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
6c7ae8c5
JJ
3652 gimple_stmt_iterator gsi2 = gsi_none ();
3653 gimple *g = NULL;
3654 tree mem = null_pointer_node, memv = NULL_TREE;
6c7ae8c5
JJ
3655 if (fd->have_reductemp)
3656 {
3657 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3658 reductions = OMP_CLAUSE_DECL (c);
3659 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3660 g = SSA_NAME_DEF_STMT (reductions);
3661 reductions = gimple_assign_rhs1 (g);
3662 OMP_CLAUSE_DECL (c) = reductions;
3663 gsi2 = gsi_for_stmt (g);
3664 }
3665 else
3666 {
3667 if (gsi_end_p (gsip))
3668 gsi2 = gsi_after_labels (region->entry);
3669 else
3670 gsi2 = gsip;
3671 reductions = null_pointer_node;
3672 }
8221c30b 3673 if (fd->have_pointer_condtemp)
6c7ae8c5 3674 {
6c7ae8c5
JJ
3675 tree type = TREE_TYPE (condtemp);
3676 memv = create_tmp_var (type);
3677 TREE_ADDRESSABLE (memv) = 1;
3678 unsigned HOST_WIDE_INT sz
3679 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3680 sz *= fd->lastprivate_conditional;
3681 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
3682 false);
3683 mem = build_fold_addr_expr (memv);
3684 }
28567c40
JJ
3685 tree t
3686 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3687 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 3688 null_pointer_node, reductions, mem);
28567c40
JJ
3689 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3690 true, GSI_SAME_STMT);
8221c30b 3691 if (fd->have_pointer_condtemp)
6c7ae8c5
JJ
3692 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3693 if (fd->have_reductemp)
3694 {
3695 gsi_remove (&gsi2, true);
3696 release_ssa_name (gimple_assign_lhs (g));
3697 }
28567c40 3698 }
629b3d75
MJ
3699 switch (gimple_omp_for_kind (fd->for_stmt))
3700 {
3701 case GF_OMP_FOR_KIND_FOR:
3702 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3703 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3704 break;
3705 case GF_OMP_FOR_KIND_DISTRIBUTE:
3706 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3707 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3708 break;
3709 default:
3710 gcc_unreachable ();
3711 }
3712 nthreads = build_call_expr (nthreads, 0);
3713 nthreads = fold_convert (itype, nthreads);
3714 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3715 true, GSI_SAME_STMT);
3716 threadid = build_call_expr (threadid, 0);
3717 threadid = fold_convert (itype, threadid);
3718 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3719 true, GSI_SAME_STMT);
3720
3721 n1 = fd->loop.n1;
3722 n2 = fd->loop.n2;
3723 step = fd->loop.step;
3724 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3725 {
3726 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3727 OMP_CLAUSE__LOOPTEMP_);
3728 gcc_assert (innerc);
3729 n1 = OMP_CLAUSE_DECL (innerc);
3730 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3731 OMP_CLAUSE__LOOPTEMP_);
3732 gcc_assert (innerc);
3733 n2 = OMP_CLAUSE_DECL (innerc);
3734 }
3735 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3736 true, NULL_TREE, true, GSI_SAME_STMT);
3737 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3738 true, NULL_TREE, true, GSI_SAME_STMT);
3739 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3740 true, NULL_TREE, true, GSI_SAME_STMT);
3741
3742 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3743 t = fold_build2 (PLUS_EXPR, itype, step, t);
3744 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3745 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3746 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3747 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3748 fold_build1 (NEGATE_EXPR, itype, t),
3749 fold_build1 (NEGATE_EXPR, itype, step));
3750 else
3751 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3752 t = fold_convert (itype, t);
3753 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3754
3755 q = create_tmp_reg (itype, "q");
3756 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3757 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3758 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3759
3760 tt = create_tmp_reg (itype, "tt");
3761 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3762 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3763 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3764
3765 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3766 gcond *cond_stmt = gimple_build_cond_empty (t);
3767 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3768
3769 second_bb = split_block (entry_bb, cond_stmt)->dest;
65f4b875 3770 gsi = gsi_last_nondebug_bb (second_bb);
629b3d75
MJ
3771 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3772
3773 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3774 GSI_SAME_STMT);
3775 gassign *assign_stmt
3776 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3777 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3778
3779 third_bb = split_block (second_bb, assign_stmt)->dest;
65f4b875 3780 gsi = gsi_last_nondebug_bb (third_bb);
629b3d75
MJ
3781 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3782
3783 t = build2 (MULT_EXPR, itype, q, threadid);
3784 t = build2 (PLUS_EXPR, itype, t, tt);
3785 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3786
3787 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3788 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3789
3790 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3791 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3792
3793 /* Remove the GIMPLE_OMP_FOR statement. */
3794 gsi_remove (&gsi, true);
3795
3796 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3797 gsi = gsi_start_bb (seq_start_bb);
3798
3799 tree startvar = fd->loop.v;
3800 tree endvar = NULL_TREE;
3801
3802 if (gimple_omp_for_combined_p (fd->for_stmt))
3803 {
3804 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3805 ? gimple_omp_parallel_clauses (inner_stmt)
3806 : gimple_omp_for_clauses (inner_stmt);
3807 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3808 gcc_assert (innerc);
3809 startvar = OMP_CLAUSE_DECL (innerc);
3810 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3811 OMP_CLAUSE__LOOPTEMP_);
3812 gcc_assert (innerc);
3813 endvar = OMP_CLAUSE_DECL (innerc);
3814 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3815 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3816 {
3817 int i;
3818 for (i = 1; i < fd->collapse; i++)
3819 {
3820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3821 OMP_CLAUSE__LOOPTEMP_);
3822 gcc_assert (innerc);
3823 }
3824 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3825 OMP_CLAUSE__LOOPTEMP_);
3826 if (innerc)
3827 {
3828 /* If needed (distribute parallel for with lastprivate),
3829 propagate down the total number of iterations. */
3830 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3831 fd->loop.n2);
3832 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3833 GSI_CONTINUE_LINKING);
3834 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3835 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3836 }
3837 }
3838 }
3839 t = fold_convert (itype, s0);
3840 t = fold_build2 (MULT_EXPR, itype, t, step);
3841 if (POINTER_TYPE_P (type))
bde84d51
RB
3842 {
3843 t = fold_build_pointer_plus (n1, t);
3844 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3845 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3846 t = fold_convert (signed_type_for (type), t);
3847 }
629b3d75
MJ
3848 else
3849 t = fold_build2 (PLUS_EXPR, type, t, n1);
3850 t = fold_convert (TREE_TYPE (startvar), t);
3851 t = force_gimple_operand_gsi (&gsi, t,
3852 DECL_P (startvar)
3853 && TREE_ADDRESSABLE (startvar),
3854 NULL_TREE, false, GSI_CONTINUE_LINKING);
3855 assign_stmt = gimple_build_assign (startvar, t);
3856 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6c7ae8c5
JJ
3857 if (cond_var)
3858 {
3859 tree itype = TREE_TYPE (cond_var);
3860 /* For lastprivate(conditional:) itervar, we need some iteration
3861 counter that starts at unsigned non-zero and increases.
3862 Prefer as few IVs as possible, so if we can use startvar
3863 itself, use that, or startvar + constant (those would be
3864 incremented with step), and as last resort use the s0 + 1
3865 incremented by 1. */
3866 if (POINTER_TYPE_P (type)
3867 || TREE_CODE (n1) != INTEGER_CST
3868 || fd->loop.cond_code != LT_EXPR)
3869 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
3870 build_int_cst (itype, 1));
3871 else if (tree_int_cst_sgn (n1) == 1)
3872 t = fold_convert (itype, t);
3873 else
3874 {
3875 tree c = fold_convert (itype, n1);
3876 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3877 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3878 }
3879 t = force_gimple_operand_gsi (&gsi, t, false,
3880 NULL_TREE, false, GSI_CONTINUE_LINKING);
3881 assign_stmt = gimple_build_assign (cond_var, t);
3882 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3883 }
629b3d75
MJ
3884
3885 t = fold_convert (itype, e0);
3886 t = fold_build2 (MULT_EXPR, itype, t, step);
3887 if (POINTER_TYPE_P (type))
bde84d51
RB
3888 {
3889 t = fold_build_pointer_plus (n1, t);
3890 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3891 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3892 t = fold_convert (signed_type_for (type), t);
3893 }
629b3d75
MJ
3894 else
3895 t = fold_build2 (PLUS_EXPR, type, t, n1);
3896 t = fold_convert (TREE_TYPE (startvar), t);
3897 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3898 false, GSI_CONTINUE_LINKING);
3899 if (endvar)
3900 {
3901 assign_stmt = gimple_build_assign (endvar, e);
3902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3903 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3904 assign_stmt = gimple_build_assign (fd->loop.v, e);
3905 else
3906 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3907 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3908 }
3909 /* Handle linear clause adjustments. */
3910 tree itercnt = NULL_TREE;
3911 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3912 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3913 c; c = OMP_CLAUSE_CHAIN (c))
3914 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3915 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3916 {
3917 tree d = OMP_CLAUSE_DECL (c);
3918 bool is_ref = omp_is_reference (d);
3919 tree t = d, a, dest;
3920 if (is_ref)
3921 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3922 if (itercnt == NULL_TREE)
3923 {
3924 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3925 {
3926 itercnt = fold_build2 (MINUS_EXPR, itype,
3927 fold_convert (itype, n1),
3928 fold_convert (itype, fd->loop.n1));
3929 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3930 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3932 NULL_TREE, false,
3933 GSI_CONTINUE_LINKING);
3934 }
3935 else
3936 itercnt = s0;
3937 }
3938 tree type = TREE_TYPE (t);
3939 if (POINTER_TYPE_P (type))
3940 type = sizetype;
3941 a = fold_build2 (MULT_EXPR, type,
3942 fold_convert (type, itercnt),
3943 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3944 dest = unshare_expr (t);
3945 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3946 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3947 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948 false, GSI_CONTINUE_LINKING);
3949 assign_stmt = gimple_build_assign (dest, t);
3950 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3951 }
3952 if (fd->collapse > 1)
3953 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3954
3955 if (!broken_loop)
3956 {
3957 /* The code controlling the sequential loop replaces the
3958 GIMPLE_OMP_CONTINUE. */
65f4b875 3959 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
3960 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3961 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3962 vmain = gimple_omp_continue_control_use (cont_stmt);
3963 vback = gimple_omp_continue_control_def (cont_stmt);
3964
7855700e
JJ
3965 if (cond_var)
3966 {
3967 tree itype = TREE_TYPE (cond_var);
3968 tree t2;
3969 if (POINTER_TYPE_P (type)
3970 || TREE_CODE (n1) != INTEGER_CST
3971 || fd->loop.cond_code != LT_EXPR)
3972 t2 = build_int_cst (itype, 1);
3973 else
3974 t2 = fold_convert (itype, step);
3975 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3976 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3977 NULL_TREE, true, GSI_SAME_STMT);
3978 assign_stmt = gimple_build_assign (cond_var, t2);
3979 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3980 }
3981
629b3d75
MJ
3982 if (!gimple_omp_for_combined_p (fd->for_stmt))
3983 {
3984 if (POINTER_TYPE_P (type))
3985 t = fold_build_pointer_plus (vmain, step);
3986 else
3987 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3988 t = force_gimple_operand_gsi (&gsi, t,
3989 DECL_P (vback)
3990 && TREE_ADDRESSABLE (vback),
3991 NULL_TREE, true, GSI_SAME_STMT);
3992 assign_stmt = gimple_build_assign (vback, t);
3993 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3994
3995 t = build2 (fd->loop.cond_code, boolean_type_node,
3996 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3997 ? t : vback, e);
3998 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3999 }
4000
4001 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4002 gsi_remove (&gsi, true);
4003
4004 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4005 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4006 }
4007
4008 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 4009 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
4010 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4011 {
4012 t = gimple_omp_return_lhs (gsi_stmt (gsi));
8221c30b 4013 if (fd->have_reductemp || fd->have_pointer_condtemp)
28567c40
JJ
4014 {
4015 tree fn;
4016 if (t)
4017 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4018 else
4019 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4020 gcall *g = gimple_build_call (fn, 0);
4021 if (t)
4022 {
4023 gimple_call_set_lhs (g, t);
6c7ae8c5
JJ
4024 if (fd->have_reductemp)
4025 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4026 NOP_EXPR, t),
4027 GSI_SAME_STMT);
28567c40
JJ
4028 }
4029 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4030 }
4031 else
4032 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
629b3d75
MJ
4033 }
4034 gsi_remove (&gsi, true);
4035
4036 /* Connect all the blocks. */
4037 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
357067f2 4038 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
629b3d75
MJ
4039 ep = find_edge (entry_bb, second_bb);
4040 ep->flags = EDGE_TRUE_VALUE;
357067f2 4041 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
629b3d75
MJ
4042 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4043 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4044
4045 if (!broken_loop)
4046 {
4047 ep = find_edge (cont_bb, body_bb);
4048 if (ep == NULL)
4049 {
4050 ep = BRANCH_EDGE (cont_bb);
4051 gcc_assert (single_succ (ep->dest) == body_bb);
4052 }
4053 if (gimple_omp_for_combined_p (fd->for_stmt))
4054 {
4055 remove_edge (ep);
4056 ep = NULL;
4057 }
4058 else if (fd->collapse > 1)
4059 {
4060 remove_edge (ep);
4061 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4062 }
4063 else
4064 ep->flags = EDGE_TRUE_VALUE;
4065 find_edge (cont_bb, fin_bb)->flags
4066 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4067 }
4068
4069 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4070 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4071 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
4072
4073 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4074 recompute_dominator (CDI_DOMINATORS, body_bb));
4075 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4076 recompute_dominator (CDI_DOMINATORS, fin_bb));
4077
4078 struct loop *loop = body_bb->loop_father;
4079 if (loop != entry_bb->loop_father)
4080 {
4081 gcc_assert (broken_loop || loop->header == body_bb);
4082 gcc_assert (broken_loop
4083 || loop->latch == region->cont
4084 || single_pred (loop->latch) == region->cont);
4085 return;
4086 }
4087
4088 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4089 {
4090 loop = alloc_loop ();
4091 loop->header = body_bb;
4092 if (collapse_bb == NULL)
4093 loop->latch = cont_bb;
4094 add_loop (loop, body_bb->loop_father);
4095 }
4096}
4097
4098/* Return phi in E->DEST with ARG on edge E. */
4099
4100static gphi *
4101find_phi_with_arg_on_edge (tree arg, edge e)
4102{
4103 basic_block bb = e->dest;
4104
4105 for (gphi_iterator gpi = gsi_start_phis (bb);
4106 !gsi_end_p (gpi);
4107 gsi_next (&gpi))
4108 {
4109 gphi *phi = gpi.phi ();
4110 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4111 return phi;
4112 }
4113
4114 return NULL;
4115}
4116
4117/* A subroutine of expand_omp_for. Generate code for a parallel
4118 loop with static schedule and a specified chunk size. Given
4119 parameters:
4120
4121 for (V = N1; V cond N2; V += STEP) BODY;
4122
4123 where COND is "<" or ">", we generate pseudocode
4124
4125 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4126 if (cond is <)
4127 adj = STEP - 1;
4128 else
4129 adj = STEP + 1;
4130 if ((__typeof (V)) -1 > 0 && cond is >)
4131 n = -(adj + N2 - N1) / -STEP;
4132 else
4133 n = (adj + N2 - N1) / STEP;
4134 trip = 0;
4135 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4136 here so that V is defined
4137 if the loop is not entered
4138 L0:
4139 s0 = (trip * nthreads + threadid) * CHUNK;
01914336 4140 e0 = min (s0 + CHUNK, n);
629b3d75
MJ
4141 if (s0 < n) goto L1; else goto L4;
4142 L1:
4143 V = s0 * STEP + N1;
4144 e = e0 * STEP + N1;
4145 L2:
4146 BODY;
4147 V += STEP;
4148 if (V cond e) goto L2; else goto L3;
4149 L3:
4150 trip += 1;
4151 goto L0;
4152 L4:
4153*/
4154
4155static void
4156expand_omp_for_static_chunk (struct omp_region *region,
4157 struct omp_for_data *fd, gimple *inner_stmt)
4158{
4159 tree n, s0, e0, e, t;
4160 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4161 tree type, itype, vmain, vback, vextra;
4162 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4163 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
6c7ae8c5 4164 gimple_stmt_iterator gsi, gsip;
629b3d75
MJ
4165 edge se;
4166 bool broken_loop = region->cont == NULL;
4167 tree *counts = NULL;
4168 tree n1, n2, step;
28567c40 4169 tree reductions = NULL_TREE;
8221c30b 4170 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
629b3d75
MJ
4171
4172 itype = type = TREE_TYPE (fd->loop.v);
4173 if (POINTER_TYPE_P (type))
4174 itype = signed_type_for (type);
4175
4176 entry_bb = region->entry;
4177 se = split_block (entry_bb, last_stmt (entry_bb));
4178 entry_bb = se->src;
4179 iter_part_bb = se->dest;
4180 cont_bb = region->cont;
4181 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4182 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4183 gcc_assert (broken_loop
4184 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4185 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4186 body_bb = single_succ (seq_start_bb);
4187 if (!broken_loop)
4188 {
4189 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4190 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4191 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4192 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4193 }
4194 exit_bb = region->exit;
4195
4196 /* Trip and adjustment setup goes in ENTRY_BB. */
65f4b875 4197 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75 4198 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6c7ae8c5
JJ
4199 gsip = gsi;
4200 gsi_prev (&gsip);
629b3d75
MJ
4201
4202 if (fd->collapse > 1)
4203 {
4204 int first_zero_iter = -1, dummy = -1;
4205 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4206
4207 counts = XALLOCAVEC (tree, fd->collapse);
4208 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4209 fin_bb, first_zero_iter,
4210 dummy_bb, dummy, l2_dom_bb);
4211 t = NULL_TREE;
4212 }
4213 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4214 t = integer_one_node;
4215 else
4216 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4217 fold_convert (type, fd->loop.n1),
4218 fold_convert (type, fd->loop.n2));
4219 if (fd->collapse == 1
4220 && TYPE_UNSIGNED (type)
4221 && (t == NULL_TREE || !integer_onep (t)))
4222 {
4223 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4224 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4225 true, GSI_SAME_STMT);
4226 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4227 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4228 true, GSI_SAME_STMT);
4229 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4230 NULL_TREE, NULL_TREE);
4231 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4232 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4233 expand_omp_regimplify_p, NULL, NULL)
4234 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4235 expand_omp_regimplify_p, NULL, NULL))
4236 {
4237 gsi = gsi_for_stmt (cond_stmt);
4238 gimple_regimplify_operands (cond_stmt, &gsi);
4239 }
4240 se = split_block (entry_bb, cond_stmt);
4241 se->flags = EDGE_TRUE_VALUE;
4242 entry_bb = se->dest;
357067f2 4243 se->probability = profile_probability::very_likely ();
629b3d75 4244 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 4245 se->probability = profile_probability::very_unlikely ();
629b3d75
MJ
4246 if (gimple_in_ssa_p (cfun))
4247 {
4248 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4249 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4250 !gsi_end_p (gpi); gsi_next (&gpi))
4251 {
4252 gphi *phi = gpi.phi ();
4253 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4254 se, UNKNOWN_LOCATION);
4255 }
4256 }
4257 gsi = gsi_last_bb (entry_bb);
4258 }
4259
8221c30b
JJ
4260 if (fd->lastprivate_conditional)
4261 {
4262 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4263 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4264 if (fd->have_pointer_condtemp)
4265 condtemp = OMP_CLAUSE_DECL (c);
4266 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4267 cond_var = OMP_CLAUSE_DECL (c);
4268 }
4269 if (fd->have_reductemp || fd->have_pointer_condtemp)
28567c40
JJ
4270 {
4271 tree t1 = build_int_cst (long_integer_type_node, 0);
4272 tree t2 = build_int_cst (long_integer_type_node, 1);
4273 tree t3 = build_int_cstu (long_integer_type_node,
4274 (HOST_WIDE_INT_1U << 31) + 1);
4275 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
6c7ae8c5
JJ
4276 gimple_stmt_iterator gsi2 = gsi_none ();
4277 gimple *g = NULL;
4278 tree mem = null_pointer_node, memv = NULL_TREE;
6c7ae8c5
JJ
4279 if (fd->have_reductemp)
4280 {
4281 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4282 reductions = OMP_CLAUSE_DECL (c);
4283 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4284 g = SSA_NAME_DEF_STMT (reductions);
4285 reductions = gimple_assign_rhs1 (g);
4286 OMP_CLAUSE_DECL (c) = reductions;
4287 gsi2 = gsi_for_stmt (g);
4288 }
4289 else
4290 {
4291 if (gsi_end_p (gsip))
4292 gsi2 = gsi_after_labels (region->entry);
4293 else
4294 gsi2 = gsip;
4295 reductions = null_pointer_node;
4296 }
8221c30b 4297 if (fd->have_pointer_condtemp)
6c7ae8c5 4298 {
6c7ae8c5
JJ
4299 tree type = TREE_TYPE (condtemp);
4300 memv = create_tmp_var (type);
4301 TREE_ADDRESSABLE (memv) = 1;
4302 unsigned HOST_WIDE_INT sz
4303 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4304 sz *= fd->lastprivate_conditional;
4305 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4306 false);
4307 mem = build_fold_addr_expr (memv);
4308 }
28567c40
JJ
4309 tree t
4310 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4311 9, t1, t2, t2, t3, t1, null_pointer_node,
6c7ae8c5 4312 null_pointer_node, reductions, mem);
28567c40
JJ
4313 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4314 true, GSI_SAME_STMT);
8221c30b 4315 if (fd->have_pointer_condtemp)
6c7ae8c5
JJ
4316 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4317 if (fd->have_reductemp)
4318 {
4319 gsi_remove (&gsi2, true);
4320 release_ssa_name (gimple_assign_lhs (g));
4321 }
28567c40 4322 }
629b3d75
MJ
4323 switch (gimple_omp_for_kind (fd->for_stmt))
4324 {
4325 case GF_OMP_FOR_KIND_FOR:
4326 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4327 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4328 break;
4329 case GF_OMP_FOR_KIND_DISTRIBUTE:
4330 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4331 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4332 break;
4333 default:
4334 gcc_unreachable ();
4335 }
4336 nthreads = build_call_expr (nthreads, 0);
4337 nthreads = fold_convert (itype, nthreads);
4338 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4339 true, GSI_SAME_STMT);
4340 threadid = build_call_expr (threadid, 0);
4341 threadid = fold_convert (itype, threadid);
4342 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4343 true, GSI_SAME_STMT);
4344
4345 n1 = fd->loop.n1;
4346 n2 = fd->loop.n2;
4347 step = fd->loop.step;
4348 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4349 {
4350 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4351 OMP_CLAUSE__LOOPTEMP_);
4352 gcc_assert (innerc);
4353 n1 = OMP_CLAUSE_DECL (innerc);
4354 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4355 OMP_CLAUSE__LOOPTEMP_);
4356 gcc_assert (innerc);
4357 n2 = OMP_CLAUSE_DECL (innerc);
4358 }
4359 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4360 true, NULL_TREE, true, GSI_SAME_STMT);
4361 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4362 true, NULL_TREE, true, GSI_SAME_STMT);
4363 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4364 true, NULL_TREE, true, GSI_SAME_STMT);
4365 tree chunk_size = fold_convert (itype, fd->chunk_size);
4366 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4367 chunk_size
4368 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4369 GSI_SAME_STMT);
4370
4371 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4372 t = fold_build2 (PLUS_EXPR, itype, step, t);
4373 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4374 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4375 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4376 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4377 fold_build1 (NEGATE_EXPR, itype, t),
4378 fold_build1 (NEGATE_EXPR, itype, step));
4379 else
4380 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4381 t = fold_convert (itype, t);
4382 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4383 true, GSI_SAME_STMT);
4384
4385 trip_var = create_tmp_reg (itype, ".trip");
4386 if (gimple_in_ssa_p (cfun))
4387 {
4388 trip_init = make_ssa_name (trip_var);
4389 trip_main = make_ssa_name (trip_var);
4390 trip_back = make_ssa_name (trip_var);
4391 }
4392 else
4393 {
4394 trip_init = trip_var;
4395 trip_main = trip_var;
4396 trip_back = trip_var;
4397 }
4398
4399 gassign *assign_stmt
4400 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4401 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4402
4403 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4404 t = fold_build2 (MULT_EXPR, itype, t, step);
4405 if (POINTER_TYPE_P (type))
4406 t = fold_build_pointer_plus (n1, t);
4407 else
4408 t = fold_build2 (PLUS_EXPR, type, t, n1);
4409 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4410 true, GSI_SAME_STMT);
4411
4412 /* Remove the GIMPLE_OMP_FOR. */
4413 gsi_remove (&gsi, true);
4414
4415 gimple_stmt_iterator gsif = gsi;
4416
4417 /* Iteration space partitioning goes in ITER_PART_BB. */
4418 gsi = gsi_last_bb (iter_part_bb);
4419
4420 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4421 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4422 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4423 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4424 false, GSI_CONTINUE_LINKING);
4425
4426 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4427 t = fold_build2 (MIN_EXPR, itype, t, n);
4428 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4429 false, GSI_CONTINUE_LINKING);
4430
4431 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4432 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4433
4434 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4435 gsi = gsi_start_bb (seq_start_bb);
4436
4437 tree startvar = fd->loop.v;
4438 tree endvar = NULL_TREE;
4439
4440 if (gimple_omp_for_combined_p (fd->for_stmt))
4441 {
4442 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4443 ? gimple_omp_parallel_clauses (inner_stmt)
4444 : gimple_omp_for_clauses (inner_stmt);
4445 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4446 gcc_assert (innerc);
4447 startvar = OMP_CLAUSE_DECL (innerc);
4448 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4449 OMP_CLAUSE__LOOPTEMP_);
4450 gcc_assert (innerc);
4451 endvar = OMP_CLAUSE_DECL (innerc);
4452 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4453 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4454 {
4455 int i;
4456 for (i = 1; i < fd->collapse; i++)
4457 {
4458 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4459 OMP_CLAUSE__LOOPTEMP_);
4460 gcc_assert (innerc);
4461 }
4462 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4463 OMP_CLAUSE__LOOPTEMP_);
4464 if (innerc)
4465 {
4466 /* If needed (distribute parallel for with lastprivate),
4467 propagate down the total number of iterations. */
4468 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4469 fd->loop.n2);
4470 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4471 GSI_CONTINUE_LINKING);
4472 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4473 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4474 }
4475 }
4476 }
4477
4478 t = fold_convert (itype, s0);
4479 t = fold_build2 (MULT_EXPR, itype, t, step);
4480 if (POINTER_TYPE_P (type))
bde84d51
RB
4481 {
4482 t = fold_build_pointer_plus (n1, t);
4483 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4484 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4485 t = fold_convert (signed_type_for (type), t);
4486 }
629b3d75
MJ
4487 else
4488 t = fold_build2 (PLUS_EXPR, type, t, n1);
4489 t = fold_convert (TREE_TYPE (startvar), t);
4490 t = force_gimple_operand_gsi (&gsi, t,
4491 DECL_P (startvar)
4492 && TREE_ADDRESSABLE (startvar),
4493 NULL_TREE, false, GSI_CONTINUE_LINKING);
4494 assign_stmt = gimple_build_assign (startvar, t);
4495 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6c7ae8c5
JJ
4496 if (cond_var)
4497 {
4498 tree itype = TREE_TYPE (cond_var);
4499 /* For lastprivate(conditional:) itervar, we need some iteration
4500 counter that starts at unsigned non-zero and increases.
4501 Prefer as few IVs as possible, so if we can use startvar
4502 itself, use that, or startvar + constant (those would be
4503 incremented with step), and as last resort use the s0 + 1
4504 incremented by 1. */
4505 if (POINTER_TYPE_P (type)
4506 || TREE_CODE (n1) != INTEGER_CST
4507 || fd->loop.cond_code != LT_EXPR)
4508 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4509 build_int_cst (itype, 1));
4510 else if (tree_int_cst_sgn (n1) == 1)
4511 t = fold_convert (itype, t);
4512 else
4513 {
4514 tree c = fold_convert (itype, n1);
4515 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4516 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4517 }
4518 t = force_gimple_operand_gsi (&gsi, t, false,
4519 NULL_TREE, false, GSI_CONTINUE_LINKING);
4520 assign_stmt = gimple_build_assign (cond_var, t);
4521 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4522 }
629b3d75
MJ
4523
4524 t = fold_convert (itype, e0);
4525 t = fold_build2 (MULT_EXPR, itype, t, step);
4526 if (POINTER_TYPE_P (type))
bde84d51
RB
4527 {
4528 t = fold_build_pointer_plus (n1, t);
4529 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4530 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4531 t = fold_convert (signed_type_for (type), t);
4532 }
629b3d75
MJ
4533 else
4534 t = fold_build2 (PLUS_EXPR, type, t, n1);
4535 t = fold_convert (TREE_TYPE (startvar), t);
4536 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4537 false, GSI_CONTINUE_LINKING);
4538 if (endvar)
4539 {
4540 assign_stmt = gimple_build_assign (endvar, e);
4541 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4542 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4543 assign_stmt = gimple_build_assign (fd->loop.v, e);
4544 else
4545 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4546 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4547 }
4548 /* Handle linear clause adjustments. */
4549 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4550 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4551 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4552 c; c = OMP_CLAUSE_CHAIN (c))
4553 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4554 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4555 {
4556 tree d = OMP_CLAUSE_DECL (c);
4557 bool is_ref = omp_is_reference (d);
4558 tree t = d, a, dest;
4559 if (is_ref)
4560 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4561 tree type = TREE_TYPE (t);
4562 if (POINTER_TYPE_P (type))
4563 type = sizetype;
4564 dest = unshare_expr (t);
4565 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4566 expand_omp_build_assign (&gsif, v, t);
4567 if (itercnt == NULL_TREE)
4568 {
4569 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4570 {
4571 itercntbias
4572 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4573 fold_convert (itype, fd->loop.n1));
4574 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4575 itercntbias, step);
4576 itercntbias
4577 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4578 NULL_TREE, true,
4579 GSI_SAME_STMT);
4580 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4581 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4582 NULL_TREE, false,
4583 GSI_CONTINUE_LINKING);
4584 }
4585 else
4586 itercnt = s0;
4587 }
4588 a = fold_build2 (MULT_EXPR, type,
4589 fold_convert (type, itercnt),
4590 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4591 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4592 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4593 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4594 false, GSI_CONTINUE_LINKING);
4595 assign_stmt = gimple_build_assign (dest, t);
4596 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4597 }
4598 if (fd->collapse > 1)
4599 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4600
4601 if (!broken_loop)
4602 {
4603 /* The code controlling the sequential loop goes in CONT_BB,
4604 replacing the GIMPLE_OMP_CONTINUE. */
65f4b875 4605 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
4606 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4607 vmain = gimple_omp_continue_control_use (cont_stmt);
4608 vback = gimple_omp_continue_control_def (cont_stmt);
4609
7855700e
JJ
4610 if (cond_var)
4611 {
4612 tree itype = TREE_TYPE (cond_var);
4613 tree t2;
4614 if (POINTER_TYPE_P (type)
4615 || TREE_CODE (n1) != INTEGER_CST
4616 || fd->loop.cond_code != LT_EXPR)
4617 t2 = build_int_cst (itype, 1);
4618 else
4619 t2 = fold_convert (itype, step);
4620 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4621 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4622 NULL_TREE, true, GSI_SAME_STMT);
4623 assign_stmt = gimple_build_assign (cond_var, t2);
4624 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4625 }
4626
629b3d75
MJ
4627 if (!gimple_omp_for_combined_p (fd->for_stmt))
4628 {
4629 if (POINTER_TYPE_P (type))
4630 t = fold_build_pointer_plus (vmain, step);
4631 else
4632 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4633 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4634 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4635 true, GSI_SAME_STMT);
4636 assign_stmt = gimple_build_assign (vback, t);
4637 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4638
4639 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4640 t = build2 (EQ_EXPR, boolean_type_node,
4641 build_int_cst (itype, 0),
4642 build_int_cst (itype, 1));
4643 else
4644 t = build2 (fd->loop.cond_code, boolean_type_node,
4645 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4646 ? t : vback, e);
4647 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4648 }
4649
4650 /* Remove GIMPLE_OMP_CONTINUE. */
4651 gsi_remove (&gsi, true);
4652
4653 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4654 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4655
4656 /* Trip update code goes into TRIP_UPDATE_BB. */
4657 gsi = gsi_start_bb (trip_update_bb);
4658
4659 t = build_int_cst (itype, 1);
4660 t = build2 (PLUS_EXPR, itype, trip_main, t);
4661 assign_stmt = gimple_build_assign (trip_back, t);
4662 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4663 }
4664
4665 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 4666 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
4667 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4668 {
4669 t = gimple_omp_return_lhs (gsi_stmt (gsi));
8221c30b 4670 if (fd->have_reductemp || fd->have_pointer_condtemp)
28567c40
JJ
4671 {
4672 tree fn;
4673 if (t)
4674 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4675 else
4676 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4677 gcall *g = gimple_build_call (fn, 0);
4678 if (t)
4679 {
4680 gimple_call_set_lhs (g, t);
6c7ae8c5
JJ
4681 if (fd->have_reductemp)
4682 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4683 NOP_EXPR, t),
4684 GSI_SAME_STMT);
28567c40
JJ
4685 }
4686 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4687 }
4688 else
4689 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
629b3d75
MJ
4690 }
4691 gsi_remove (&gsi, true);
4692
4693 /* Connect the new blocks. */
4694 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4695 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4696
4697 if (!broken_loop)
4698 {
4699 se = find_edge (cont_bb, body_bb);
4700 if (se == NULL)
4701 {
4702 se = BRANCH_EDGE (cont_bb);
4703 gcc_assert (single_succ (se->dest) == body_bb);
4704 }
4705 if (gimple_omp_for_combined_p (fd->for_stmt))
4706 {
4707 remove_edge (se);
4708 se = NULL;
4709 }
4710 else if (fd->collapse > 1)
4711 {
4712 remove_edge (se);
4713 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4714 }
4715 else
4716 se->flags = EDGE_TRUE_VALUE;
4717 find_edge (cont_bb, trip_update_bb)->flags
4718 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4719
01914336
MJ
4720 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4721 iter_part_bb);
629b3d75
MJ
4722 }
4723
4724 if (gimple_in_ssa_p (cfun))
4725 {
4726 gphi_iterator psi;
4727 gphi *phi;
4728 edge re, ene;
4729 edge_var_map *vm;
4730 size_t i;
4731
4732 gcc_assert (fd->collapse == 1 && !broken_loop);
4733
4734 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4735 remove arguments of the phi nodes in fin_bb. We need to create
4736 appropriate phi nodes in iter_part_bb instead. */
4737 se = find_edge (iter_part_bb, fin_bb);
4738 re = single_succ_edge (trip_update_bb);
4739 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4740 ene = single_succ_edge (entry_bb);
4741
4742 psi = gsi_start_phis (fin_bb);
4743 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4744 gsi_next (&psi), ++i)
4745 {
4746 gphi *nphi;
620e594b 4747 location_t locus;
629b3d75
MJ
4748
4749 phi = psi.phi ();
d83cc5cc
TV
4750 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4751 redirect_edge_var_map_def (vm), 0))
4752 continue;
4753
629b3d75
MJ
4754 t = gimple_phi_result (phi);
4755 gcc_assert (t == redirect_edge_var_map_result (vm));
4756
4757 if (!single_pred_p (fin_bb))
4758 t = copy_ssa_name (t, phi);
4759
4760 nphi = create_phi_node (t, iter_part_bb);
4761
4762 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4763 locus = gimple_phi_arg_location_from_edge (phi, se);
4764
4765 /* A special case -- fd->loop.v is not yet computed in
4766 iter_part_bb, we need to use vextra instead. */
4767 if (t == fd->loop.v)
4768 t = vextra;
4769 add_phi_arg (nphi, t, ene, locus);
4770 locus = redirect_edge_var_map_location (vm);
4771 tree back_arg = redirect_edge_var_map_def (vm);
4772 add_phi_arg (nphi, back_arg, re, locus);
4773 edge ce = find_edge (cont_bb, body_bb);
4774 if (ce == NULL)
4775 {
4776 ce = BRANCH_EDGE (cont_bb);
4777 gcc_assert (single_succ (ce->dest) == body_bb);
4778 ce = single_succ_edge (ce->dest);
4779 }
4780 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4781 gcc_assert (inner_loop_phi != NULL);
4782 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4783 find_edge (seq_start_bb, body_bb), locus);
4784
4785 if (!single_pred_p (fin_bb))
4786 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4787 }
4788 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4789 redirect_edge_var_map_clear (re);
4790 if (single_pred_p (fin_bb))
4791 while (1)
4792 {
4793 psi = gsi_start_phis (fin_bb);
4794 if (gsi_end_p (psi))
4795 break;
4796 remove_phi_node (&psi, false);
4797 }
4798
4799 /* Make phi node for trip. */
4800 phi = create_phi_node (trip_main, iter_part_bb);
4801 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4802 UNKNOWN_LOCATION);
4803 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4804 UNKNOWN_LOCATION);
4805 }
4806
4807 if (!broken_loop)
4808 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4809 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4810 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4811 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4812 recompute_dominator (CDI_DOMINATORS, fin_bb));
4813 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4814 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4815 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4816 recompute_dominator (CDI_DOMINATORS, body_bb));
4817
4818 if (!broken_loop)
4819 {
4820 struct loop *loop = body_bb->loop_father;
4821 struct loop *trip_loop = alloc_loop ();
4822 trip_loop->header = iter_part_bb;
4823 trip_loop->latch = trip_update_bb;
4824 add_loop (trip_loop, iter_part_bb->loop_father);
4825
4826 if (loop != entry_bb->loop_father)
4827 {
4828 gcc_assert (loop->header == body_bb);
4829 gcc_assert (loop->latch == region->cont
4830 || single_pred (loop->latch) == region->cont);
4831 trip_loop->inner = loop;
4832 return;
4833 }
4834
4835 if (!gimple_omp_for_combined_p (fd->for_stmt))
4836 {
4837 loop = alloc_loop ();
4838 loop->header = body_bb;
4839 if (collapse_bb == NULL)
4840 loop->latch = cont_bb;
4841 add_loop (loop, trip_loop);
4842 }
4843 }
4844}
4845
629b3d75
MJ
4846/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4847 loop. Given parameters:
4848
4849 for (V = N1; V cond N2; V += STEP) BODY;
4850
4851 where COND is "<" or ">", we generate pseudocode
4852
4853 V = N1;
4854 goto L1;
4855 L0:
4856 BODY;
4857 V += STEP;
4858 L1:
4859 if (V cond N2) goto L0; else goto L2;
4860 L2:
4861
4862 For collapsed loops, given parameters:
4863 collapse(3)
4864 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4865 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4866 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4867 BODY;
4868
4869 we generate pseudocode
4870
4871 if (cond3 is <)
4872 adj = STEP3 - 1;
4873 else
4874 adj = STEP3 + 1;
4875 count3 = (adj + N32 - N31) / STEP3;
4876 if (cond2 is <)
4877 adj = STEP2 - 1;
4878 else
4879 adj = STEP2 + 1;
4880 count2 = (adj + N22 - N21) / STEP2;
4881 if (cond1 is <)
4882 adj = STEP1 - 1;
4883 else
4884 adj = STEP1 + 1;
4885 count1 = (adj + N12 - N11) / STEP1;
4886 count = count1 * count2 * count3;
4887 V = 0;
4888 V1 = N11;
4889 V2 = N21;
4890 V3 = N31;
4891 goto L1;
4892 L0:
4893 BODY;
4894 V += 1;
4895 V3 += STEP3;
4896 V2 += (V3 cond3 N32) ? 0 : STEP2;
4897 V3 = (V3 cond3 N32) ? V3 : N31;
4898 V1 += (V2 cond2 N22) ? 0 : STEP1;
4899 V2 = (V2 cond2 N22) ? V2 : N21;
4900 L1:
4901 if (V < count) goto L0; else goto L2;
4902 L2:
4903
4904 */
4905
4906static void
4907expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4908{
4909 tree type, t;
4910 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4911 gimple_stmt_iterator gsi;
4912 gimple *stmt;
4913 gcond *cond_stmt;
4914 bool broken_loop = region->cont == NULL;
4915 edge e, ne;
4916 tree *counts = NULL;
4917 int i;
4918 int safelen_int = INT_MAX;
fed2a43c 4919 bool dont_vectorize = false;
629b3d75
MJ
4920 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4921 OMP_CLAUSE_SAFELEN);
4922 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4923 OMP_CLAUSE__SIMDUID_);
fed2a43c
JJ
4924 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4925 OMP_CLAUSE_IF);
4926 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4927 OMP_CLAUSE_SIMDLEN);
e7393c89
JJ
4928 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4929 OMP_CLAUSE__CONDTEMP_);
629b3d75 4930 tree n1, n2;
e7393c89 4931 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
629b3d75
MJ
4932
4933 if (safelen)
4934 {
9d2f08ab 4935 poly_uint64 val;
629b3d75 4936 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
9d2f08ab 4937 if (!poly_int_tree_p (safelen, &val))
629b3d75 4938 safelen_int = 0;
9d2f08ab
RS
4939 else
4940 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
629b3d75
MJ
4941 if (safelen_int == 1)
4942 safelen_int = 0;
4943 }
fed2a43c
JJ
4944 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4945 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4946 {
4947 safelen_int = 0;
4948 dont_vectorize = true;
4949 }
629b3d75
MJ
4950 type = TREE_TYPE (fd->loop.v);
4951 entry_bb = region->entry;
4952 cont_bb = region->cont;
4953 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4954 gcc_assert (broken_loop
4955 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4956 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4957 if (!broken_loop)
4958 {
4959 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4960 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4961 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4962 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4963 }
4964 else
4965 {
4966 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4967 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4968 l2_bb = single_succ (l1_bb);
4969 }
4970 exit_bb = region->exit;
4971 l2_dom_bb = NULL;
4972
65f4b875 4973 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
4974
4975 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4976 /* Not needed in SSA form right now. */
4977 gcc_assert (!gimple_in_ssa_p (cfun));
4978 if (fd->collapse > 1)
4979 {
4980 int first_zero_iter = -1, dummy = -1;
4981 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4982
4983 counts = XALLOCAVEC (tree, fd->collapse);
4984 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4985 zero_iter_bb, first_zero_iter,
4986 dummy_bb, dummy, l2_dom_bb);
4987 }
4988 if (l2_dom_bb == NULL)
4989 l2_dom_bb = l1_bb;
4990
4991 n1 = fd->loop.n1;
4992 n2 = fd->loop.n2;
4993 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4994 {
4995 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4996 OMP_CLAUSE__LOOPTEMP_);
4997 gcc_assert (innerc);
4998 n1 = OMP_CLAUSE_DECL (innerc);
4999 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5000 OMP_CLAUSE__LOOPTEMP_);
5001 gcc_assert (innerc);
5002 n2 = OMP_CLAUSE_DECL (innerc);
5003 }
5004 tree step = fd->loop.step;
5005
4cea8675
AM
5006 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5007 OMP_CLAUSE__SIMT_);
629b3d75
MJ
5008 if (is_simt)
5009 {
5010 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4cea8675
AM
5011 is_simt = safelen_int > 1;
5012 }
5013 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5014 if (is_simt)
5015 {
629b3d75
MJ
5016 simt_lane = create_tmp_var (unsigned_type_node);
5017 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5018 gimple_call_set_lhs (g, simt_lane);
5019 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5020 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5021 fold_convert (TREE_TYPE (step), simt_lane));
5022 n1 = fold_convert (type, n1);
5023 if (POINTER_TYPE_P (type))
5024 n1 = fold_build_pointer_plus (n1, offset);
5025 else
5026 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5027
5028 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5029 if (fd->collapse > 1)
5030 simt_maxlane = build_one_cst (unsigned_type_node);
5031 else if (safelen_int < omp_max_simt_vf ())
5032 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5033 tree vf
5034 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5035 unsigned_type_node, 0);
5036 if (simt_maxlane)
5037 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5038 vf = fold_convert (TREE_TYPE (step), vf);
5039 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5040 }
5041
5042 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5043 if (fd->collapse > 1)
5044 {
5045 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5046 {
5047 gsi_prev (&gsi);
5048 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5049 gsi_next (&gsi);
5050 }
5051 else
5052 for (i = 0; i < fd->collapse; i++)
5053 {
5054 tree itype = TREE_TYPE (fd->loops[i].v);
5055 if (POINTER_TYPE_P (itype))
5056 itype = signed_type_for (itype);
5057 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5058 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5059 }
5060 }
e7393c89
JJ
5061 if (cond_var)
5062 {
5063 if (POINTER_TYPE_P (type)
5064 || TREE_CODE (n1) != INTEGER_CST
5065 || fd->loop.cond_code != LT_EXPR
5066 || tree_int_cst_sgn (n1) != 1)
5067 expand_omp_build_assign (&gsi, cond_var,
5068 build_one_cst (TREE_TYPE (cond_var)));
5069 else
5070 expand_omp_build_assign (&gsi, cond_var,
5071 fold_convert (TREE_TYPE (cond_var), n1));
5072 }
629b3d75
MJ
5073
5074 /* Remove the GIMPLE_OMP_FOR statement. */
5075 gsi_remove (&gsi, true);
5076
5077 if (!broken_loop)
5078 {
5079 /* Code to control the increment goes in the CONT_BB. */
65f4b875 5080 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
5081 stmt = gsi_stmt (gsi);
5082 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5083
5084 if (POINTER_TYPE_P (type))
5085 t = fold_build_pointer_plus (fd->loop.v, step);
5086 else
5087 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5088 expand_omp_build_assign (&gsi, fd->loop.v, t);
5089
5090 if (fd->collapse > 1)
5091 {
5092 i = fd->collapse - 1;
5093 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5094 {
5095 t = fold_convert (sizetype, fd->loops[i].step);
5096 t = fold_build_pointer_plus (fd->loops[i].v, t);
5097 }
5098 else
5099 {
5100 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5101 fd->loops[i].step);
5102 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5103 fd->loops[i].v, t);
5104 }
5105 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5106
5107 for (i = fd->collapse - 1; i > 0; i--)
5108 {
5109 tree itype = TREE_TYPE (fd->loops[i].v);
5110 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5111 if (POINTER_TYPE_P (itype2))
5112 itype2 = signed_type_for (itype2);
bcc6842b
JJ
5113 t = fold_convert (itype2, fd->loops[i - 1].step);
5114 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5115 GSI_SAME_STMT);
629b3d75
MJ
5116 t = build3 (COND_EXPR, itype2,
5117 build2 (fd->loops[i].cond_code, boolean_type_node,
5118 fd->loops[i].v,
5119 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 5120 build_int_cst (itype2, 0), t);
629b3d75
MJ
5121 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5122 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5123 else
5124 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5125 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5126
bcc6842b
JJ
5127 t = fold_convert (itype, fd->loops[i].n1);
5128 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5129 GSI_SAME_STMT);
629b3d75
MJ
5130 t = build3 (COND_EXPR, itype,
5131 build2 (fd->loops[i].cond_code, boolean_type_node,
5132 fd->loops[i].v,
5133 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 5134 fd->loops[i].v, t);
629b3d75
MJ
5135 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5136 }
5137 }
e7393c89
JJ
5138 if (cond_var)
5139 {
5140 if (POINTER_TYPE_P (type)
5141 || TREE_CODE (n1) != INTEGER_CST
5142 || fd->loop.cond_code != LT_EXPR
5143 || tree_int_cst_sgn (n1) != 1)
5144 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5145 build_one_cst (TREE_TYPE (cond_var)));
5146 else
5147 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5148 fold_convert (TREE_TYPE (cond_var), step));
5149 expand_omp_build_assign (&gsi, cond_var, t);
5150 }
629b3d75
MJ
5151
5152 /* Remove GIMPLE_OMP_CONTINUE. */
5153 gsi_remove (&gsi, true);
5154 }
5155
5156 /* Emit the condition in L1_BB. */
5157 gsi = gsi_start_bb (l1_bb);
5158
5159 t = fold_convert (type, n2);
5160 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5161 false, GSI_CONTINUE_LINKING);
5162 tree v = fd->loop.v;
5163 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5164 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5165 false, GSI_CONTINUE_LINKING);
5166 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5167 cond_stmt = gimple_build_cond_empty (t);
5168 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5169 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5170 NULL, NULL)
5171 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5172 NULL, NULL))
5173 {
5174 gsi = gsi_for_stmt (cond_stmt);
5175 gimple_regimplify_operands (cond_stmt, &gsi);
5176 }
5177
5178 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5179 if (is_simt)
5180 {
5181 gsi = gsi_start_bb (l2_bb);
5182 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5183 if (POINTER_TYPE_P (type))
5184 t = fold_build_pointer_plus (fd->loop.v, step);
5185 else
5186 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5187 expand_omp_build_assign (&gsi, fd->loop.v, t);
5188 }
5189
5190 /* Remove GIMPLE_OMP_RETURN. */
65f4b875 5191 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
5192 gsi_remove (&gsi, true);
5193
5194 /* Connect the new blocks. */
5195 remove_edge (FALLTHRU_EDGE (entry_bb));
5196
5197 if (!broken_loop)
5198 {
5199 remove_edge (BRANCH_EDGE (entry_bb));
5200 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5201
5202 e = BRANCH_EDGE (l1_bb);
5203 ne = FALLTHRU_EDGE (l1_bb);
5204 e->flags = EDGE_TRUE_VALUE;
5205 }
5206 else
5207 {
5208 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5209
5210 ne = single_succ_edge (l1_bb);
5211 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5212
5213 }
5214 ne->flags = EDGE_FALSE_VALUE;
357067f2
JH
5215 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5216 ne->probability = e->probability.invert ();
629b3d75
MJ
5217
5218 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5219 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5220
5221 if (simt_maxlane)
5222 {
5223 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5224 NULL_TREE, NULL_TREE);
5225 gsi = gsi_last_bb (entry_bb);
5226 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5227 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5228 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
357067f2
JH
5229 FALLTHRU_EDGE (entry_bb)->probability
5230 = profile_probability::guessed_always ().apply_scale (7, 8);
5231 BRANCH_EDGE (entry_bb)->probability
5232 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
629b3d75
MJ
5233 l2_dom_bb = entry_bb;
5234 }
5235 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5236
5237 if (!broken_loop)
5238 {
5239 struct loop *loop = alloc_loop ();
5240 loop->header = l1_bb;
5241 loop->latch = cont_bb;
5242 add_loop (loop, l1_bb->loop_father);
5243 loop->safelen = safelen_int;
5244 if (simduid)
5245 {
5246 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5247 cfun->has_simduid_loops = true;
5248 }
5249 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5250 the loop. */
5251 if ((flag_tree_loop_vectorize
26d476cd 5252 || !global_options_set.x_flag_tree_loop_vectorize)
629b3d75
MJ
5253 && flag_tree_loop_optimize
5254 && loop->safelen > 1)
5255 {
5256 loop->force_vectorize = true;
f63445e5
JJ
5257 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5258 {
5259 unsigned HOST_WIDE_INT v
5260 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5261 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5262 loop->simdlen = v;
5263 }
629b3d75
MJ
5264 cfun->has_force_vectorize_loops = true;
5265 }
fed2a43c
JJ
5266 else if (dont_vectorize)
5267 loop->dont_vectorize = true;
629b3d75
MJ
5268 }
5269 else if (simduid)
5270 cfun->has_simduid_loops = true;
5271}
5272
5273/* Taskloop construct is represented after gimplification with
5274 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5275 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5276 which should just compute all the needed loop temporaries
5277 for GIMPLE_OMP_TASK. */
5278
5279static void
5280expand_omp_taskloop_for_outer (struct omp_region *region,
5281 struct omp_for_data *fd,
5282 gimple *inner_stmt)
5283{
5284 tree type, bias = NULL_TREE;
5285 basic_block entry_bb, cont_bb, exit_bb;
5286 gimple_stmt_iterator gsi;
5287 gassign *assign_stmt;
5288 tree *counts = NULL;
5289 int i;
5290
5291 gcc_assert (inner_stmt);
5292 gcc_assert (region->cont);
5293 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5294 && gimple_omp_task_taskloop_p (inner_stmt));
5295 type = TREE_TYPE (fd->loop.v);
5296
5297 /* See if we need to bias by LLONG_MIN. */
5298 if (fd->iter_type == long_long_unsigned_type_node
5299 && TREE_CODE (type) == INTEGER_TYPE
5300 && !TYPE_UNSIGNED (type))
5301 {
5302 tree n1, n2;
5303
5304 if (fd->loop.cond_code == LT_EXPR)
5305 {
5306 n1 = fd->loop.n1;
5307 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5308 }
5309 else
5310 {
5311 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5312 n2 = fd->loop.n1;
5313 }
5314 if (TREE_CODE (n1) != INTEGER_CST
5315 || TREE_CODE (n2) != INTEGER_CST
5316 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5317 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5318 }
5319
5320 entry_bb = region->entry;
5321 cont_bb = region->cont;
5322 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5323 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5324 exit_bb = region->exit;
5325
65f4b875 5326 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
5327 gimple *for_stmt = gsi_stmt (gsi);
5328 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5329 if (fd->collapse > 1)
5330 {
5331 int first_zero_iter = -1, dummy = -1;
5332 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5333
5334 counts = XALLOCAVEC (tree, fd->collapse);
5335 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5336 zero_iter_bb, first_zero_iter,
5337 dummy_bb, dummy, l2_dom_bb);
5338
5339 if (zero_iter_bb)
5340 {
5341 /* Some counts[i] vars might be uninitialized if
5342 some loop has zero iterations. But the body shouldn't
5343 be executed in that case, so just avoid uninit warnings. */
5344 for (i = first_zero_iter; i < fd->collapse; i++)
5345 if (SSA_VAR_P (counts[i]))
5346 TREE_NO_WARNING (counts[i]) = 1;
5347 gsi_prev (&gsi);
5348 edge e = split_block (entry_bb, gsi_stmt (gsi));
5349 entry_bb = e->dest;
5350 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5351 gsi = gsi_last_bb (entry_bb);
5352 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5353 get_immediate_dominator (CDI_DOMINATORS,
5354 zero_iter_bb));
5355 }
5356 }
5357
5358 tree t0, t1;
5359 t1 = fd->loop.n2;
5360 t0 = fd->loop.n1;
5361 if (POINTER_TYPE_P (TREE_TYPE (t0))
5362 && TYPE_PRECISION (TREE_TYPE (t0))
5363 != TYPE_PRECISION (fd->iter_type))
5364 {
5365 /* Avoid casting pointers to integer of a different size. */
5366 tree itype = signed_type_for (type);
5367 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5368 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5369 }
5370 else
5371 {
5372 t1 = fold_convert (fd->iter_type, t1);
5373 t0 = fold_convert (fd->iter_type, t0);
5374 }
5375 if (bias)
5376 {
5377 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5378 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5379 }
5380
5381 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5382 OMP_CLAUSE__LOOPTEMP_);
5383 gcc_assert (innerc);
5384 tree startvar = OMP_CLAUSE_DECL (innerc);
5385 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5386 gcc_assert (innerc);
5387 tree endvar = OMP_CLAUSE_DECL (innerc);
5388 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5389 {
5390 gcc_assert (innerc);
5391 for (i = 1; i < fd->collapse; i++)
5392 {
5393 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5394 OMP_CLAUSE__LOOPTEMP_);
5395 gcc_assert (innerc);
5396 }
5397 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5398 OMP_CLAUSE__LOOPTEMP_);
5399 if (innerc)
5400 {
5401 /* If needed (inner taskloop has lastprivate clause), propagate
5402 down the total number of iterations. */
5403 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5404 NULL_TREE, false,
5405 GSI_CONTINUE_LINKING);
5406 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5407 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5408 }
5409 }
5410
5411 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5412 GSI_CONTINUE_LINKING);
5413 assign_stmt = gimple_build_assign (startvar, t0);
5414 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5415
5416 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5417 GSI_CONTINUE_LINKING);
5418 assign_stmt = gimple_build_assign (endvar, t1);
5419 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5420 if (fd->collapse > 1)
5421 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5422
5423 /* Remove the GIMPLE_OMP_FOR statement. */
5424 gsi = gsi_for_stmt (for_stmt);
5425 gsi_remove (&gsi, true);
5426
65f4b875 5427 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
5428 gsi_remove (&gsi, true);
5429
65f4b875 5430 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
5431 gsi_remove (&gsi, true);
5432
357067f2 5433 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75 5434 remove_edge (BRANCH_EDGE (entry_bb));
357067f2 5435 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
629b3d75
MJ
5436 remove_edge (BRANCH_EDGE (cont_bb));
5437 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5438 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5439 recompute_dominator (CDI_DOMINATORS, region->entry));
5440}
5441
5442/* Taskloop construct is represented after gimplification with
5443 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5444 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5445 GOMP_taskloop{,_ull} function arranges for each task to be given just
5446 a single range of iterations. */
5447
5448static void
5449expand_omp_taskloop_for_inner (struct omp_region *region,
5450 struct omp_for_data *fd,
5451 gimple *inner_stmt)
5452{
5453 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5454 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5455 basic_block fin_bb;
5456 gimple_stmt_iterator gsi;
5457 edge ep;
5458 bool broken_loop = region->cont == NULL;
5459 tree *counts = NULL;
5460 tree n1, n2, step;
5461
5462 itype = type = TREE_TYPE (fd->loop.v);
5463 if (POINTER_TYPE_P (type))
5464 itype = signed_type_for (type);
5465
5466 /* See if we need to bias by LLONG_MIN. */
5467 if (fd->iter_type == long_long_unsigned_type_node
5468 && TREE_CODE (type) == INTEGER_TYPE
5469 && !TYPE_UNSIGNED (type))
5470 {
5471 tree n1, n2;
5472
5473 if (fd->loop.cond_code == LT_EXPR)
5474 {
5475 n1 = fd->loop.n1;
5476 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5477 }
5478 else
5479 {
5480 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5481 n2 = fd->loop.n1;
5482 }
5483 if (TREE_CODE (n1) != INTEGER_CST
5484 || TREE_CODE (n2) != INTEGER_CST
5485 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5486 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5487 }
5488
5489 entry_bb = region->entry;
5490 cont_bb = region->cont;
5491 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5492 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5493 gcc_assert (broken_loop
5494 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5495 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5496 if (!broken_loop)
5497 {
5498 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5499 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5500 }
5501 exit_bb = region->exit;
5502
5503 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 5504 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
5505 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5506
5507 if (fd->collapse > 1)
5508 {
5509 int first_zero_iter = -1, dummy = -1;
5510 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5511
5512 counts = XALLOCAVEC (tree, fd->collapse);
5513 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5514 fin_bb, first_zero_iter,
5515 dummy_bb, dummy, l2_dom_bb);
5516 t = NULL_TREE;
5517 }
5518 else
5519 t = integer_one_node;
5520
5521 step = fd->loop.step;
5522 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5523 OMP_CLAUSE__LOOPTEMP_);
5524 gcc_assert (innerc);
5525 n1 = OMP_CLAUSE_DECL (innerc);
5526 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5527 gcc_assert (innerc);
5528 n2 = OMP_CLAUSE_DECL (innerc);
5529 if (bias)
5530 {
5531 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5532 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5533 }
5534 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5535 true, NULL_TREE, true, GSI_SAME_STMT);
5536 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5537 true, NULL_TREE, true, GSI_SAME_STMT);
5538 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5539 true, NULL_TREE, true, GSI_SAME_STMT);
5540
5541 tree startvar = fd->loop.v;
5542 tree endvar = NULL_TREE;
5543
5544 if (gimple_omp_for_combined_p (fd->for_stmt))
5545 {
5546 tree clauses = gimple_omp_for_clauses (inner_stmt);
5547 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5548 gcc_assert (innerc);
5549 startvar = OMP_CLAUSE_DECL (innerc);
5550 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5551 OMP_CLAUSE__LOOPTEMP_);
5552 gcc_assert (innerc);
5553 endvar = OMP_CLAUSE_DECL (innerc);
5554 }
5555 t = fold_convert (TREE_TYPE (startvar), n1);
5556 t = force_gimple_operand_gsi (&gsi, t,
5557 DECL_P (startvar)
5558 && TREE_ADDRESSABLE (startvar),
5559 NULL_TREE, false, GSI_CONTINUE_LINKING);
5560 gimple *assign_stmt = gimple_build_assign (startvar, t);
5561 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5562
5563 t = fold_convert (TREE_TYPE (startvar), n2);
5564 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5565 false, GSI_CONTINUE_LINKING);
5566 if (endvar)
5567 {
5568 assign_stmt = gimple_build_assign (endvar, e);
5569 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5570 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5571 assign_stmt = gimple_build_assign (fd->loop.v, e);
5572 else
5573 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5574 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5575 }
5576 if (fd->collapse > 1)
5577 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5578
5579 if (!broken_loop)
5580 {
5581 /* The code controlling the sequential loop replaces the
5582 GIMPLE_OMP_CONTINUE. */
65f4b875 5583 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
5584 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5585 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5586 vmain = gimple_omp_continue_control_use (cont_stmt);
5587 vback = gimple_omp_continue_control_def (cont_stmt);
5588
5589 if (!gimple_omp_for_combined_p (fd->for_stmt))
5590 {
5591 if (POINTER_TYPE_P (type))
5592 t = fold_build_pointer_plus (vmain, step);
5593 else
5594 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5595 t = force_gimple_operand_gsi (&gsi, t,
5596 DECL_P (vback)
5597 && TREE_ADDRESSABLE (vback),
5598 NULL_TREE, true, GSI_SAME_STMT);
5599 assign_stmt = gimple_build_assign (vback, t);
5600 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5601
5602 t = build2 (fd->loop.cond_code, boolean_type_node,
5603 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5604 ? t : vback, e);
5605 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5606 }
5607
5608 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5609 gsi_remove (&gsi, true);
5610
5611 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5612 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5613 }
5614
5615 /* Remove the GIMPLE_OMP_FOR statement. */
5616 gsi = gsi_for_stmt (fd->for_stmt);
5617 gsi_remove (&gsi, true);
5618
5619 /* Remove the GIMPLE_OMP_RETURN statement. */
65f4b875 5620 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
5621 gsi_remove (&gsi, true);
5622
357067f2 5623 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75
MJ
5624 if (!broken_loop)
5625 remove_edge (BRANCH_EDGE (entry_bb));
5626 else
5627 {
5628 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5629 region->outer->cont = NULL;
5630 }
5631
5632 /* Connect all the blocks. */
5633 if (!broken_loop)
5634 {
5635 ep = find_edge (cont_bb, body_bb);
5636 if (gimple_omp_for_combined_p (fd->for_stmt))
5637 {
5638 remove_edge (ep);
5639 ep = NULL;
5640 }
5641 else if (fd->collapse > 1)
5642 {
5643 remove_edge (ep);
5644 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5645 }
5646 else
5647 ep->flags = EDGE_TRUE_VALUE;
5648 find_edge (cont_bb, fin_bb)->flags
5649 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5650 }
5651
5652 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5653 recompute_dominator (CDI_DOMINATORS, body_bb));
5654 if (!broken_loop)
5655 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5656 recompute_dominator (CDI_DOMINATORS, fin_bb));
5657
5658 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5659 {
5660 struct loop *loop = alloc_loop ();
5661 loop->header = body_bb;
5662 if (collapse_bb == NULL)
5663 loop->latch = cont_bb;
5664 add_loop (loop, body_bb->loop_father);
5665 }
5666}
5667
5668/* A subroutine of expand_omp_for. Generate code for an OpenACC
5669 partitioned loop. The lowering here is abstracted, in that the
5670 loop parameters are passed through internal functions, which are
5671 further lowered by oacc_device_lower, once we get to the target
5672 compiler. The loop is of the form:
5673
5674 for (V = B; V LTGT E; V += S) {BODY}
5675
5676 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5677 (constant 0 for no chunking) and we will have a GWV partitioning
5678 mask, specifying dimensions over which the loop is to be
02889d23
CLT
5679 partitioned (see note below). We generate code that looks like
5680 (this ignores tiling):
629b3d75
MJ
5681
5682 <entry_bb> [incoming FALL->body, BRANCH->exit]
5683 typedef signedintify (typeof (V)) T; // underlying signed integral type
5684 T range = E - B;
5685 T chunk_no = 0;
5686 T DIR = LTGT == '<' ? +1 : -1;
5687 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5688 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5689
5690 <head_bb> [created by splitting end of entry_bb]
5691 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5692 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5693 if (!(offset LTGT bound)) goto bottom_bb;
5694
5695 <body_bb> [incoming]
5696 V = B + offset;
5697 {BODY}
5698
5699 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5700 offset += step;
5701 if (offset LTGT bound) goto body_bb; [*]
5702
5703 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5704 chunk_no++;
5705 if (chunk < chunk_max) goto head_bb;
5706
5707 <exit_bb> [incoming]
5708 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5709
02889d23 5710 [*] Needed if V live at end of loop. */
629b3d75
MJ
5711
5712static void
5713expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5714{
5715 tree v = fd->loop.v;
5716 enum tree_code cond_code = fd->loop.cond_code;
5717 enum tree_code plus_code = PLUS_EXPR;
5718
5719 tree chunk_size = integer_minus_one_node;
5720 tree gwv = integer_zero_node;
5721 tree iter_type = TREE_TYPE (v);
5722 tree diff_type = iter_type;
5723 tree plus_type = iter_type;
5724 struct oacc_collapse *counts = NULL;
5725
5726 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5727 == GF_OMP_FOR_KIND_OACC_LOOP);
5728 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5729 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5730
5731 if (POINTER_TYPE_P (iter_type))
5732 {
5733 plus_code = POINTER_PLUS_EXPR;
5734 plus_type = sizetype;
5735 }
5736 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5737 diff_type = signed_type_for (diff_type);
f4c222c0
TV
5738 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5739 diff_type = integer_type_node;
629b3d75
MJ
5740
5741 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5742 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5743 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5744 basic_block bottom_bb = NULL;
5745
5746 /* entry_bb has two sucessors; the branch edge is to the exit
5747 block, fallthrough edge to body. */
5748 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5749 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5750
5751 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5752 body_bb, or to a block whose only successor is the body_bb. Its
5753 fallthrough successor is the final block (same as the branch
5754 successor of the entry_bb). */
5755 if (cont_bb)
5756 {
5757 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5758 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5759
5760 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5761 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5762 }
5763 else
5764 gcc_assert (!gimple_in_ssa_p (cfun));
5765
5766 /* The exit block only has entry_bb and cont_bb as predecessors. */
5767 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5768
5769 tree chunk_no;
5770 tree chunk_max = NULL_TREE;
5771 tree bound, offset;
5772 tree step = create_tmp_var (diff_type, ".step");
5773 bool up = cond_code == LT_EXPR;
5774 tree dir = build_int_cst (diff_type, up ? +1 : -1);
02889d23 5775 bool chunking = !gimple_in_ssa_p (cfun);
629b3d75
MJ
5776 bool negating;
5777
02889d23
CLT
5778 /* Tiling vars. */
5779 tree tile_size = NULL_TREE;
5780 tree element_s = NULL_TREE;
5781 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5782 basic_block elem_body_bb = NULL;
5783 basic_block elem_cont_bb = NULL;
5784
629b3d75
MJ
5785 /* SSA instances. */
5786 tree offset_incr = NULL_TREE;
5787 tree offset_init = NULL_TREE;
5788
5789 gimple_stmt_iterator gsi;
5790 gassign *ass;
5791 gcall *call;
5792 gimple *stmt;
5793 tree expr;
5794 location_t loc;
5795 edge split, be, fte;
5796
5797 /* Split the end of entry_bb to create head_bb. */
5798 split = split_block (entry_bb, last_stmt (entry_bb));
5799 basic_block head_bb = split->dest;
5800 entry_bb = split->src;
5801
5802 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
65f4b875 5803 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
5804 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5805 loc = gimple_location (for_stmt);
5806
5807 if (gimple_in_ssa_p (cfun))
5808 {
5809 offset_init = gimple_omp_for_index (for_stmt, 0);
5810 gcc_assert (integer_zerop (fd->loop.n1));
5811 /* The SSA parallelizer does gang parallelism. */
5812 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5813 }
5814
02889d23 5815 if (fd->collapse > 1 || fd->tiling)
629b3d75 5816 {
02889d23 5817 gcc_assert (!gimple_in_ssa_p (cfun) && up);
629b3d75
MJ
5818 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5819 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
02889d23 5820 TREE_TYPE (fd->loop.n2), loc);
629b3d75
MJ
5821
5822 if (SSA_VAR_P (fd->loop.n2))
5823 {
5824 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5825 true, GSI_SAME_STMT);
5826 ass = gimple_build_assign (fd->loop.n2, total);
5827 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5828 }
629b3d75
MJ
5829 }
5830
5831 tree b = fd->loop.n1;
5832 tree e = fd->loop.n2;
5833 tree s = fd->loop.step;
5834
5835 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5836 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5837
01914336 5838 /* Convert the step, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
5839 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5840 if (negating)
5841 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5842 s = fold_convert (diff_type, s);
5843 if (negating)
5844 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5845 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5846
5847 if (!chunking)
5848 chunk_size = integer_zero_node;
5849 expr = fold_convert (diff_type, chunk_size);
5850 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5851 NULL_TREE, true, GSI_SAME_STMT);
02889d23
CLT
5852
5853 if (fd->tiling)
5854 {
5855 /* Determine the tile size and element step,
5856 modify the outer loop step size. */
5857 tile_size = create_tmp_var (diff_type, ".tile_size");
5858 expr = build_int_cst (diff_type, 1);
5859 for (int ix = 0; ix < fd->collapse; ix++)
5860 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5861 expr = force_gimple_operand_gsi (&gsi, expr, true,
5862 NULL_TREE, true, GSI_SAME_STMT);
5863 ass = gimple_build_assign (tile_size, expr);
5864 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5865
5866 element_s = create_tmp_var (diff_type, ".element_s");
5867 ass = gimple_build_assign (element_s, s);
5868 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5869
5870 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5871 s = force_gimple_operand_gsi (&gsi, expr, true,
5872 NULL_TREE, true, GSI_SAME_STMT);
5873 }
5874
01914336 5875 /* Determine the range, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
5876 negating = !up && TYPE_UNSIGNED (iter_type);
5877 expr = fold_build2 (MINUS_EXPR, plus_type,
5878 fold_convert (plus_type, negating ? b : e),
5879 fold_convert (plus_type, negating ? e : b));
5880 expr = fold_convert (diff_type, expr);
5881 if (negating)
5882 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5883 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5884 NULL_TREE, true, GSI_SAME_STMT);
5885
5886 chunk_no = build_int_cst (diff_type, 0);
5887 if (chunking)
5888 {
5889 gcc_assert (!gimple_in_ssa_p (cfun));
5890
5891 expr = chunk_no;
5892 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5893 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5894
5895 ass = gimple_build_assign (chunk_no, expr);
5896 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5897
5898 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5899 build_int_cst (integer_type_node,
5900 IFN_GOACC_LOOP_CHUNKS),
5901 dir, range, s, chunk_size, gwv);
5902 gimple_call_set_lhs (call, chunk_max);
5903 gimple_set_location (call, loc);
5904 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5905 }
5906 else
5907 chunk_size = chunk_no;
5908
5909 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5910 build_int_cst (integer_type_node,
5911 IFN_GOACC_LOOP_STEP),
5912 dir, range, s, chunk_size, gwv);
5913 gimple_call_set_lhs (call, step);
5914 gimple_set_location (call, loc);
5915 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5916
5917 /* Remove the GIMPLE_OMP_FOR. */
5918 gsi_remove (&gsi, true);
5919
01914336 5920 /* Fixup edges from head_bb. */
629b3d75
MJ
5921 be = BRANCH_EDGE (head_bb);
5922 fte = FALLTHRU_EDGE (head_bb);
5923 be->flags |= EDGE_FALSE_VALUE;
5924 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5925
5926 basic_block body_bb = fte->dest;
5927
5928 if (gimple_in_ssa_p (cfun))
5929 {
65f4b875 5930 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
5931 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5932
5933 offset = gimple_omp_continue_control_use (cont_stmt);
5934 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5935 }
5936 else
5937 {
5938 offset = create_tmp_var (diff_type, ".offset");
5939 offset_init = offset_incr = offset;
5940 }
5941 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5942
5943 /* Loop offset & bound go into head_bb. */
5944 gsi = gsi_start_bb (head_bb);
5945
5946 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5947 build_int_cst (integer_type_node,
5948 IFN_GOACC_LOOP_OFFSET),
5949 dir, range, s,
5950 chunk_size, gwv, chunk_no);
5951 gimple_call_set_lhs (call, offset_init);
5952 gimple_set_location (call, loc);
5953 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5954
5955 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5956 build_int_cst (integer_type_node,
5957 IFN_GOACC_LOOP_BOUND),
5958 dir, range, s,
5959 chunk_size, gwv, offset_init);
5960 gimple_call_set_lhs (call, bound);
5961 gimple_set_location (call, loc);
5962 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5963
5964 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5965 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5966 GSI_CONTINUE_LINKING);
5967
5968 /* V assignment goes into body_bb. */
5969 if (!gimple_in_ssa_p (cfun))
5970 {
5971 gsi = gsi_start_bb (body_bb);
5972
5973 expr = build2 (plus_code, iter_type, b,
5974 fold_convert (plus_type, offset));
5975 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5976 true, GSI_SAME_STMT);
5977 ass = gimple_build_assign (v, expr);
5978 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
02889d23
CLT
5979
5980 if (fd->collapse > 1 || fd->tiling)
5981 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5982
5983 if (fd->tiling)
5984 {
5985 /* Determine the range of the element loop -- usually simply
5986 the tile_size, but could be smaller if the final
5987 iteration of the outer loop is a partial tile. */
5988 tree e_range = create_tmp_var (diff_type, ".e_range");
5989
5990 expr = build2 (MIN_EXPR, diff_type,
5991 build2 (MINUS_EXPR, diff_type, bound, offset),
5992 build2 (MULT_EXPR, diff_type, tile_size,
5993 element_s));
5994 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5995 true, GSI_SAME_STMT);
5996 ass = gimple_build_assign (e_range, expr);
5997 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5998
5999 /* Determine bound, offset & step of inner loop. */
6000 e_bound = create_tmp_var (diff_type, ".e_bound");
6001 e_offset = create_tmp_var (diff_type, ".e_offset");
6002 e_step = create_tmp_var (diff_type, ".e_step");
6003
6004 /* Mark these as element loops. */
6005 tree t, e_gwv = integer_minus_one_node;
6006 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6007
6008 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6009 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6010 element_s, chunk, e_gwv, chunk);
6011 gimple_call_set_lhs (call, e_offset);
6012 gimple_set_location (call, loc);
6013 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6014
6015 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6016 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6017 element_s, chunk, e_gwv, e_offset);
6018 gimple_call_set_lhs (call, e_bound);
6019 gimple_set_location (call, loc);
6020 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6021
6022 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6023 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6024 element_s, chunk, e_gwv);
6025 gimple_call_set_lhs (call, e_step);
6026 gimple_set_location (call, loc);
6027 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6028
6029 /* Add test and split block. */
6030 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6031 stmt = gimple_build_cond_empty (expr);
6032 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6033 split = split_block (body_bb, stmt);
6034 elem_body_bb = split->dest;
6035 if (cont_bb == body_bb)
6036 cont_bb = elem_body_bb;
6037 body_bb = split->src;
6038
6039 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6040
05e0af43
CP
6041 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6042 if (cont_bb == NULL)
6043 {
6044 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6045 e->probability = profile_probability::even ();
6046 split->probability = profile_probability::even ();
6047 }
6048
02889d23
CLT
6049 /* Initialize the user's loop vars. */
6050 gsi = gsi_start_bb (elem_body_bb);
6051 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6052 }
629b3d75
MJ
6053 }
6054
6055 /* Loop increment goes into cont_bb. If this is not a loop, we
6056 will have spawned threads as if it was, and each one will
6057 execute one iteration. The specification is not explicit about
6058 whether such constructs are ill-formed or not, and they can
6059 occur, especially when noreturn routines are involved. */
6060 if (cont_bb)
6061 {
65f4b875 6062 gsi = gsi_last_nondebug_bb (cont_bb);
629b3d75
MJ
6063 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6064 loc = gimple_location (cont_stmt);
6065
02889d23
CLT
6066 if (fd->tiling)
6067 {
6068 /* Insert element loop increment and test. */
6069 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6070 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6071 true, GSI_SAME_STMT);
6072 ass = gimple_build_assign (e_offset, expr);
6073 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6074 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6075
6076 stmt = gimple_build_cond_empty (expr);
6077 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6078 split = split_block (cont_bb, stmt);
6079 elem_cont_bb = split->src;
6080 cont_bb = split->dest;
6081
6082 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
9fba2943
TV
6083 split->probability = profile_probability::unlikely ().guessed ();
6084 edge latch_edge
6085 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6086 latch_edge->probability = profile_probability::likely ().guessed ();
6087
6088 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6089 skip_edge->probability = profile_probability::unlikely ().guessed ();
6090 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6091 loop_entry_edge->probability
6092 = profile_probability::likely ().guessed ();
02889d23
CLT
6093
6094 gsi = gsi_for_stmt (cont_stmt);
6095 }
6096
629b3d75
MJ
6097 /* Increment offset. */
6098 if (gimple_in_ssa_p (cfun))
02889d23
CLT
6099 expr = build2 (plus_code, iter_type, offset,
6100 fold_convert (plus_type, step));
629b3d75
MJ
6101 else
6102 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6103 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6104 true, GSI_SAME_STMT);
6105 ass = gimple_build_assign (offset_incr, expr);
6106 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6107 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6108 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6109
6110 /* Remove the GIMPLE_OMP_CONTINUE. */
6111 gsi_remove (&gsi, true);
6112
01914336 6113 /* Fixup edges from cont_bb. */
629b3d75
MJ
6114 be = BRANCH_EDGE (cont_bb);
6115 fte = FALLTHRU_EDGE (cont_bb);
6116 be->flags |= EDGE_TRUE_VALUE;
6117 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6118
6119 if (chunking)
6120 {
6121 /* Split the beginning of exit_bb to make bottom_bb. We
6122 need to insert a nop at the start, because splitting is
01914336 6123 after a stmt, not before. */
629b3d75
MJ
6124 gsi = gsi_start_bb (exit_bb);
6125 stmt = gimple_build_nop ();
6126 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6127 split = split_block (exit_bb, stmt);
6128 bottom_bb = split->src;
6129 exit_bb = split->dest;
6130 gsi = gsi_last_bb (bottom_bb);
6131
6132 /* Chunk increment and test goes into bottom_bb. */
6133 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6134 build_int_cst (diff_type, 1));
6135 ass = gimple_build_assign (chunk_no, expr);
6136 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6137
6138 /* Chunk test at end of bottom_bb. */
6139 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6140 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6141 GSI_CONTINUE_LINKING);
6142
01914336 6143 /* Fixup edges from bottom_bb. */
629b3d75 6144 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
f38057cc
TV
6145 split->probability = profile_probability::unlikely ().guessed ();
6146 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6147 latch_edge->probability = profile_probability::likely ().guessed ();
629b3d75
MJ
6148 }
6149 }
6150
65f4b875 6151 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6152 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6153 loc = gimple_location (gsi_stmt (gsi));
6154
6155 if (!gimple_in_ssa_p (cfun))
6156 {
6157 /* Insert the final value of V, in case it is live. This is the
6158 value for the only thread that survives past the join. */
6159 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6160 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6161 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6162 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6163 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6164 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6165 true, GSI_SAME_STMT);
6166 ass = gimple_build_assign (v, expr);
6167 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6168 }
6169
01914336 6170 /* Remove the OMP_RETURN. */
629b3d75
MJ
6171 gsi_remove (&gsi, true);
6172
6173 if (cont_bb)
6174 {
02889d23 6175 /* We now have one, two or three nested loops. Update the loop
629b3d75
MJ
6176 structures. */
6177 struct loop *parent = entry_bb->loop_father;
6178 struct loop *body = body_bb->loop_father;
6179
6180 if (chunking)
6181 {
6182 struct loop *chunk_loop = alloc_loop ();
6183 chunk_loop->header = head_bb;
6184 chunk_loop->latch = bottom_bb;
6185 add_loop (chunk_loop, parent);
6186 parent = chunk_loop;
6187 }
6188 else if (parent != body)
6189 {
6190 gcc_assert (body->header == body_bb);
6191 gcc_assert (body->latch == cont_bb
6192 || single_pred (body->latch) == cont_bb);
6193 parent = NULL;
6194 }
6195
6196 if (parent)
6197 {
6198 struct loop *body_loop = alloc_loop ();
6199 body_loop->header = body_bb;
6200 body_loop->latch = cont_bb;
6201 add_loop (body_loop, parent);
02889d23
CLT
6202
6203 if (fd->tiling)
6204 {
6205 /* Insert tiling's element loop. */
6206 struct loop *inner_loop = alloc_loop ();
6207 inner_loop->header = elem_body_bb;
6208 inner_loop->latch = elem_cont_bb;
6209 add_loop (inner_loop, body_loop);
6210 }
629b3d75
MJ
6211 }
6212 }
6213}
6214
6215/* Expand the OMP loop defined by REGION. */
6216
6217static void
6218expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6219{
6220 struct omp_for_data fd;
6221 struct omp_for_data_loop *loops;
6222
6223 loops
6224 = (struct omp_for_data_loop *)
6225 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6226 * sizeof (struct omp_for_data_loop));
6227 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6228 &fd, loops);
6229 region->sched_kind = fd.sched_kind;
6230 region->sched_modifiers = fd.sched_modifiers;
6231
6232 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6233 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6234 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6235 if (region->cont)
6236 {
6237 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6238 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6239 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6240 }
6241 else
6242 /* If there isn't a continue then this is a degerate case where
6243 the introduction of abnormal edges during lowering will prevent
6244 original loops from being detected. Fix that up. */
6245 loops_state_set (LOOPS_NEED_FIXUP);
6246
6247 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6248 expand_omp_simd (region, &fd);
629b3d75
MJ
6249 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6250 {
6251 gcc_assert (!inner_stmt);
6252 expand_oacc_for (region, &fd);
6253 }
6254 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6255 {
6256 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6257 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6258 else
6259 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6260 }
6261 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6262 && !fd.have_ordered)
6263 {
6264 if (fd.chunk_size == NULL)
6265 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6266 else
6267 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6268 }
6269 else
6270 {
6271 int fn_index, start_ix, next_ix;
28567c40
JJ
6272 unsigned HOST_WIDE_INT sched = 0;
6273 tree sched_arg = NULL_TREE;
629b3d75
MJ
6274
6275 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6276 == GF_OMP_FOR_KIND_FOR);
6277 if (fd.chunk_size == NULL
6278 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6279 fd.chunk_size = integer_zero_node;
629b3d75
MJ
6280 switch (fd.sched_kind)
6281 {
6282 case OMP_CLAUSE_SCHEDULE_RUNTIME:
28567c40
JJ
6283 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
6284 {
6285 gcc_assert (!fd.have_ordered);
6286 fn_index = 6;
6287 sched = 4;
6288 }
6289 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6290 && !fd.have_ordered)
6291 fn_index = 7;
6292 else
6293 {
6294 fn_index = 3;
6295 sched = (HOST_WIDE_INT_1U << 31);
6296 }
629b3d75
MJ
6297 break;
6298 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6299 case OMP_CLAUSE_SCHEDULE_GUIDED:
28567c40 6300 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
629b3d75
MJ
6301 && !fd.have_ordered)
6302 {
6303 fn_index = 3 + fd.sched_kind;
28567c40 6304 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
629b3d75
MJ
6305 break;
6306 }
629b3d75 6307 fn_index = fd.sched_kind;
28567c40
JJ
6308 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6309 sched += (HOST_WIDE_INT_1U << 31);
629b3d75 6310 break;
28567c40
JJ
6311 case OMP_CLAUSE_SCHEDULE_STATIC:
6312 gcc_assert (fd.have_ordered);
6313 fn_index = 0;
6314 sched = (HOST_WIDE_INT_1U << 31) + 1;
6315 break;
6316 default:
6317 gcc_unreachable ();
629b3d75
MJ
6318 }
6319 if (!fd.ordered)
28567c40 6320 fn_index += fd.have_ordered * 8;
629b3d75
MJ
6321 if (fd.ordered)
6322 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6323 else
6324 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6325 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8221c30b 6326 if (fd.have_reductemp || fd.have_pointer_condtemp)
28567c40
JJ
6327 {
6328 if (fd.ordered)
6329 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6330 else if (fd.have_ordered)
6331 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6332 else
6333 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6334 sched_arg = build_int_cstu (long_integer_type_node, sched);
6335 if (!fd.chunk_size)
6336 fd.chunk_size = integer_zero_node;
6337 }
629b3d75
MJ
6338 if (fd.iter_type == long_long_unsigned_type_node)
6339 {
6340 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6341 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6342 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6343 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6344 }
6345 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
28567c40
JJ
6346 (enum built_in_function) next_ix, sched_arg,
6347 inner_stmt);
629b3d75
MJ
6348 }
6349
6350 if (gimple_in_ssa_p (cfun))
6351 update_ssa (TODO_update_ssa_only_virtuals);
6352}
6353
6354/* Expand code for an OpenMP sections directive. In pseudo code, we generate
6355
6356 v = GOMP_sections_start (n);
6357 L0:
6358 switch (v)
6359 {
6360 case 0:
6361 goto L2;
6362 case 1:
6363 section 1;
6364 goto L1;
6365 case 2:
6366 ...
6367 case n:
6368 ...
6369 default:
6370 abort ();
6371 }
6372 L1:
6373 v = GOMP_sections_next ();
6374 goto L0;
6375 L2:
6376 reduction;
6377
6378 If this is a combined parallel sections, replace the call to
6379 GOMP_sections_start with call to GOMP_sections_next. */
6380
6381static void
6382expand_omp_sections (struct omp_region *region)
6383{
6384 tree t, u, vin = NULL, vmain, vnext, l2;
6385 unsigned len;
6386 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6387 gimple_stmt_iterator si, switch_si;
6388 gomp_sections *sections_stmt;
6389 gimple *stmt;
6390 gomp_continue *cont;
6391 edge_iterator ei;
6392 edge e;
6393 struct omp_region *inner;
6394 unsigned i, casei;
6395 bool exit_reachable = region->cont != NULL;
6396
6397 gcc_assert (region->exit != NULL);
6398 entry_bb = region->entry;
6399 l0_bb = single_succ (entry_bb);
6400 l1_bb = region->cont;
6401 l2_bb = region->exit;
6402 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6403 l2 = gimple_block_label (l2_bb);
6404 else
6405 {
6406 /* This can happen if there are reductions. */
6407 len = EDGE_COUNT (l0_bb->succs);
6408 gcc_assert (len > 0);
6409 e = EDGE_SUCC (l0_bb, len - 1);
65f4b875 6410 si = gsi_last_nondebug_bb (e->dest);
629b3d75
MJ
6411 l2 = NULL_TREE;
6412 if (gsi_end_p (si)
01914336 6413 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
629b3d75
MJ
6414 l2 = gimple_block_label (e->dest);
6415 else
6416 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6417 {
65f4b875 6418 si = gsi_last_nondebug_bb (e->dest);
629b3d75
MJ
6419 if (gsi_end_p (si)
6420 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6421 {
6422 l2 = gimple_block_label (e->dest);
6423 break;
6424 }
6425 }
6426 }
6427 if (exit_reachable)
6428 default_bb = create_empty_bb (l1_bb->prev_bb);
6429 else
6430 default_bb = create_empty_bb (l0_bb);
6431
6432 /* We will build a switch() with enough cases for all the
6433 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6434 and a default case to abort if something goes wrong. */
6435 len = EDGE_COUNT (l0_bb->succs);
6436
6437 /* Use vec::quick_push on label_vec throughout, since we know the size
6438 in advance. */
6439 auto_vec<tree> label_vec (len);
6440
6441 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6442 GIMPLE_OMP_SECTIONS statement. */
65f4b875 6443 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
6444 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6445 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6446 vin = gimple_omp_sections_control (sections_stmt);
28567c40
JJ
6447 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6448 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8e7757ba
JJ
6449 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6450 tree cond_var = NULL_TREE;
6451 if (reductmp || condtmp)
6452 {
6453 tree reductions = null_pointer_node, mem = null_pointer_node;
6454 tree memv = NULL_TREE, condtemp = NULL_TREE;
6455 gimple_stmt_iterator gsi = gsi_none ();
6456 gimple *g = NULL;
6457 if (reductmp)
6458 {
6459 reductions = OMP_CLAUSE_DECL (reductmp);
6460 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6461 g = SSA_NAME_DEF_STMT (reductions);
6462 reductions = gimple_assign_rhs1 (g);
6463 OMP_CLAUSE_DECL (reductmp) = reductions;
6464 gsi = gsi_for_stmt (g);
6465 }
6466 else
6467 gsi = si;
6468 if (condtmp)
6469 {
6470 condtemp = OMP_CLAUSE_DECL (condtmp);
6471 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6472 OMP_CLAUSE__CONDTEMP_);
6473 cond_var = OMP_CLAUSE_DECL (c);
6474 tree type = TREE_TYPE (condtemp);
6475 memv = create_tmp_var (type);
6476 TREE_ADDRESSABLE (memv) = 1;
6477 unsigned cnt = 0;
6478 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6479 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6480 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6481 ++cnt;
6482 unsigned HOST_WIDE_INT sz
6483 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6484 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6485 false);
6486 mem = build_fold_addr_expr (memv);
6487 }
28567c40
JJ
6488 t = build_int_cst (unsigned_type_node, len - 1);
6489 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8e7757ba 6490 stmt = gimple_build_call (u, 3, t, reductions, mem);
28567c40
JJ
6491 gimple_call_set_lhs (stmt, vin);
6492 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8e7757ba
JJ
6493 if (condtmp)
6494 {
6495 expand_omp_build_assign (&gsi, condtemp, memv, false);
6496 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6497 vin, build_one_cst (TREE_TYPE (cond_var)));
6498 expand_omp_build_assign (&gsi, cond_var, t, false);
6499 }
6500 if (reductmp)
6501 {
6502 gsi_remove (&gsi, true);
6503 release_ssa_name (gimple_assign_lhs (g));
6504 }
28567c40
JJ
6505 }
6506 else if (!is_combined_parallel (region))
629b3d75
MJ
6507 {
6508 /* If we are not inside a combined parallel+sections region,
6509 call GOMP_sections_start. */
6510 t = build_int_cst (unsigned_type_node, len - 1);
6511 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6512 stmt = gimple_build_call (u, 1, t);
6513 }
6514 else
6515 {
6516 /* Otherwise, call GOMP_sections_next. */
6517 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6518 stmt = gimple_build_call (u, 0);
6519 }
8e7757ba 6520 if (!reductmp && !condtmp)
28567c40
JJ
6521 {
6522 gimple_call_set_lhs (stmt, vin);
6523 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6524 }
629b3d75
MJ
6525 gsi_remove (&si, true);
6526
6527 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6528 L0_BB. */
65f4b875 6529 switch_si = gsi_last_nondebug_bb (l0_bb);
629b3d75
MJ
6530 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6531 if (exit_reachable)
6532 {
6533 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6534 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6535 vmain = gimple_omp_continue_control_use (cont);
6536 vnext = gimple_omp_continue_control_def (cont);
6537 }
6538 else
6539 {
6540 vmain = vin;
6541 vnext = NULL_TREE;
6542 }
6543
6544 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6545 label_vec.quick_push (t);
6546 i = 1;
6547
6548 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6549 for (inner = region->inner, casei = 1;
6550 inner;
6551 inner = inner->next, i++, casei++)
6552 {
6553 basic_block s_entry_bb, s_exit_bb;
6554
6555 /* Skip optional reduction region. */
6556 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6557 {
6558 --i;
6559 --casei;
6560 continue;
6561 }
6562
6563 s_entry_bb = inner->entry;
6564 s_exit_bb = inner->exit;
6565
6566 t = gimple_block_label (s_entry_bb);
6567 u = build_int_cst (unsigned_type_node, casei);
6568 u = build_case_label (u, NULL, t);
6569 label_vec.quick_push (u);
6570
65f4b875 6571 si = gsi_last_nondebug_bb (s_entry_bb);
629b3d75
MJ
6572 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6573 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6574 gsi_remove (&si, true);
6575 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6576
6577 if (s_exit_bb == NULL)
6578 continue;
6579
65f4b875 6580 si = gsi_last_nondebug_bb (s_exit_bb);
629b3d75
MJ
6581 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6582 gsi_remove (&si, true);
6583
6584 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6585 }
6586
6587 /* Error handling code goes in DEFAULT_BB. */
6588 t = gimple_block_label (default_bb);
6589 u = build_case_label (NULL, NULL, t);
6590 make_edge (l0_bb, default_bb, 0);
6591 add_bb_to_loop (default_bb, current_loops->tree_root);
6592
6593 stmt = gimple_build_switch (vmain, u, label_vec);
6594 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6595 gsi_remove (&switch_si, true);
6596
6597 si = gsi_start_bb (default_bb);
6598 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6599 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6600
6601 if (exit_reachable)
6602 {
6603 tree bfn_decl;
6604
6605 /* Code to get the next section goes in L1_BB. */
65f4b875 6606 si = gsi_last_nondebug_bb (l1_bb);
629b3d75
MJ
6607 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6608
6609 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6610 stmt = gimple_build_call (bfn_decl, 0);
6611 gimple_call_set_lhs (stmt, vnext);
8e7757ba
JJ
6612 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6613 if (cond_var)
6614 {
6615 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6616 vnext, build_one_cst (TREE_TYPE (cond_var)));
6617 expand_omp_build_assign (&si, cond_var, t, false);
6618 }
629b3d75
MJ
6619 gsi_remove (&si, true);
6620
6621 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6622 }
6623
6624 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
65f4b875 6625 si = gsi_last_nondebug_bb (l2_bb);
629b3d75
MJ
6626 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6627 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6628 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6629 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6630 else
6631 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6632 stmt = gimple_build_call (t, 0);
6633 if (gimple_omp_return_lhs (gsi_stmt (si)))
6634 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6635 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6636 gsi_remove (&si, true);
6637
6638 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6639}
6640
6641/* Expand code for an OpenMP single directive. We've already expanded
6642 much of the code, here we simply place the GOMP_barrier call. */
6643
6644static void
6645expand_omp_single (struct omp_region *region)
6646{
6647 basic_block entry_bb, exit_bb;
6648 gimple_stmt_iterator si;
6649
6650 entry_bb = region->entry;
6651 exit_bb = region->exit;
6652
65f4b875 6653 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
6654 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6655 gsi_remove (&si, true);
6656 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6657
65f4b875 6658 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6659 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6660 {
6661 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6662 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6663 }
6664 gsi_remove (&si, true);
6665 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6666}
6667
6668/* Generic expansion for OpenMP synchronization directives: master,
6669 ordered and critical. All we need to do here is remove the entry
6670 and exit markers for REGION. */
6671
6672static void
6673expand_omp_synch (struct omp_region *region)
6674{
6675 basic_block entry_bb, exit_bb;
6676 gimple_stmt_iterator si;
6677
6678 entry_bb = region->entry;
6679 exit_bb = region->exit;
6680
65f4b875 6681 si = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
6682 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6683 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6684 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6685 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6686 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6687 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
28567c40
JJ
6688 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6689 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6690 {
6691 expand_omp_taskreg (region);
6692 return;
6693 }
629b3d75
MJ
6694 gsi_remove (&si, true);
6695 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6696
6697 if (exit_bb)
6698 {
65f4b875 6699 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6700 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6701 gsi_remove (&si, true);
6702 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6703 }
6704}
6705
28567c40
JJ
6706/* Translate enum omp_memory_order to enum memmodel. The two enums
6707 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6708 is 0. */
6709
6710static enum memmodel
6711omp_memory_order_to_memmodel (enum omp_memory_order mo)
6712{
6713 switch (mo)
6714 {
6715 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6716 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6717 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6718 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6719 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6720 default: gcc_unreachable ();
6721 }
6722}
6723
629b3d75
MJ
6724/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6725 operation as a normal volatile load. */
6726
6727static bool
6728expand_omp_atomic_load (basic_block load_bb, tree addr,
6729 tree loaded_val, int index)
6730{
6731 enum built_in_function tmpbase;
6732 gimple_stmt_iterator gsi;
6733 basic_block store_bb;
6734 location_t loc;
6735 gimple *stmt;
6736 tree decl, call, type, itype;
6737
65f4b875 6738 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6739 stmt = gsi_stmt (gsi);
6740 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6741 loc = gimple_location (stmt);
6742
6743 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6744 is smaller than word size, then expand_atomic_load assumes that the load
6745 is atomic. We could avoid the builtin entirely in this case. */
6746
6747 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6748 decl = builtin_decl_explicit (tmpbase);
6749 if (decl == NULL_TREE)
6750 return false;
6751
6752 type = TREE_TYPE (loaded_val);
6753 itype = TREE_TYPE (TREE_TYPE (decl));
6754
28567c40
JJ
6755 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6756 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6757 call = build_call_expr_loc (loc, decl, 2, addr, mo);
629b3d75
MJ
6758 if (!useless_type_conversion_p (type, itype))
6759 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6760 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6761
6762 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6763 gsi_remove (&gsi, true);
6764
6765 store_bb = single_succ (load_bb);
65f4b875 6766 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6767 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6768 gsi_remove (&gsi, true);
6769
6770 if (gimple_in_ssa_p (cfun))
6771 update_ssa (TODO_update_ssa_no_phi);
6772
6773 return true;
6774}
6775
6776/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6777 operation as a normal volatile store. */
6778
6779static bool
6780expand_omp_atomic_store (basic_block load_bb, tree addr,
6781 tree loaded_val, tree stored_val, int index)
6782{
6783 enum built_in_function tmpbase;
6784 gimple_stmt_iterator gsi;
6785 basic_block store_bb = single_succ (load_bb);
6786 location_t loc;
6787 gimple *stmt;
6788 tree decl, call, type, itype;
6789 machine_mode imode;
6790 bool exchange;
6791
65f4b875 6792 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6793 stmt = gsi_stmt (gsi);
6794 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6795
6796 /* If the load value is needed, then this isn't a store but an exchange. */
6797 exchange = gimple_omp_atomic_need_value_p (stmt);
6798
65f4b875 6799 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6800 stmt = gsi_stmt (gsi);
6801 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6802 loc = gimple_location (stmt);
6803
6804 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6805 is smaller than word size, then expand_atomic_store assumes that the store
6806 is atomic. We could avoid the builtin entirely in this case. */
6807
6808 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6809 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6810 decl = builtin_decl_explicit (tmpbase);
6811 if (decl == NULL_TREE)
6812 return false;
6813
6814 type = TREE_TYPE (stored_val);
6815
6816 /* Dig out the type of the function's second argument. */
6817 itype = TREE_TYPE (decl);
6818 itype = TYPE_ARG_TYPES (itype);
6819 itype = TREE_CHAIN (itype);
6820 itype = TREE_VALUE (itype);
6821 imode = TYPE_MODE (itype);
6822
6823 if (exchange && !can_atomic_exchange_p (imode, true))
6824 return false;
6825
6826 if (!useless_type_conversion_p (itype, type))
6827 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
28567c40
JJ
6828 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6829 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6830 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
629b3d75
MJ
6831 if (exchange)
6832 {
6833 if (!useless_type_conversion_p (type, itype))
6834 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6835 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6836 }
6837
6838 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6839 gsi_remove (&gsi, true);
6840
6841 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
65f4b875 6842 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6843 gsi_remove (&gsi, true);
6844
6845 if (gimple_in_ssa_p (cfun))
6846 update_ssa (TODO_update_ssa_no_phi);
6847
6848 return true;
6849}
6850
6851/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6852 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6853 size of the data type, and thus usable to find the index of the builtin
6854 decl. Returns false if the expression is not of the proper form. */
6855
6856static bool
6857expand_omp_atomic_fetch_op (basic_block load_bb,
6858 tree addr, tree loaded_val,
6859 tree stored_val, int index)
6860{
6861 enum built_in_function oldbase, newbase, tmpbase;
6862 tree decl, itype, call;
6863 tree lhs, rhs;
6864 basic_block store_bb = single_succ (load_bb);
6865 gimple_stmt_iterator gsi;
6866 gimple *stmt;
6867 location_t loc;
6868 enum tree_code code;
6869 bool need_old, need_new;
6870 machine_mode imode;
629b3d75
MJ
6871
6872 /* We expect to find the following sequences:
6873
6874 load_bb:
6875 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6876
6877 store_bb:
6878 val = tmp OP something; (or: something OP tmp)
6879 GIMPLE_OMP_STORE (val)
6880
6881 ???FIXME: Allow a more flexible sequence.
6882 Perhaps use data flow to pick the statements.
6883
6884 */
6885
6886 gsi = gsi_after_labels (store_bb);
6887 stmt = gsi_stmt (gsi);
65f4b875
AO
6888 if (is_gimple_debug (stmt))
6889 {
6890 gsi_next_nondebug (&gsi);
6891 if (gsi_end_p (gsi))
6892 return false;
6893 stmt = gsi_stmt (gsi);
6894 }
629b3d75
MJ
6895 loc = gimple_location (stmt);
6896 if (!is_gimple_assign (stmt))
6897 return false;
65f4b875 6898 gsi_next_nondebug (&gsi);
629b3d75
MJ
6899 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6900 return false;
6901 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6902 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
28567c40
JJ
6903 enum omp_memory_order omo
6904 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6905 enum memmodel mo = omp_memory_order_to_memmodel (omo);
629b3d75
MJ
6906 gcc_checking_assert (!need_old || !need_new);
6907
6908 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6909 return false;
6910
6911 /* Check for one of the supported fetch-op operations. */
6912 code = gimple_assign_rhs_code (stmt);
6913 switch (code)
6914 {
6915 case PLUS_EXPR:
6916 case POINTER_PLUS_EXPR:
6917 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6918 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6919 break;
6920 case MINUS_EXPR:
6921 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6922 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6923 break;
6924 case BIT_AND_EXPR:
6925 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6926 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6927 break;
6928 case BIT_IOR_EXPR:
6929 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6930 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6931 break;
6932 case BIT_XOR_EXPR:
6933 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6934 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6935 break;
6936 default:
6937 return false;
6938 }
6939
6940 /* Make sure the expression is of the proper form. */
6941 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6942 rhs = gimple_assign_rhs2 (stmt);
6943 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6944 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6945 rhs = gimple_assign_rhs1 (stmt);
6946 else
6947 return false;
6948
6949 tmpbase = ((enum built_in_function)
6950 ((need_new ? newbase : oldbase) + index + 1));
6951 decl = builtin_decl_explicit (tmpbase);
6952 if (decl == NULL_TREE)
6953 return false;
6954 itype = TREE_TYPE (TREE_TYPE (decl));
6955 imode = TYPE_MODE (itype);
6956
6957 /* We could test all of the various optabs involved, but the fact of the
6958 matter is that (with the exception of i486 vs i586 and xadd) all targets
6959 that support any atomic operaton optab also implements compare-and-swap.
6960 Let optabs.c take care of expanding any compare-and-swap loop. */
dc06356a 6961 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
629b3d75
MJ
6962 return false;
6963
65f4b875 6964 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6965 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6966
6967 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6968 It only requires that the operation happen atomically. Thus we can
6969 use the RELAXED memory model. */
6970 call = build_call_expr_loc (loc, decl, 3, addr,
6971 fold_convert_loc (loc, itype, rhs),
28567c40 6972 build_int_cst (NULL, mo));
629b3d75
MJ
6973
6974 if (need_old || need_new)
6975 {
6976 lhs = need_old ? loaded_val : stored_val;
6977 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6978 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6979 }
6980 else
6981 call = fold_convert_loc (loc, void_type_node, call);
6982 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6983 gsi_remove (&gsi, true);
6984
65f4b875 6985 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6986 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6987 gsi_remove (&gsi, true);
65f4b875 6988 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6989 stmt = gsi_stmt (gsi);
6990 gsi_remove (&gsi, true);
6991
6992 if (gimple_in_ssa_p (cfun))
6993 {
6994 release_defs (stmt);
6995 update_ssa (TODO_update_ssa_no_phi);
6996 }
6997
6998 return true;
6999}
7000
7001/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7002
7003 oldval = *addr;
7004 repeat:
01914336 7005 newval = rhs; // with oldval replacing *addr in rhs
629b3d75
MJ
7006 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7007 if (oldval != newval)
7008 goto repeat;
7009
7010 INDEX is log2 of the size of the data type, and thus usable to find the
7011 index of the builtin decl. */
7012
7013static bool
7014expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7015 tree addr, tree loaded_val, tree stored_val,
7016 int index)
7017{
7018 tree loadedi, storedi, initial, new_storedi, old_vali;
b4e47472 7019 tree type, itype, cmpxchg, iaddr, atype;
629b3d75
MJ
7020 gimple_stmt_iterator si;
7021 basic_block loop_header = single_succ (load_bb);
7022 gimple *phi, *stmt;
7023 edge e;
7024 enum built_in_function fncode;
7025
7026 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7027 order to use the RELAXED memory model effectively. */
7028 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7029 + index + 1);
7030 cmpxchg = builtin_decl_explicit (fncode);
7031 if (cmpxchg == NULL_TREE)
7032 return false;
b4e47472
JJ
7033 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7034 atype = type;
629b3d75
MJ
7035 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7036
dc06356a
JJ
7037 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7038 || !can_atomic_load_p (TYPE_MODE (itype)))
629b3d75
MJ
7039 return false;
7040
7041 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
65f4b875 7042 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
7043 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7044
7045 /* For floating-point values, we'll need to view-convert them to integers
7046 so that we can perform the atomic compare and swap. Simplify the
7047 following code by always setting up the "i"ntegral variables. */
7048 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7049 {
7050 tree iaddr_val;
7051
7052 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7053 true));
b4e47472 7054 atype = itype;
629b3d75
MJ
7055 iaddr_val
7056 = force_gimple_operand_gsi (&si,
7057 fold_convert (TREE_TYPE (iaddr), addr),
7058 false, NULL_TREE, true, GSI_SAME_STMT);
7059 stmt = gimple_build_assign (iaddr, iaddr_val);
7060 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7061 loadedi = create_tmp_var (itype);
7062 if (gimple_in_ssa_p (cfun))
7063 loadedi = make_ssa_name (loadedi);
7064 }
7065 else
7066 {
7067 iaddr = addr;
7068 loadedi = loaded_val;
7069 }
7070
7071 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7072 tree loaddecl = builtin_decl_explicit (fncode);
7073 if (loaddecl)
7074 initial
b4e47472 7075 = fold_convert (atype,
629b3d75
MJ
7076 build_call_expr (loaddecl, 2, iaddr,
7077 build_int_cst (NULL_TREE,
7078 MEMMODEL_RELAXED)));
7079 else
b4e47472
JJ
7080 {
7081 tree off
7082 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7083 true), 0);
7084 initial = build2 (MEM_REF, atype, iaddr, off);
7085 }
629b3d75
MJ
7086
7087 initial
7088 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7089 GSI_SAME_STMT);
7090
7091 /* Move the value to the LOADEDI temporary. */
7092 if (gimple_in_ssa_p (cfun))
7093 {
7094 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7095 phi = create_phi_node (loadedi, loop_header);
7096 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7097 initial);
7098 }
7099 else
7100 gsi_insert_before (&si,
7101 gimple_build_assign (loadedi, initial),
7102 GSI_SAME_STMT);
7103 if (loadedi != loaded_val)
7104 {
7105 gimple_stmt_iterator gsi2;
7106 tree x;
7107
7108 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7109 gsi2 = gsi_start_bb (loop_header);
7110 if (gimple_in_ssa_p (cfun))
7111 {
7112 gassign *stmt;
7113 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7114 true, GSI_SAME_STMT);
7115 stmt = gimple_build_assign (loaded_val, x);
7116 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7117 }
7118 else
7119 {
7120 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7121 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7122 true, GSI_SAME_STMT);
7123 }
7124 }
7125 gsi_remove (&si, true);
7126
65f4b875 7127 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
7128 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7129
7130 if (iaddr == addr)
7131 storedi = stored_val;
7132 else
01914336
MJ
7133 storedi
7134 = force_gimple_operand_gsi (&si,
7135 build1 (VIEW_CONVERT_EXPR, itype,
7136 stored_val), true, NULL_TREE, true,
7137 GSI_SAME_STMT);
629b3d75
MJ
7138
7139 /* Build the compare&swap statement. */
7140 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7141 new_storedi = force_gimple_operand_gsi (&si,
7142 fold_convert (TREE_TYPE (loadedi),
7143 new_storedi),
7144 true, NULL_TREE,
7145 true, GSI_SAME_STMT);
7146
7147 if (gimple_in_ssa_p (cfun))
7148 old_vali = loadedi;
7149 else
7150 {
7151 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7152 stmt = gimple_build_assign (old_vali, loadedi);
7153 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7154
7155 stmt = gimple_build_assign (loadedi, new_storedi);
7156 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7157 }
7158
7159 /* Note that we always perform the comparison as an integer, even for
7160 floating point. This allows the atomic operation to properly
7161 succeed even with NaNs and -0.0. */
01914336
MJ
7162 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7163 stmt = gimple_build_cond_empty (ne);
629b3d75
MJ
7164 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7165
7166 /* Update cfg. */
7167 e = single_succ_edge (store_bb);
7168 e->flags &= ~EDGE_FALLTHRU;
7169 e->flags |= EDGE_FALSE_VALUE;
357067f2
JH
7170 /* Expect no looping. */
7171 e->probability = profile_probability::guessed_always ();
629b3d75
MJ
7172
7173 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
357067f2 7174 e->probability = profile_probability::guessed_never ();
629b3d75
MJ
7175
7176 /* Copy the new value to loadedi (we already did that before the condition
7177 if we are not in SSA). */
7178 if (gimple_in_ssa_p (cfun))
7179 {
7180 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7181 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7182 }
7183
7184 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7185 gsi_remove (&si, true);
7186
7187 struct loop *loop = alloc_loop ();
7188 loop->header = loop_header;
7189 loop->latch = store_bb;
7190 add_loop (loop, loop_header->loop_father);
7191
7192 if (gimple_in_ssa_p (cfun))
7193 update_ssa (TODO_update_ssa_no_phi);
7194
7195 return true;
7196}
7197
7198/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7199
01914336
MJ
7200 GOMP_atomic_start ();
7201 *addr = rhs;
7202 GOMP_atomic_end ();
629b3d75
MJ
7203
7204 The result is not globally atomic, but works so long as all parallel
7205 references are within #pragma omp atomic directives. According to
7206 responses received from omp@openmp.org, appears to be within spec.
7207 Which makes sense, since that's how several other compilers handle
7208 this situation as well.
7209 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7210 expanding. STORED_VAL is the operand of the matching
7211 GIMPLE_OMP_ATOMIC_STORE.
7212
7213 We replace
7214 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7215 loaded_val = *addr;
7216
7217 and replace
7218 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7219 *addr = stored_val;
7220*/
7221
7222static bool
7223expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7224 tree addr, tree loaded_val, tree stored_val)
7225{
7226 gimple_stmt_iterator si;
7227 gassign *stmt;
7228 tree t;
7229
65f4b875 7230 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
7231 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7232
7233 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7234 t = build_call_expr (t, 0);
7235 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7236
b4e47472
JJ
7237 tree mem = build_simple_mem_ref (addr);
7238 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7239 TREE_OPERAND (mem, 1)
7240 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7241 true),
7242 TREE_OPERAND (mem, 1));
7243 stmt = gimple_build_assign (loaded_val, mem);
629b3d75
MJ
7244 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7245 gsi_remove (&si, true);
7246
65f4b875 7247 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
7248 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7249
b4e47472 7250 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
629b3d75
MJ
7251 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7252
7253 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7254 t = build_call_expr (t, 0);
7255 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7256 gsi_remove (&si, true);
7257
7258 if (gimple_in_ssa_p (cfun))
7259 update_ssa (TODO_update_ssa_no_phi);
7260 return true;
7261}
7262
7263/* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
01914336 7264 using expand_omp_atomic_fetch_op. If it failed, we try to
629b3d75
MJ
7265 call expand_omp_atomic_pipeline, and if it fails too, the
7266 ultimate fallback is wrapping the operation in a mutex
7267 (expand_omp_atomic_mutex). REGION is the atomic region built
7268 by build_omp_regions_1(). */
7269
7270static void
7271expand_omp_atomic (struct omp_region *region)
7272{
7273 basic_block load_bb = region->entry, store_bb = region->exit;
7274 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7275 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7276 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7277 tree addr = gimple_omp_atomic_load_rhs (load);
7278 tree stored_val = gimple_omp_atomic_store_val (store);
b4e47472 7279 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
629b3d75
MJ
7280 HOST_WIDE_INT index;
7281
7282 /* Make sure the type is one of the supported sizes. */
7283 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7284 index = exact_log2 (index);
7285 if (index >= 0 && index <= 4)
7286 {
7287 unsigned int align = TYPE_ALIGN_UNIT (type);
7288
7289 /* __sync builtins require strict data alignment. */
7290 if (exact_log2 (align) >= index)
7291 {
7292 /* Atomic load. */
3bd8f481 7293 scalar_mode smode;
629b3d75 7294 if (loaded_val == stored_val
3bd8f481
RS
7295 && (is_int_mode (TYPE_MODE (type), &smode)
7296 || is_float_mode (TYPE_MODE (type), &smode))
7297 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
7298 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7299 return;
7300
7301 /* Atomic store. */
3bd8f481
RS
7302 if ((is_int_mode (TYPE_MODE (type), &smode)
7303 || is_float_mode (TYPE_MODE (type), &smode))
7304 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
7305 && store_bb == single_succ (load_bb)
7306 && first_stmt (store_bb) == store
7307 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7308 stored_val, index))
7309 return;
7310
7311 /* When possible, use specialized atomic update functions. */
7312 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7313 && store_bb == single_succ (load_bb)
7314 && expand_omp_atomic_fetch_op (load_bb, addr,
7315 loaded_val, stored_val, index))
7316 return;
7317
7318 /* If we don't have specialized __sync builtins, try and implement
7319 as a compare and swap loop. */
7320 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7321 loaded_val, stored_val, index))
7322 return;
7323 }
7324 }
7325
7326 /* The ultimate fallback is wrapping the operation in a mutex. */
7327 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7328}
7329
7330/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7331 at REGION_EXIT. */
7332
7333static void
7334mark_loops_in_oacc_kernels_region (basic_block region_entry,
7335 basic_block region_exit)
7336{
7337 struct loop *outer = region_entry->loop_father;
7338 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7339
7340 /* Don't parallelize the kernels region if it contains more than one outer
7341 loop. */
7342 unsigned int nr_outer_loops = 0;
7343 struct loop *single_outer = NULL;
7344 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7345 {
7346 gcc_assert (loop_outer (loop) == outer);
7347
7348 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7349 continue;
7350
7351 if (region_exit != NULL
7352 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7353 continue;
7354
7355 nr_outer_loops++;
7356 single_outer = loop;
7357 }
7358 if (nr_outer_loops != 1)
7359 return;
7360
01914336
MJ
7361 for (struct loop *loop = single_outer->inner;
7362 loop != NULL;
7363 loop = loop->inner)
629b3d75
MJ
7364 if (loop->next)
7365 return;
7366
7367 /* Mark the loops in the region. */
7368 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7369 loop->in_oacc_kernels_region = true;
7370}
7371
7372/* Types used to pass grid and wortkgroup sizes to kernel invocation. */
7373
7374struct GTY(()) grid_launch_attributes_trees
7375{
7376 tree kernel_dim_array_type;
7377 tree kernel_lattrs_dimnum_decl;
7378 tree kernel_lattrs_grid_decl;
7379 tree kernel_lattrs_group_decl;
7380 tree kernel_launch_attributes_type;
7381};
7382
7383static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7384
7385/* Create types used to pass kernel launch attributes to target. */
7386
7387static void
7388grid_create_kernel_launch_attr_types (void)
7389{
7390 if (grid_attr_trees)
7391 return;
7392 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7393
7394 tree dim_arr_index_type
7395 = build_index_type (build_int_cst (integer_type_node, 2));
7396 grid_attr_trees->kernel_dim_array_type
7397 = build_array_type (uint32_type_node, dim_arr_index_type);
7398
7399 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7400 grid_attr_trees->kernel_lattrs_dimnum_decl
7401 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7402 uint32_type_node);
7403 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7404
7405 grid_attr_trees->kernel_lattrs_grid_decl
7406 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7407 grid_attr_trees->kernel_dim_array_type);
7408 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7409 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7410 grid_attr_trees->kernel_lattrs_group_decl
7411 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7412 grid_attr_trees->kernel_dim_array_type);
7413 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7414 = grid_attr_trees->kernel_lattrs_grid_decl;
7415 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7416 "__gomp_kernel_launch_attributes",
7417 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7418}
7419
7420/* Insert before the current statement in GSI a store of VALUE to INDEX of
7421 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7422 of type uint32_type_node. */
7423
7424static void
7425grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7426 tree fld_decl, int index, tree value)
7427{
7428 tree ref = build4 (ARRAY_REF, uint32_type_node,
7429 build3 (COMPONENT_REF,
7430 grid_attr_trees->kernel_dim_array_type,
7431 range_var, fld_decl, NULL_TREE),
7432 build_int_cst (integer_type_node, index),
7433 NULL_TREE, NULL_TREE);
7434 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7435}
7436
7437/* Return a tree representation of a pointer to a structure with grid and
7438 work-group size information. Statements filling that information will be
7439 inserted before GSI, TGT_STMT is the target statement which has the
7440 necessary information in it. */
7441
7442static tree
7443grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7444 gomp_target *tgt_stmt)
7445{
7446 grid_create_kernel_launch_attr_types ();
7447 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7448 "__kernel_launch_attrs");
7449
7450 unsigned max_dim = 0;
7451 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7452 clause;
7453 clause = OMP_CLAUSE_CHAIN (clause))
7454 {
7455 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7456 continue;
7457
7458 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7459 max_dim = MAX (dim, max_dim);
7460
7461 grid_insert_store_range_dim (gsi, lattrs,
7462 grid_attr_trees->kernel_lattrs_grid_decl,
7463 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7464 grid_insert_store_range_dim (gsi, lattrs,
7465 grid_attr_trees->kernel_lattrs_group_decl,
7466 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7467 }
7468
7469 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7470 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7471 gcc_checking_assert (max_dim <= 2);
7472 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7473 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7474 GSI_SAME_STMT);
7475 TREE_ADDRESSABLE (lattrs) = 1;
7476 return build_fold_addr_expr (lattrs);
7477}
7478
7479/* Build target argument identifier from the DEVICE identifier, value
7480 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7481
7482static tree
7483get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7484{
7485 tree t = build_int_cst (integer_type_node, device);
7486 if (subseqent_param)
7487 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7488 build_int_cst (integer_type_node,
7489 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7490 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7491 build_int_cst (integer_type_node, id));
7492 return t;
7493}
7494
7495/* Like above but return it in type that can be directly stored as an element
7496 of the argument array. */
7497
7498static tree
7499get_target_argument_identifier (int device, bool subseqent_param, int id)
7500{
7501 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7502 return fold_convert (ptr_type_node, t);
7503}
7504
7505/* Return a target argument consisting of DEVICE identifier, value identifier
7506 ID, and the actual VALUE. */
7507
7508static tree
7509get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7510 tree value)
7511{
7512 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7513 fold_convert (integer_type_node, value),
7514 build_int_cst (unsigned_type_node,
7515 GOMP_TARGET_ARG_VALUE_SHIFT));
7516 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7517 get_target_argument_identifier_1 (device, false, id));
7518 t = fold_convert (ptr_type_node, t);
7519 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7520}
7521
7522/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7523 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7524 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7525 arguments. */
7526
7527static void
7528push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7529 int id, tree value, vec <tree> *args)
7530{
7531 if (tree_fits_shwi_p (value)
7532 && tree_to_shwi (value) > -(1 << 15)
7533 && tree_to_shwi (value) < (1 << 15))
7534 args->quick_push (get_target_argument_value (gsi, device, id, value));
7535 else
7536 {
7537 args->quick_push (get_target_argument_identifier (device, true, id));
7538 value = fold_convert (ptr_type_node, value);
7539 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7540 GSI_SAME_STMT);
7541 args->quick_push (value);
7542 }
7543}
7544
01914336 7545/* Create an array of arguments that is then passed to GOMP_target. */
629b3d75
MJ
7546
7547static tree
7548get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7549{
7550 auto_vec <tree, 6> args;
7551 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7552 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7553 if (c)
7554 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7555 else
7556 t = integer_minus_one_node;
7557 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7558 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7559
7560 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7561 if (c)
7562 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7563 else
7564 t = integer_minus_one_node;
7565 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7566 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7567 &args);
7568
7569 /* Add HSA-specific grid sizes, if available. */
7570 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7571 OMP_CLAUSE__GRIDDIM_))
7572 {
01914336
MJ
7573 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7574 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
629b3d75
MJ
7575 args.quick_push (t);
7576 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7577 }
7578
7579 /* Produce more, perhaps device specific, arguments here. */
7580
7581 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7582 args.length () + 1),
7583 ".omp_target_args");
7584 for (unsigned i = 0; i < args.length (); i++)
7585 {
7586 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7587 build_int_cst (integer_type_node, i),
7588 NULL_TREE, NULL_TREE);
7589 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7590 GSI_SAME_STMT);
7591 }
7592 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7593 build_int_cst (integer_type_node, args.length ()),
7594 NULL_TREE, NULL_TREE);
7595 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7596 GSI_SAME_STMT);
7597 TREE_ADDRESSABLE (argarray) = 1;
7598 return build_fold_addr_expr (argarray);
7599}
7600
7601/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7602
7603static void
7604expand_omp_target (struct omp_region *region)
7605{
7606 basic_block entry_bb, exit_bb, new_bb;
7607 struct function *child_cfun;
7608 tree child_fn, block, t;
7609 gimple_stmt_iterator gsi;
7610 gomp_target *entry_stmt;
7611 gimple *stmt;
7612 edge e;
7613 bool offloaded, data_region;
7614
7615 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7616 new_bb = region->entry;
7617
7618 offloaded = is_gimple_omp_offloaded (entry_stmt);
7619 switch (gimple_omp_target_kind (entry_stmt))
7620 {
7621 case GF_OMP_TARGET_KIND_REGION:
7622 case GF_OMP_TARGET_KIND_UPDATE:
7623 case GF_OMP_TARGET_KIND_ENTER_DATA:
7624 case GF_OMP_TARGET_KIND_EXIT_DATA:
7625 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7626 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7627 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7628 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7629 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7630 data_region = false;
7631 break;
7632 case GF_OMP_TARGET_KIND_DATA:
7633 case GF_OMP_TARGET_KIND_OACC_DATA:
7634 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7635 data_region = true;
7636 break;
7637 default:
7638 gcc_unreachable ();
7639 }
7640
7641 child_fn = NULL_TREE;
7642 child_cfun = NULL;
7643 if (offloaded)
7644 {
7645 child_fn = gimple_omp_target_child_fn (entry_stmt);
7646 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7647 }
7648
7649 /* Supported by expand_omp_taskreg, but not here. */
7650 if (child_cfun != NULL)
7651 gcc_checking_assert (!child_cfun->cfg);
7652 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7653
7654 entry_bb = region->entry;
7655 exit_bb = region->exit;
7656
7657 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
25651634
TS
7658 {
7659 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7660
7661 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7662 will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7663 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7664 DECL_ATTRIBUTES (child_fn)
7665 = tree_cons (get_identifier ("oacc kernels"),
7666 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7667 }
629b3d75
MJ
7668
7669 if (offloaded)
7670 {
7671 unsigned srcidx, dstidx, num;
7672
7673 /* If the offloading region needs data sent from the parent
7674 function, then the very first statement (except possible
7675 tree profile counter updates) of the offloading body
7676 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7677 &.OMP_DATA_O is passed as an argument to the child function,
7678 we need to replace it with the argument as seen by the child
7679 function.
7680
7681 In most cases, this will end up being the identity assignment
7682 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7683 a function call that has been inlined, the original PARM_DECL
7684 .OMP_DATA_I may have been converted into a different local
7685 variable. In which case, we need to keep the assignment. */
7686 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7687 if (data_arg)
7688 {
7689 basic_block entry_succ_bb = single_succ (entry_bb);
7690 gimple_stmt_iterator gsi;
7691 tree arg;
7692 gimple *tgtcopy_stmt = NULL;
7693 tree sender = TREE_VEC_ELT (data_arg, 0);
7694
7695 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7696 {
7697 gcc_assert (!gsi_end_p (gsi));
7698 stmt = gsi_stmt (gsi);
7699 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7700 continue;
7701
7702 if (gimple_num_ops (stmt) == 2)
7703 {
7704 tree arg = gimple_assign_rhs1 (stmt);
7705
7706 /* We're ignoring the subcode because we're
7707 effectively doing a STRIP_NOPS. */
7708
7709 if (TREE_CODE (arg) == ADDR_EXPR
7710 && TREE_OPERAND (arg, 0) == sender)
7711 {
7712 tgtcopy_stmt = stmt;
7713 break;
7714 }
7715 }
7716 }
7717
7718 gcc_assert (tgtcopy_stmt != NULL);
7719 arg = DECL_ARGUMENTS (child_fn);
7720
7721 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7722 gsi_remove (&gsi, true);
7723 }
7724
7725 /* Declare local variables needed in CHILD_CFUN. */
7726 block = DECL_INITIAL (child_fn);
7727 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7728 /* The gimplifier could record temporaries in the offloading block
7729 rather than in containing function's local_decls chain,
7730 which would mean cgraph missed finalizing them. Do it now. */
7731 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7732 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7733 varpool_node::finalize_decl (t);
7734 DECL_SAVED_TREE (child_fn) = NULL;
7735 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7736 gimple_set_body (child_fn, NULL);
7737 TREE_USED (block) = 1;
7738
7739 /* Reset DECL_CONTEXT on function arguments. */
7740 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7741 DECL_CONTEXT (t) = child_fn;
7742
7743 /* Split ENTRY_BB at GIMPLE_*,
7744 so that it can be moved to the child function. */
65f4b875 7745 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7746 stmt = gsi_stmt (gsi);
7747 gcc_assert (stmt
7748 && gimple_code (stmt) == gimple_code (entry_stmt));
7749 e = split_block (entry_bb, stmt);
7750 gsi_remove (&gsi, true);
7751 entry_bb = e->dest;
7752 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7753
7754 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7755 if (exit_bb)
7756 {
65f4b875 7757 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7758 gcc_assert (!gsi_end_p (gsi)
7759 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7760 stmt = gimple_build_return (NULL);
7761 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7762 gsi_remove (&gsi, true);
7763 }
7764
7765 /* Move the offloading region into CHILD_CFUN. */
7766
7767 block = gimple_block (entry_stmt);
7768
7769 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7770 if (exit_bb)
7771 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7772 /* When the OMP expansion process cannot guarantee an up-to-date
7773 loop tree arrange for the child function to fixup loops. */
7774 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7775 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7776
7777 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7778 num = vec_safe_length (child_cfun->local_decls);
7779 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7780 {
7781 t = (*child_cfun->local_decls)[srcidx];
7782 if (DECL_CONTEXT (t) == cfun->decl)
7783 continue;
7784 if (srcidx != dstidx)
7785 (*child_cfun->local_decls)[dstidx] = t;
7786 dstidx++;
7787 }
7788 if (dstidx != num)
7789 vec_safe_truncate (child_cfun->local_decls, dstidx);
7790
7791 /* Inform the callgraph about the new function. */
7792 child_cfun->curr_properties = cfun->curr_properties;
7793 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7794 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7795 cgraph_node *node = cgraph_node::get_create (child_fn);
7796 node->parallelized_function = 1;
7797 cgraph_node::add_new_function (child_fn, true);
7798
7799 /* Add the new function to the offload table. */
7800 if (ENABLE_OFFLOADING)
60bf575c
TV
7801 {
7802 if (in_lto_p)
7803 DECL_PRESERVE_P (child_fn) = 1;
7804 vec_safe_push (offload_funcs, child_fn);
7805 }
629b3d75
MJ
7806
7807 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7808 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7809
7810 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7811 fixed in a following pass. */
7812 push_cfun (child_cfun);
7813 if (need_asm)
9579db35 7814 assign_assembler_name_if_needed (child_fn);
629b3d75
MJ
7815 cgraph_edge::rebuild_edges ();
7816
7817 /* Some EH regions might become dead, see PR34608. If
7818 pass_cleanup_cfg isn't the first pass to happen with the
7819 new child, these dead EH edges might cause problems.
7820 Clean them up now. */
7821 if (flag_exceptions)
7822 {
7823 basic_block bb;
7824 bool changed = false;
7825
7826 FOR_EACH_BB_FN (bb, cfun)
7827 changed |= gimple_purge_dead_eh_edges (bb);
7828 if (changed)
7829 cleanup_tree_cfg ();
7830 }
7831 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7832 verify_loop_structure ();
7833 pop_cfun ();
7834
7835 if (dump_file && !gimple_in_ssa_p (cfun))
7836 {
7837 omp_any_child_fn_dumped = true;
7838 dump_function_header (dump_file, child_fn, dump_flags);
7839 dump_function_to_file (child_fn, dump_file, dump_flags);
7840 }
4ccc4e30
JJ
7841
7842 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
629b3d75
MJ
7843 }
7844
7845 /* Emit a library call to launch the offloading region, or do data
7846 transfers. */
59d5960c 7847 tree t1, t2, t3, t4, depend, c, clauses;
629b3d75 7848 enum built_in_function start_ix;
629b3d75 7849 unsigned int flags_i = 0;
629b3d75
MJ
7850
7851 switch (gimple_omp_target_kind (entry_stmt))
7852 {
7853 case GF_OMP_TARGET_KIND_REGION:
7854 start_ix = BUILT_IN_GOMP_TARGET;
7855 break;
7856 case GF_OMP_TARGET_KIND_DATA:
7857 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7858 break;
7859 case GF_OMP_TARGET_KIND_UPDATE:
7860 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7861 break;
7862 case GF_OMP_TARGET_KIND_ENTER_DATA:
7863 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7864 break;
7865 case GF_OMP_TARGET_KIND_EXIT_DATA:
7866 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7867 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7868 break;
7869 case GF_OMP_TARGET_KIND_OACC_KERNELS:
629b3d75
MJ
7870 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7871 start_ix = BUILT_IN_GOACC_PARALLEL;
7872 break;
7873 case GF_OMP_TARGET_KIND_OACC_DATA:
7874 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7875 start_ix = BUILT_IN_GOACC_DATA_START;
7876 break;
7877 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7878 start_ix = BUILT_IN_GOACC_UPDATE;
7879 break;
7880 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7881 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7882 break;
7883 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7884 start_ix = BUILT_IN_GOACC_DECLARE;
7885 break;
7886 default:
7887 gcc_unreachable ();
7888 }
7889
7890 clauses = gimple_omp_target_clauses (entry_stmt);
7891
59d5960c
TS
7892 tree device = NULL_TREE;
7893 location_t device_loc = UNKNOWN_LOCATION;
7894 tree goacc_flags = NULL_TREE;
7895 if (is_gimple_omp_oacc (entry_stmt))
629b3d75 7896 {
59d5960c
TS
7897 /* By default, no GOACC_FLAGs are set. */
7898 goacc_flags = integer_zero_node;
629b3d75
MJ
7899 }
7900 else
59d5960c
TS
7901 {
7902 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7903 if (c)
7904 {
7905 device = OMP_CLAUSE_DEVICE_ID (c);
7906 device_loc = OMP_CLAUSE_LOCATION (c);
7907 }
7908 else
7909 {
7910 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7911 library choose). */
7912 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7913 device_loc = gimple_location (entry_stmt);
7914 }
629b3d75 7915
59d5960c
TS
7916 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7917 if (c)
7918 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7919 }
629b3d75 7920
59d5960c
TS
7921 /* By default, there is no conditional. */
7922 tree cond = NULL_TREE;
7923 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7924 if (c)
7925 cond = OMP_CLAUSE_IF_EXPR (c);
7926 /* If we found the clause 'if (cond)', build:
7927 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
7928 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
629b3d75
MJ
7929 if (cond)
7930 {
59d5960c
TS
7931 tree *tp;
7932 if (is_gimple_omp_oacc (entry_stmt))
7933 tp = &goacc_flags;
7934 else
7935 {
7936 /* Ensure 'device' is of the correct type. */
7937 device = fold_convert_loc (device_loc, integer_type_node, device);
7938
7939 tp = &device;
7940 }
7941
629b3d75
MJ
7942 cond = gimple_boolify (cond);
7943
7944 basic_block cond_bb, then_bb, else_bb;
7945 edge e;
7946 tree tmp_var;
7947
59d5960c 7948 tmp_var = create_tmp_var (TREE_TYPE (*tp));
629b3d75
MJ
7949 if (offloaded)
7950 e = split_block_after_labels (new_bb);
7951 else
7952 {
65f4b875 7953 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7954 gsi_prev (&gsi);
7955 e = split_block (new_bb, gsi_stmt (gsi));
7956 }
7957 cond_bb = e->src;
7958 new_bb = e->dest;
7959 remove_edge (e);
7960
7961 then_bb = create_empty_bb (cond_bb);
7962 else_bb = create_empty_bb (then_bb);
7963 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7964 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7965
7966 stmt = gimple_build_cond_empty (cond);
7967 gsi = gsi_last_bb (cond_bb);
7968 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7969
7970 gsi = gsi_start_bb (then_bb);
59d5960c 7971 stmt = gimple_build_assign (tmp_var, *tp);
629b3d75
MJ
7972 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7973
7974 gsi = gsi_start_bb (else_bb);
59d5960c
TS
7975 if (is_gimple_omp_oacc (entry_stmt))
7976 stmt = gimple_build_assign (tmp_var,
7977 BIT_IOR_EXPR,
7978 *tp,
7979 build_int_cst (integer_type_node,
7980 GOACC_FLAG_HOST_FALLBACK));
7981 else
7982 stmt = gimple_build_assign (tmp_var,
7983 build_int_cst (integer_type_node,
7984 GOMP_DEVICE_HOST_FALLBACK));
629b3d75
MJ
7985 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7986
7987 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7988 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7989 add_bb_to_loop (then_bb, cond_bb->loop_father);
7990 add_bb_to_loop (else_bb, cond_bb->loop_father);
7991 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7992 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7993
59d5960c
TS
7994 *tp = tmp_var;
7995
65f4b875 7996 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7997 }
7998 else
7999 {
65f4b875 8000 gsi = gsi_last_nondebug_bb (new_bb);
59d5960c
TS
8001
8002 if (device != NULL_TREE)
8003 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8004 true, GSI_SAME_STMT);
629b3d75
MJ
8005 }
8006
8007 t = gimple_omp_target_data_arg (entry_stmt);
8008 if (t == NULL)
8009 {
8010 t1 = size_zero_node;
8011 t2 = build_zero_cst (ptr_type_node);
8012 t3 = t2;
8013 t4 = t2;
8014 }
8015 else
8016 {
8017 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8018 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8019 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8020 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8021 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8022 }
8023
8024 gimple *g;
8025 bool tagging = false;
8026 /* The maximum number used by any start_ix, without varargs. */
8027 auto_vec<tree, 11> args;
59d5960c
TS
8028 if (is_gimple_omp_oacc (entry_stmt))
8029 {
8030 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8031 TREE_TYPE (goacc_flags), goacc_flags);
8032 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8033 NULL_TREE, true,
8034 GSI_SAME_STMT);
8035 args.quick_push (goacc_flags_m);
8036 }
8037 else
8038 args.quick_push (device);
629b3d75
MJ
8039 if (offloaded)
8040 args.quick_push (build_fold_addr_expr (child_fn));
8041 args.quick_push (t1);
8042 args.quick_push (t2);
8043 args.quick_push (t3);
8044 args.quick_push (t4);
8045 switch (start_ix)
8046 {
8047 case BUILT_IN_GOACC_DATA_START:
8048 case BUILT_IN_GOACC_DECLARE:
8049 case BUILT_IN_GOMP_TARGET_DATA:
8050 break;
8051 case BUILT_IN_GOMP_TARGET:
8052 case BUILT_IN_GOMP_TARGET_UPDATE:
8053 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8054 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8055 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8056 if (c)
8057 depend = OMP_CLAUSE_DECL (c);
8058 else
8059 depend = build_int_cst (ptr_type_node, 0);
8060 args.quick_push (depend);
8061 if (start_ix == BUILT_IN_GOMP_TARGET)
8062 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8063 break;
8064 case BUILT_IN_GOACC_PARALLEL:
25651634
TS
8065 oacc_set_fn_attrib (child_fn, clauses, &args);
8066 tagging = true;
629b3d75
MJ
8067 /* FALLTHRU */
8068 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8069 case BUILT_IN_GOACC_UPDATE:
8070 {
8071 tree t_async = NULL_TREE;
8072
8073 /* If present, use the value specified by the respective
8074 clause, making sure that is of the correct type. */
8075 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8076 if (c)
8077 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8078 integer_type_node,
8079 OMP_CLAUSE_ASYNC_EXPR (c));
8080 else if (!tagging)
8081 /* Default values for t_async. */
8082 t_async = fold_convert_loc (gimple_location (entry_stmt),
8083 integer_type_node,
8084 build_int_cst (integer_type_node,
8085 GOMP_ASYNC_SYNC));
8086 if (tagging && t_async)
8087 {
8088 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8089
8090 if (TREE_CODE (t_async) == INTEGER_CST)
8091 {
8092 /* See if we can pack the async arg in to the tag's
8093 operand. */
8094 i_async = TREE_INT_CST_LOW (t_async);
8095 if (i_async < GOMP_LAUNCH_OP_MAX)
8096 t_async = NULL_TREE;
8097 else
8098 i_async = GOMP_LAUNCH_OP_MAX;
8099 }
8100 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8101 i_async));
8102 }
8103 if (t_async)
8104 args.safe_push (t_async);
8105
8106 /* Save the argument index, and ... */
8107 unsigned t_wait_idx = args.length ();
8108 unsigned num_waits = 0;
8109 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8110 if (!tagging || c)
8111 /* ... push a placeholder. */
8112 args.safe_push (integer_zero_node);
8113
8114 for (; c; c = OMP_CLAUSE_CHAIN (c))
8115 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8116 {
8117 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8118 integer_type_node,
8119 OMP_CLAUSE_WAIT_EXPR (c)));
8120 num_waits++;
8121 }
8122
8123 if (!tagging || num_waits)
8124 {
8125 tree len;
8126
8127 /* Now that we know the number, update the placeholder. */
8128 if (tagging)
8129 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8130 else
8131 len = build_int_cst (integer_type_node, num_waits);
8132 len = fold_convert_loc (gimple_location (entry_stmt),
8133 unsigned_type_node, len);
8134 args[t_wait_idx] = len;
8135 }
8136 }
8137 break;
8138 default:
8139 gcc_unreachable ();
8140 }
8141 if (tagging)
8142 /* Push terminal marker - zero. */
8143 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8144
8145 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8146 gimple_set_location (g, gimple_location (entry_stmt));
8147 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8148 if (!offloaded)
8149 {
8150 g = gsi_stmt (gsi);
8151 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8152 gsi_remove (&gsi, true);
8153 }
8154 if (data_region && region->exit)
8155 {
65f4b875 8156 gsi = gsi_last_nondebug_bb (region->exit);
629b3d75
MJ
8157 g = gsi_stmt (gsi);
8158 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8159 gsi_remove (&gsi, true);
8160 }
8161}
8162
8163/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
8164 iteration variable derived from the thread number. INTRA_GROUP means this
8165 is an expansion of a loop iterating over work-items within a separate
01914336 8166 iteration over groups. */
629b3d75
MJ
8167
8168static void
8169grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8170{
8171 gimple_stmt_iterator gsi;
8172 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8173 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8174 == GF_OMP_FOR_KIND_GRID_LOOP);
8175 size_t collapse = gimple_omp_for_collapse (for_stmt);
8176 struct omp_for_data_loop *loops
8177 = XALLOCAVEC (struct omp_for_data_loop,
01914336 8178 gimple_omp_for_collapse (for_stmt));
629b3d75
MJ
8179 struct omp_for_data fd;
8180
8181 remove_edge (BRANCH_EDGE (kfor->entry));
8182 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8183
8184 gcc_assert (kfor->cont);
8185 omp_extract_for_data (for_stmt, &fd, loops);
8186
8187 gsi = gsi_start_bb (body_bb);
8188
8189 for (size_t dim = 0; dim < collapse; dim++)
8190 {
8191 tree type, itype;
8192 itype = type = TREE_TYPE (fd.loops[dim].v);
8193 if (POINTER_TYPE_P (type))
8194 itype = signed_type_for (type);
8195
8196 tree n1 = fd.loops[dim].n1;
8197 tree step = fd.loops[dim].step;
8198 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8199 true, NULL_TREE, true, GSI_SAME_STMT);
8200 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8201 true, NULL_TREE, true, GSI_SAME_STMT);
8202 tree threadid;
8203 if (gimple_omp_for_grid_group_iter (for_stmt))
8204 {
8205 gcc_checking_assert (!intra_group);
8206 threadid = build_call_expr (builtin_decl_explicit
8207 (BUILT_IN_HSA_WORKGROUPID), 1,
8208 build_int_cstu (unsigned_type_node, dim));
8209 }
8210 else if (intra_group)
8211 threadid = build_call_expr (builtin_decl_explicit
8212 (BUILT_IN_HSA_WORKITEMID), 1,
8213 build_int_cstu (unsigned_type_node, dim));
8214 else
8215 threadid = build_call_expr (builtin_decl_explicit
8216 (BUILT_IN_HSA_WORKITEMABSID), 1,
8217 build_int_cstu (unsigned_type_node, dim));
8218 threadid = fold_convert (itype, threadid);
8219 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8220 true, GSI_SAME_STMT);
8221
8222 tree startvar = fd.loops[dim].v;
8223 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8224 if (POINTER_TYPE_P (type))
8225 t = fold_build_pointer_plus (n1, t);
8226 else
8227 t = fold_build2 (PLUS_EXPR, type, t, n1);
8228 t = fold_convert (type, t);
8229 t = force_gimple_operand_gsi (&gsi, t,
8230 DECL_P (startvar)
8231 && TREE_ADDRESSABLE (startvar),
8232 NULL_TREE, true, GSI_SAME_STMT);
8233 gassign *assign_stmt = gimple_build_assign (startvar, t);
8234 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8235 }
01914336 8236 /* Remove the omp for statement. */
65f4b875 8237 gsi = gsi_last_nondebug_bb (kfor->entry);
629b3d75
MJ
8238 gsi_remove (&gsi, true);
8239
8240 /* Remove the GIMPLE_OMP_CONTINUE statement. */
65f4b875 8241 gsi = gsi_last_nondebug_bb (kfor->cont);
629b3d75
MJ
8242 gcc_assert (!gsi_end_p (gsi)
8243 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8244 gsi_remove (&gsi, true);
8245
8246 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
65f4b875 8247 gsi = gsi_last_nondebug_bb (kfor->exit);
629b3d75
MJ
8248 gcc_assert (!gsi_end_p (gsi)
8249 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8250 if (intra_group)
8251 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8252 gsi_remove (&gsi, true);
8253
8254 /* Fixup the much simpler CFG. */
8255 remove_edge (find_edge (kfor->cont, body_bb));
8256
8257 if (kfor->cont != body_bb)
8258 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8259 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8260}
8261
8262/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8263 argument_decls. */
8264
8265struct grid_arg_decl_map
8266{
8267 tree old_arg;
8268 tree new_arg;
8269};
8270
8271/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8272 pertaining to kernel function. */
8273
8274static tree
8275grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8276{
8277 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8278 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8279 tree t = *tp;
8280
8281 if (t == adm->old_arg)
8282 *tp = adm->new_arg;
8283 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8284 return NULL_TREE;
8285}
8286
8287/* If TARGET region contains a kernel body for loop, remove its region from the
01914336 8288 TARGET and expand it in HSA gridified kernel fashion. */
629b3d75
MJ
8289
8290static void
8291grid_expand_target_grid_body (struct omp_region *target)
8292{
8293 if (!hsa_gen_requested_p ())
8294 return;
8295
8296 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8297 struct omp_region **pp;
8298
8299 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8300 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8301 break;
8302
8303 struct omp_region *gpukernel = *pp;
8304
8305 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8306 if (!gpukernel)
8307 {
8308 /* HSA cannot handle OACC stuff. */
8309 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8310 return;
8311 gcc_checking_assert (orig_child_fndecl);
8312 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8313 OMP_CLAUSE__GRIDDIM_));
8314 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8315
8316 hsa_register_kernel (n);
8317 return;
8318 }
8319
8320 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8321 OMP_CLAUSE__GRIDDIM_));
01914336
MJ
8322 tree inside_block
8323 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
629b3d75
MJ
8324 *pp = gpukernel->next;
8325 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8326 if ((*pp)->type == GIMPLE_OMP_FOR)
8327 break;
8328
8329 struct omp_region *kfor = *pp;
8330 gcc_assert (kfor);
8331 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8332 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8333 *pp = kfor->next;
8334 if (kfor->inner)
8335 {
8336 if (gimple_omp_for_grid_group_iter (for_stmt))
8337 {
8338 struct omp_region **next_pp;
8339 for (pp = &kfor->inner; *pp; pp = next_pp)
8340 {
8341 next_pp = &(*pp)->next;
8342 if ((*pp)->type != GIMPLE_OMP_FOR)
8343 continue;
8344 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8345 gcc_assert (gimple_omp_for_kind (inner)
8346 == GF_OMP_FOR_KIND_GRID_LOOP);
8347 grid_expand_omp_for_loop (*pp, true);
8348 *pp = (*pp)->next;
8349 next_pp = pp;
8350 }
8351 }
8352 expand_omp (kfor->inner);
8353 }
8354 if (gpukernel->inner)
8355 expand_omp (gpukernel->inner);
8356
8357 tree kern_fndecl = copy_node (orig_child_fndecl);
7958186b
MP
8358 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8359 "kernel");
629b3d75
MJ
8360 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8361 tree tgtblock = gimple_block (tgt_stmt);
8362 tree fniniblock = make_node (BLOCK);
dc16b007 8363 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
629b3d75
MJ
8364 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8365 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8366 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8367 DECL_INITIAL (kern_fndecl) = fniniblock;
8368 push_struct_function (kern_fndecl);
8369 cfun->function_end_locus = gimple_location (tgt_stmt);
8370 init_tree_ssa (cfun);
8371 pop_cfun ();
8372
8373 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8374 gcc_assert (!DECL_CHAIN (old_parm_decl));
8375 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8376 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8377 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8378 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8379 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8380 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8381 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8382 kern_cfun->curr_properties = cfun->curr_properties;
8383
8384 grid_expand_omp_for_loop (kfor, false);
8385
01914336 8386 /* Remove the omp for statement. */
65f4b875 8387 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
629b3d75
MJ
8388 gsi_remove (&gsi, true);
8389 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8390 return. */
65f4b875 8391 gsi = gsi_last_nondebug_bb (gpukernel->exit);
629b3d75
MJ
8392 gcc_assert (!gsi_end_p (gsi)
8393 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8394 gimple *ret_stmt = gimple_build_return (NULL);
8395 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8396 gsi_remove (&gsi, true);
8397
8398 /* Statements in the first BB in the target construct have been produced by
8399 target lowering and must be copied inside the GPUKERNEL, with the two
8400 exceptions of the first OMP statement and the OMP_DATA assignment
8401 statement. */
8402 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8403 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8404 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8405 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8406 !gsi_end_p (tsi); gsi_next (&tsi))
8407 {
8408 gimple *stmt = gsi_stmt (tsi);
8409 if (is_gimple_omp (stmt))
8410 break;
8411 if (sender
8412 && is_gimple_assign (stmt)
8413 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8414 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8415 continue;
8416 gimple *copy = gimple_copy (stmt);
8417 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8418 gimple_set_block (copy, fniniblock);
8419 }
8420
8421 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8422 gpukernel->exit, inside_block);
8423
8424 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8425 kcn->mark_force_output ();
8426 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8427
8428 hsa_register_kernel (kcn, orig_child);
8429
8430 cgraph_node::add_new_function (kern_fndecl, true);
8431 push_cfun (kern_cfun);
8432 cgraph_edge::rebuild_edges ();
8433
8434 /* Re-map any mention of the PARM_DECL of the original function to the
8435 PARM_DECL of the new one.
8436
8437 TODO: It would be great if lowering produced references into the GPU
8438 kernel decl straight away and we did not have to do this. */
8439 struct grid_arg_decl_map adm;
8440 adm.old_arg = old_parm_decl;
8441 adm.new_arg = new_parm_decl;
8442 basic_block bb;
8443 FOR_EACH_BB_FN (bb, kern_cfun)
8444 {
8445 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8446 {
8447 gimple *stmt = gsi_stmt (gsi);
8448 struct walk_stmt_info wi;
8449 memset (&wi, 0, sizeof (wi));
8450 wi.info = &adm;
8451 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8452 }
8453 }
8454 pop_cfun ();
8455
8456 return;
8457}
8458
8459/* Expand the parallel region tree rooted at REGION. Expansion
8460 proceeds in depth-first order. Innermost regions are expanded
8461 first. This way, parallel regions that require a new function to
8462 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8463 internal dependencies in their body. */
8464
8465static void
8466expand_omp (struct omp_region *region)
8467{
8468 omp_any_child_fn_dumped = false;
8469 while (region)
8470 {
8471 location_t saved_location;
8472 gimple *inner_stmt = NULL;
8473
8474 /* First, determine whether this is a combined parallel+workshare
01914336 8475 region. */
629b3d75
MJ
8476 if (region->type == GIMPLE_OMP_PARALLEL)
8477 determine_parallel_type (region);
8478 else if (region->type == GIMPLE_OMP_TARGET)
8479 grid_expand_target_grid_body (region);
8480
8481 if (region->type == GIMPLE_OMP_FOR
8482 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8483 inner_stmt = last_stmt (region->inner->entry);
8484
8485 if (region->inner)
8486 expand_omp (region->inner);
8487
8488 saved_location = input_location;
8489 if (gimple_has_location (last_stmt (region->entry)))
8490 input_location = gimple_location (last_stmt (region->entry));
8491
8492 switch (region->type)
8493 {
8494 case GIMPLE_OMP_PARALLEL:
8495 case GIMPLE_OMP_TASK:
8496 expand_omp_taskreg (region);
8497 break;
8498
8499 case GIMPLE_OMP_FOR:
8500 expand_omp_for (region, inner_stmt);
8501 break;
8502
8503 case GIMPLE_OMP_SECTIONS:
8504 expand_omp_sections (region);
8505 break;
8506
8507 case GIMPLE_OMP_SECTION:
8508 /* Individual omp sections are handled together with their
8509 parent GIMPLE_OMP_SECTIONS region. */
8510 break;
8511
8512 case GIMPLE_OMP_SINGLE:
8513 expand_omp_single (region);
8514 break;
8515
8516 case GIMPLE_OMP_ORDERED:
8517 {
8518 gomp_ordered *ord_stmt
8519 = as_a <gomp_ordered *> (last_stmt (region->entry));
8520 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8521 OMP_CLAUSE_DEPEND))
8522 {
8523 /* We'll expand these when expanding corresponding
8524 worksharing region with ordered(n) clause. */
8525 gcc_assert (region->outer
8526 && region->outer->type == GIMPLE_OMP_FOR);
8527 region->ord_stmt = ord_stmt;
8528 break;
8529 }
8530 }
8531 /* FALLTHRU */
8532 case GIMPLE_OMP_MASTER:
8533 case GIMPLE_OMP_TASKGROUP:
8534 case GIMPLE_OMP_CRITICAL:
8535 case GIMPLE_OMP_TEAMS:
8536 expand_omp_synch (region);
8537 break;
8538
8539 case GIMPLE_OMP_ATOMIC_LOAD:
8540 expand_omp_atomic (region);
8541 break;
8542
8543 case GIMPLE_OMP_TARGET:
8544 expand_omp_target (region);
8545 break;
8546
8547 default:
8548 gcc_unreachable ();
8549 }
8550
8551 input_location = saved_location;
8552 region = region->next;
8553 }
8554 if (omp_any_child_fn_dumped)
8555 {
8556 if (dump_file)
8557 dump_function_header (dump_file, current_function_decl, dump_flags);
8558 omp_any_child_fn_dumped = false;
8559 }
8560}
8561
8562/* Helper for build_omp_regions. Scan the dominator tree starting at
8563 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8564 true, the function ends once a single tree is built (otherwise, whole
8565 forest of OMP constructs may be built). */
8566
8567static void
8568build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8569 bool single_tree)
8570{
8571 gimple_stmt_iterator gsi;
8572 gimple *stmt;
8573 basic_block son;
8574
65f4b875 8575 gsi = gsi_last_nondebug_bb (bb);
629b3d75
MJ
8576 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8577 {
8578 struct omp_region *region;
8579 enum gimple_code code;
8580
8581 stmt = gsi_stmt (gsi);
8582 code = gimple_code (stmt);
8583 if (code == GIMPLE_OMP_RETURN)
8584 {
8585 /* STMT is the return point out of region PARENT. Mark it
8586 as the exit point and make PARENT the immediately
8587 enclosing region. */
8588 gcc_assert (parent);
8589 region = parent;
8590 region->exit = bb;
8591 parent = parent->outer;
8592 }
8593 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8594 {
5764ee3c 8595 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
629b3d75
MJ
8596 GIMPLE_OMP_RETURN, but matches with
8597 GIMPLE_OMP_ATOMIC_LOAD. */
8598 gcc_assert (parent);
8599 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8600 region = parent;
8601 region->exit = bb;
8602 parent = parent->outer;
8603 }
8604 else if (code == GIMPLE_OMP_CONTINUE)
8605 {
8606 gcc_assert (parent);
8607 parent->cont = bb;
8608 }
8609 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8610 {
8611 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8612 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8613 }
8614 else
8615 {
8616 region = new_omp_region (bb, code, parent);
8617 /* Otherwise... */
8618 if (code == GIMPLE_OMP_TARGET)
8619 {
8620 switch (gimple_omp_target_kind (stmt))
8621 {
8622 case GF_OMP_TARGET_KIND_REGION:
8623 case GF_OMP_TARGET_KIND_DATA:
8624 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8625 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8626 case GF_OMP_TARGET_KIND_OACC_DATA:
8627 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8628 break;
8629 case GF_OMP_TARGET_KIND_UPDATE:
8630 case GF_OMP_TARGET_KIND_ENTER_DATA:
8631 case GF_OMP_TARGET_KIND_EXIT_DATA:
8632 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8633 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8634 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8635 /* ..., other than for those stand-alone directives... */
8636 region = NULL;
8637 break;
8638 default:
8639 gcc_unreachable ();
8640 }
8641 }
8642 else if (code == GIMPLE_OMP_ORDERED
8643 && omp_find_clause (gimple_omp_ordered_clauses
8644 (as_a <gomp_ordered *> (stmt)),
8645 OMP_CLAUSE_DEPEND))
8646 /* #pragma omp ordered depend is also just a stand-alone
8647 directive. */
8648 region = NULL;
28567c40
JJ
8649 else if (code == GIMPLE_OMP_TASK
8650 && gimple_omp_task_taskwait_p (stmt))
8651 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8652 region = NULL;
629b3d75
MJ
8653 /* ..., this directive becomes the parent for a new region. */
8654 if (region)
8655 parent = region;
8656 }
8657 }
8658
8659 if (single_tree && !parent)
8660 return;
8661
8662 for (son = first_dom_son (CDI_DOMINATORS, bb);
8663 son;
8664 son = next_dom_son (CDI_DOMINATORS, son))
8665 build_omp_regions_1 (son, parent, single_tree);
8666}
8667
8668/* Builds the tree of OMP regions rooted at ROOT, storing it to
8669 root_omp_region. */
8670
8671static void
8672build_omp_regions_root (basic_block root)
8673{
8674 gcc_assert (root_omp_region == NULL);
8675 build_omp_regions_1 (root, NULL, true);
8676 gcc_assert (root_omp_region != NULL);
8677}
8678
8679/* Expands omp construct (and its subconstructs) starting in HEAD. */
8680
8681void
8682omp_expand_local (basic_block head)
8683{
8684 build_omp_regions_root (head);
8685 if (dump_file && (dump_flags & TDF_DETAILS))
8686 {
8687 fprintf (dump_file, "\nOMP region tree\n\n");
8688 dump_omp_region (dump_file, root_omp_region, 0);
8689 fprintf (dump_file, "\n");
8690 }
8691
8692 remove_exit_barriers (root_omp_region);
8693 expand_omp (root_omp_region);
8694
8695 omp_free_regions ();
8696}
8697
8698/* Scan the CFG and build a tree of OMP regions. Return the root of
8699 the OMP region tree. */
8700
8701static void
8702build_omp_regions (void)
8703{
8704 gcc_assert (root_omp_region == NULL);
8705 calculate_dominance_info (CDI_DOMINATORS);
8706 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8707}
8708
8709/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8710
8711static unsigned int
8712execute_expand_omp (void)
8713{
8714 build_omp_regions ();
8715
8716 if (!root_omp_region)
8717 return 0;
8718
8719 if (dump_file)
8720 {
8721 fprintf (dump_file, "\nOMP region tree\n\n");
8722 dump_omp_region (dump_file, root_omp_region, 0);
8723 fprintf (dump_file, "\n");
8724 }
8725
8726 remove_exit_barriers (root_omp_region);
8727
8728 expand_omp (root_omp_region);
8729
8730 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8731 verify_loop_structure ();
8732 cleanup_tree_cfg ();
8733
8734 omp_free_regions ();
8735
8736 return 0;
8737}
8738
8739/* OMP expansion -- the default pass, run before creation of SSA form. */
8740
8741namespace {
8742
8743const pass_data pass_data_expand_omp =
8744{
8745 GIMPLE_PASS, /* type */
8746 "ompexp", /* name */
fd2b8c8b 8747 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
8748 TV_NONE, /* tv_id */
8749 PROP_gimple_any, /* properties_required */
8750 PROP_gimple_eomp, /* properties_provided */
8751 0, /* properties_destroyed */
8752 0, /* todo_flags_start */
8753 0, /* todo_flags_finish */
8754};
8755
8756class pass_expand_omp : public gimple_opt_pass
8757{
8758public:
8759 pass_expand_omp (gcc::context *ctxt)
8760 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8761 {}
8762
8763 /* opt_pass methods: */
8764 virtual unsigned int execute (function *)
8765 {
5e9d6aa4 8766 bool gate = ((flag_openacc != 0 || flag_openmp != 0
629b3d75
MJ
8767 || flag_openmp_simd != 0)
8768 && !seen_error ());
8769
8770 /* This pass always runs, to provide PROP_gimple_eomp.
8771 But often, there is nothing to do. */
8772 if (!gate)
8773 return 0;
8774
8775 return execute_expand_omp ();
8776 }
8777
8778}; // class pass_expand_omp
8779
8780} // anon namespace
8781
8782gimple_opt_pass *
8783make_pass_expand_omp (gcc::context *ctxt)
8784{
8785 return new pass_expand_omp (ctxt);
8786}
8787
8788namespace {
8789
8790const pass_data pass_data_expand_omp_ssa =
8791{
8792 GIMPLE_PASS, /* type */
8793 "ompexpssa", /* name */
fd2b8c8b 8794 OPTGROUP_OMP, /* optinfo_flags */
629b3d75
MJ
8795 TV_NONE, /* tv_id */
8796 PROP_cfg | PROP_ssa, /* properties_required */
8797 PROP_gimple_eomp, /* properties_provided */
8798 0, /* properties_destroyed */
8799 0, /* todo_flags_start */
8800 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8801};
8802
8803class pass_expand_omp_ssa : public gimple_opt_pass
8804{
8805public:
8806 pass_expand_omp_ssa (gcc::context *ctxt)
8807 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8808 {}
8809
8810 /* opt_pass methods: */
8811 virtual bool gate (function *fun)
8812 {
8813 return !(fun->curr_properties & PROP_gimple_eomp);
8814 }
8815 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8816 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8817
8818}; // class pass_expand_omp_ssa
8819
8820} // anon namespace
8821
8822gimple_opt_pass *
8823make_pass_expand_omp_ssa (gcc::context *ctxt)
8824{
8825 return new pass_expand_omp_ssa (ctxt);
8826}
8827
8828/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8829 GIMPLE_* codes. */
8830
8831bool
8832omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8833 int *region_idx)
8834{
8835 gimple *last = last_stmt (bb);
8836 enum gimple_code code = gimple_code (last);
8837 struct omp_region *cur_region = *region;
8838 bool fallthru = false;
8839
8840 switch (code)
8841 {
8842 case GIMPLE_OMP_PARALLEL:
629b3d75
MJ
8843 case GIMPLE_OMP_FOR:
8844 case GIMPLE_OMP_SINGLE:
8845 case GIMPLE_OMP_TEAMS:
8846 case GIMPLE_OMP_MASTER:
8847 case GIMPLE_OMP_TASKGROUP:
8848 case GIMPLE_OMP_CRITICAL:
8849 case GIMPLE_OMP_SECTION:
8850 case GIMPLE_OMP_GRID_BODY:
8851 cur_region = new_omp_region (bb, code, cur_region);
8852 fallthru = true;
8853 break;
8854
28567c40
JJ
8855 case GIMPLE_OMP_TASK:
8856 cur_region = new_omp_region (bb, code, cur_region);
8857 fallthru = true;
8858 if (gimple_omp_task_taskwait_p (last))
8859 cur_region = cur_region->outer;
8860 break;
8861
629b3d75
MJ
8862 case GIMPLE_OMP_ORDERED:
8863 cur_region = new_omp_region (bb, code, cur_region);
8864 fallthru = true;
8865 if (omp_find_clause (gimple_omp_ordered_clauses
8866 (as_a <gomp_ordered *> (last)),
8867 OMP_CLAUSE_DEPEND))
8868 cur_region = cur_region->outer;
8869 break;
8870
8871 case GIMPLE_OMP_TARGET:
8872 cur_region = new_omp_region (bb, code, cur_region);
8873 fallthru = true;
8874 switch (gimple_omp_target_kind (last))
8875 {
8876 case GF_OMP_TARGET_KIND_REGION:
8877 case GF_OMP_TARGET_KIND_DATA:
8878 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8879 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8880 case GF_OMP_TARGET_KIND_OACC_DATA:
8881 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8882 break;
8883 case GF_OMP_TARGET_KIND_UPDATE:
8884 case GF_OMP_TARGET_KIND_ENTER_DATA:
8885 case GF_OMP_TARGET_KIND_EXIT_DATA:
8886 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8887 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8888 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8889 cur_region = cur_region->outer;
8890 break;
8891 default:
8892 gcc_unreachable ();
8893 }
8894 break;
8895
8896 case GIMPLE_OMP_SECTIONS:
8897 cur_region = new_omp_region (bb, code, cur_region);
8898 fallthru = true;
8899 break;
8900
8901 case GIMPLE_OMP_SECTIONS_SWITCH:
8902 fallthru = false;
8903 break;
8904
8905 case GIMPLE_OMP_ATOMIC_LOAD:
8906 case GIMPLE_OMP_ATOMIC_STORE:
8907 fallthru = true;
8908 break;
8909
8910 case GIMPLE_OMP_RETURN:
8911 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8912 somewhere other than the next block. This will be
8913 created later. */
8914 cur_region->exit = bb;
8915 if (cur_region->type == GIMPLE_OMP_TASK)
8916 /* Add an edge corresponding to not scheduling the task
8917 immediately. */
8918 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8919 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8920 cur_region = cur_region->outer;
8921 break;
8922
8923 case GIMPLE_OMP_CONTINUE:
8924 cur_region->cont = bb;
8925 switch (cur_region->type)
8926 {
8927 case GIMPLE_OMP_FOR:
8928 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8929 succs edges as abnormal to prevent splitting
8930 them. */
8931 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8932 /* Make the loopback edge. */
8933 make_edge (bb, single_succ (cur_region->entry),
8934 EDGE_ABNORMAL);
8935
8936 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8937 corresponds to the case that the body of the loop
8938 is not executed at all. */
8939 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8940 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8941 fallthru = false;
8942 break;
8943
8944 case GIMPLE_OMP_SECTIONS:
8945 /* Wire up the edges into and out of the nested sections. */
8946 {
8947 basic_block switch_bb = single_succ (cur_region->entry);
8948
8949 struct omp_region *i;
8950 for (i = cur_region->inner; i ; i = i->next)
8951 {
8952 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8953 make_edge (switch_bb, i->entry, 0);
8954 make_edge (i->exit, bb, EDGE_FALLTHRU);
8955 }
8956
8957 /* Make the loopback edge to the block with
8958 GIMPLE_OMP_SECTIONS_SWITCH. */
8959 make_edge (bb, switch_bb, 0);
8960
8961 /* Make the edge from the switch to exit. */
8962 make_edge (switch_bb, bb->next_bb, 0);
8963 fallthru = false;
8964 }
8965 break;
8966
8967 case GIMPLE_OMP_TASK:
8968 fallthru = true;
8969 break;
8970
8971 default:
8972 gcc_unreachable ();
8973 }
8974 break;
8975
8976 default:
8977 gcc_unreachable ();
8978 }
8979
8980 if (*region != cur_region)
8981 {
8982 *region = cur_region;
8983 if (cur_region)
8984 *region_idx = cur_region->entry->index;
8985 else
8986 *region_idx = 0;
8987 }
8988
8989 return fallthru;
8990}
8991
8992#include "gt-omp-expand.h"