/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
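
/* For example (an illustrative sketch, not from the original sources):

     #pragma omp parallel      <-- region, type GIMPLE_OMP_PARALLEL
       #pragma omp for         <-- region->inner, type GIMPLE_OMP_FOR
       for (...) { ... }

   The workshare region is reached through INNER, siblings at the same
   nesting depth are chained through NEXT, and each child points back
   to its parent through OUTER.  */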

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
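
/* Illustration of the rounding above (assumed values, not from the
   original sources): with a vectorization factor VF of 8, a chunk size
   of 10 becomes (10 + 7) & -8 == 16, i.e. chunk sizes are rounded up
   to whole multiples of VF so that a simd chunk is never split.  */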

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
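
/* The vector built above is later spliced into the combined
   GOMP_parallel_loop_* call by expand_parallel_call, so for loops the
   extra arguments arrive in the order the runtime expects: start (N1),
   end (N2), increment (STEP) and, if present, the chunk size.  */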

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is
     implementable, adding another big set of APIs or slowing down
     the normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
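
/* For instance, a condition tree 'a < b' becomes the statement
   'if (a < b) goto <null>; else goto <null>;' -- the destination
   labels are intentionally left empty; CFG edges supply them later.  */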

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}
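
/* For a plain '#pragma omp parallel' the code built above boils down to
   a library call of roughly this shape (sketch only; 'foo._omp_fn.0'
   stands for whichever child function was outlined):

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, num_threads, flags);

   The combined parallel+workshare builtins take the spliced WS_ARGS
   between the NUM_THREADS and FLAGS arguments.  */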

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
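
/* The FLAGS argument of GOMP_task is just the OR of the clause bits
   computed above; e.g. '#pragma omp task untied mergeable' yields
   GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE (an illustrative
   case; IF and FINAL contribute their bits at run time through the
   COND_EXPRs built above).  */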

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
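
/* E.g. a host '#pragma omp teams num_teams (4)' becomes, roughly,

     GOMP_teams_reg (foo._omp_fn.0, &.omp_data_o, 4, 0, 0);

   (sketch only; 'foo._omp_fn.0' names the outlined child function) --
   the zero THREAD_LIMIT leaves the limit to the runtime and the final
   zero is the currently unused extensibility argument pushed above.  */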

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within the current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
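
/* As an illustration of why the addressability check above matters:

     #pragma omp parallel
     {
       int x = 0;
       #pragma omp task shared (x)
	 x = 1;
     }

   Here the implicit barrier must stay, since it is what keeps 'x' in
   scope until the queued task has run.  */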

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for the task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
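
/* E.g. within one outlined parallel body

     tid1 = omp_get_thread_num ();
     ...
     tid2 = omp_get_thread_num ();

   both calls get redirected to the const __builtin_omp_get_thread_num,
   so later passes can CSE them into a single call.  */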

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We ignore the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
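
/* Sanity check of the iteration-count formula above on a canonical loop
   (illustration only): for 'for (i = 0; i < n; i++)' we have DIR = +1,
   S = 1 and RANGE = n, so ITERS = (n - 1 + 1) / 1 == n, as expected.  */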
1656
02889d23
CLT
1657/* Emit initializers for collapsed loop members. INNER is true if
1658 this is for the element loop of a TILE. IVAR is the outer
629b3d75
MJ
1659 loop iteration variable, from which collapsed loop iteration values
1660 are calculated. COUNTS array has been initialized by
1661 expand_oacc_collapse_inits. */
1662
1663static void
02889d23 1664expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
629b3d75
MJ
1665 gimple_stmt_iterator *gsi,
1666 const oacc_collapse *counts, tree ivar)
1667{
1668 tree ivar_type = TREE_TYPE (ivar);
1669
1670 /* The most rapidly changing iteration variable is the innermost
1671 one. */
1672 for (int ix = fd->collapse; ix--;)
1673 {
1674 const omp_for_data_loop *loop = &fd->loops[ix];
1675 const oacc_collapse *collapse = &counts[ix];
02889d23
CLT
1676 tree v = inner ? loop->v : collapse->outer;
1677 tree iter_type = TREE_TYPE (v);
629b3d75
MJ
1678 tree diff_type = TREE_TYPE (collapse->step);
1679 tree plus_type = iter_type;
1680 enum tree_code plus_code = PLUS_EXPR;
1681 tree expr;
1682
1683 if (POINTER_TYPE_P (iter_type))
1684 {
1685 plus_code = POINTER_PLUS_EXPR;
1686 plus_type = sizetype;
1687 }
1688
02889d23
CLT
1689 expr = ivar;
1690 if (ix)
1691 {
1692 tree mod = fold_convert (ivar_type, collapse->iters);
1693 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1694 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1695 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1696 true, GSI_SAME_STMT);
1697 }
1698
629b3d75
MJ
1699 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1700 collapse->step);
1701 expr = fold_build2 (plus_code, iter_type,
1702 inner ? collapse->outer : collapse->base,
1703 fold_convert (plus_type, expr));
1704 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1705 true, GSI_SAME_STMT);
1706 gassign *ass = gimple_build_assign (v, expr);
1707 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1708 }
1709}
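/* A minimal illustrative sketch of the div/mod decomposition above,
   with hypothetical names, for a collapse(2) nest whose inner loop
   runs ITERS_IN times.  E.g. with counts {4, 3}, linear ivar 7
   recovers inner index 7 % 3 = 1 and outer index 7 / 3 = 2.  */
#if 0
static void
collapse_vars_sketch (long ivar, long n1_out, long step_out,
                      long n1_in, long step_in, long iters_in,
                      long *v_out, long *v_in)
{
  *v_in = n1_in + (ivar % iters_in) * step_in;  /* innermost varies fastest */
  ivar /= iters_in;                             /* peel off the inner dimension */
  *v_out = n1_out + ivar * step_out;
}
#endif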
1710
1711/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. */
1737
1738/* NOTE: It *could* be better to moosh all of the BBs together,
1739 creating one larger BB with all the computation and the unexpected
1740 jump at the end. I.e.
1741
1742 bool zero3, zero2, zero1, zero;
1743
1744 zero3 = N32 c3 N31;
1745 count3 = (N32 - N31) /[cl] STEP3;
1746 zero2 = N22 c2 N21;
1747 count2 = (N22 - N21) /[cl] STEP2;
1748 zero1 = N12 c1 N11;
1749 count1 = (N12 - N11) /[cl] STEP1;
1750 zero = zero3 || zero2 || zero1;
1751 count = count1 * count2 * count3;
1752 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1753
1754 After all, we expect zero to be false, and thus we expect to have to
1755 evaluate all of the comparison expressions, so short-circuiting
1756 oughtn't be a win. Since the condition isn't protecting a
1757 denominator, we're not concerned about divide-by-zero, so we can
1758 fully evaluate count even if a numerator turned out to be wrong.
1759
1760 It seems like putting this all together would create much better
1761 scheduling opportunities, and less pressure on the chip's branch
1762 predictor. */
1763
1764static void
1765expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1766 basic_block &entry_bb, tree *counts,
1767 basic_block &zero_iter1_bb, int &first_zero_iter1,
1768 basic_block &zero_iter2_bb, int &first_zero_iter2,
1769 basic_block &l2_dom_bb)
1770{
1771 tree t, type = TREE_TYPE (fd->loop.v);
1772 edge e, ne;
1773 int i;
1774
1775 /* Collapsed loops need work for expansion into SSA form. */
1776 gcc_assert (!gimple_in_ssa_p (cfun));
1777
1778 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1779 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1780 {
1781 gcc_assert (fd->ordered == 0);
1782 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1783 isn't supposed to be handled, as the inner loop doesn't
1784 use it. */
1785 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 for (i = 0; i < fd->collapse; i++)
1789 {
1790 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1791 OMP_CLAUSE__LOOPTEMP_);
1792 gcc_assert (innerc);
1793 if (i)
1794 counts[i] = OMP_CLAUSE_DECL (innerc);
1795 else
1796 counts[0] = NULL_TREE;
1797 }
1798 return;
1799 }
1800
1801 for (i = fd->collapse; i < fd->ordered; i++)
1802 {
1803 tree itype = TREE_TYPE (fd->loops[i].v);
1804 counts[i] = NULL_TREE;
1805 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1806 fold_convert (itype, fd->loops[i].n1),
1807 fold_convert (itype, fd->loops[i].n2));
1808 if (t && integer_zerop (t))
1809 {
1810 for (i = fd->collapse; i < fd->ordered; i++)
1811 counts[i] = build_int_cst (type, 0);
1812 break;
1813 }
1814 }
1815 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1816 {
1817 tree itype = TREE_TYPE (fd->loops[i].v);
1818
1819 if (i >= fd->collapse && counts[i])
1820 continue;
1821 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1822 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1823 fold_convert (itype, fd->loops[i].n1),
1824 fold_convert (itype, fd->loops[i].n2)))
1825 == NULL_TREE || !integer_onep (t)))
1826 {
1827 gcond *cond_stmt;
1828 tree n1, n2;
1829 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1830 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1831 true, GSI_SAME_STMT);
1832 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1833 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1834 true, GSI_SAME_STMT);
1835 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1836 NULL_TREE, NULL_TREE);
1837 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1838 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1839 expand_omp_regimplify_p, NULL, NULL)
1840 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1841 expand_omp_regimplify_p, NULL, NULL))
1842 {
1843 *gsi = gsi_for_stmt (cond_stmt);
1844 gimple_regimplify_operands (cond_stmt, gsi);
1845 }
1846 e = split_block (entry_bb, cond_stmt);
1847 basic_block &zero_iter_bb
1848 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1849 int &first_zero_iter
1850 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1851 if (zero_iter_bb == NULL)
1852 {
1853 gassign *assign_stmt;
1854 first_zero_iter = i;
1855 zero_iter_bb = create_empty_bb (entry_bb);
1856 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1857 *gsi = gsi_after_labels (zero_iter_bb);
1858 if (i < fd->collapse)
1859 assign_stmt = gimple_build_assign (fd->loop.n2,
1860 build_zero_cst (type));
1861 else
1862 {
1863 counts[i] = create_tmp_reg (type, ".count");
1864 assign_stmt
1865 = gimple_build_assign (counts[i], build_zero_cst (type));
1866 }
1867 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1868 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1869 entry_bb);
1870 }
1871 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1872 ne->probability = profile_probability::very_unlikely ();
1873 e->flags = EDGE_TRUE_VALUE;
1874 e->probability = ne->probability.invert ();
1875 if (l2_dom_bb == NULL)
1876 l2_dom_bb = entry_bb;
1877 entry_bb = e->dest;
1878 *gsi = gsi_last_nondebug_bb (entry_bb);
1879 }
1880
1881 if (POINTER_TYPE_P (itype))
1882 itype = signed_type_for (itype);
1883 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1884 ? -1 : 1));
1885 t = fold_build2 (PLUS_EXPR, itype,
1886 fold_convert (itype, fd->loops[i].step), t);
1887 t = fold_build2 (PLUS_EXPR, itype, t,
1888 fold_convert (itype, fd->loops[i].n2));
1889 t = fold_build2 (MINUS_EXPR, itype, t,
1890 fold_convert (itype, fd->loops[i].n1));
1891 /* ?? We could probably use CEIL_DIV_EXPR instead of
1892 TRUNC_DIV_EXPR and adjust by hand, but we might not
1893 generate the same code in the end, because generically we
1894 don't know that the values involved must be negative for
1895 GT. ?? */
1896 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1897 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1898 fold_build1 (NEGATE_EXPR, itype, t),
1899 fold_build1 (NEGATE_EXPR, itype,
1900 fold_convert (itype,
1901 fd->loops[i].step)));
1902 else
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1904 fold_convert (itype, fd->loops[i].step));
1905 t = fold_convert (type, t);
1906 if (TREE_CODE (t) == INTEGER_CST)
1907 counts[i] = t;
1908 else
1909 {
1910 if (i < fd->collapse || i != first_zero_iter2)
1911 counts[i] = create_tmp_reg (type, ".count");
1912 expand_omp_build_assign (gsi, counts[i], t);
1913 }
1914 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1915 {
1916 if (i == 0)
1917 t = counts[0];
1918 else
1919 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1920 expand_omp_build_assign (gsi, fd->loop.n2, t);
1921 }
1922 }
1923}
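/* A minimal illustrative sketch, with hypothetical names, of the
   per-dimension count computation described in the comment before
   this function.  It returns 0 on the ZERO_ITER_BB path.  */
#if 0
static long
init_count_sketch (long n1, long n2, long step, int cond_is_lt)
{
  if (cond_is_lt ? !(n1 < n2) : !(n1 > n2))
    return 0;                        /* zero iterations */
  long adj = cond_is_lt ? step - 1 : step + 1;
  return (adj + n2 - n1) / step;     /* truncating division */
}
#endif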
1924
1925/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1926 T = V;
1927 V3 = N31 + (T % count3) * STEP3;
1928 T = T / count3;
1929 V2 = N21 + (T % count2) * STEP2;
1930 T = T / count2;
1931 V1 = N11 + T * STEP1;
1932 if this loop doesn't have an inner loop construct combined with it.
1933 If it does have an inner loop construct combined with it and the
1934 iteration count isn't known constant, store values from counts array
1935 into its _looptemp_ temporaries instead. */
1936
1937static void
1938expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1939 tree *counts, gimple *inner_stmt, tree startvar)
1940{
1941 int i;
1942 if (gimple_omp_for_combined_p (fd->for_stmt))
1943 {
1944 /* If fd->loop.n2 is constant, then no propagation of the counts
1945 is needed, they are constant. */
1946 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1947 return;
1948
1949 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1950 ? gimple_omp_taskreg_clauses (inner_stmt)
1951 : gimple_omp_for_clauses (inner_stmt);
1952 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1953 isn't supposed to be handled, as the inner loop doesn't
1954 use it. */
1955 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1956 gcc_assert (innerc);
1957 for (i = 0; i < fd->collapse; i++)
1958 {
1959 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1960 OMP_CLAUSE__LOOPTEMP_);
1961 gcc_assert (innerc);
1962 if (i)
1963 {
1964 tree tem = OMP_CLAUSE_DECL (innerc);
1965 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1967 false, GSI_CONTINUE_LINKING);
1968 gassign *stmt = gimple_build_assign (tem, t);
1969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1970 }
1971 }
1972 return;
1973 }
1974
1975 tree type = TREE_TYPE (fd->loop.v);
1976 tree tem = create_tmp_reg (type, ".tem");
1977 gassign *stmt = gimple_build_assign (tem, startvar);
1978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1979
1980 for (i = fd->collapse - 1; i >= 0; i--)
1981 {
1982 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1983 itype = vtype;
1984 if (POINTER_TYPE_P (vtype))
1985 itype = signed_type_for (vtype);
1986 if (i != 0)
1987 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1988 else
1989 t = tem;
1990 t = fold_convert (itype, t);
1991 t = fold_build2 (MULT_EXPR, itype, t,
1992 fold_convert (itype, fd->loops[i].step));
1993 if (POINTER_TYPE_P (vtype))
1994 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1995 else
1996 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1997 t = force_gimple_operand_gsi (gsi, t,
1998 DECL_P (fd->loops[i].v)
1999 && TREE_ADDRESSABLE (fd->loops[i].v),
2000 NULL_TREE, false,
2001 GSI_CONTINUE_LINKING);
2002 stmt = gimple_build_assign (fd->loops[i].v, t);
2003 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2004 if (i != 0)
2005 {
2006 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2007 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2008 false, GSI_CONTINUE_LINKING);
2009 stmt = gimple_build_assign (tem, t);
2010 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2011 }
2012 }
2013}
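/* A minimal illustrative sketch, with hypothetical names, of the
   non-combined path above for collapse(3): the linear iteration
   number T is peeled into V3, V2, V1, innermost first, exactly as
   in the comment before this function.  */
#if 0
static void
init_vars_sketch (long t, long count2, long count3,
                  long n11, long n21, long n31,
                  long step1, long step2, long step3,
                  long *v1, long *v2, long *v3)
{
  *v3 = n31 + (t % count3) * step3;
  t /= count3;
  *v2 = n21 + (t % count2) * step2;
  t /= count2;
  *v1 = n11 + t * step1;
}
#endif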
2014
2015/* Helper function for expand_omp_for_*. Generate code like:
2016 L10:
2017 V3 += STEP3;
2018 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2019 L11:
2020 V3 = N31;
2021 V2 += STEP2;
2022 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2023 L12:
2024 V2 = N21;
2025 V1 += STEP1;
2026 goto BODY_BB; */
2027
2028static basic_block
2029extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2030 basic_block body_bb)
2031{
2032 basic_block last_bb, bb, collapse_bb = NULL;
2033 int i;
2034 gimple_stmt_iterator gsi;
2035 edge e;
2036 tree t;
2037 gimple *stmt;
2038
2039 last_bb = cont_bb;
2040 for (i = fd->collapse - 1; i >= 0; i--)
2041 {
2042 tree vtype = TREE_TYPE (fd->loops[i].v);
2043
2044 bb = create_empty_bb (last_bb);
2045 add_bb_to_loop (bb, last_bb->loop_father);
2046 gsi = gsi_start_bb (bb);
2047
2048 if (i < fd->collapse - 1)
2049 {
2050 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2051 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2052
2053 t = fd->loops[i + 1].n1;
2054 t = force_gimple_operand_gsi (&gsi, t,
2055 DECL_P (fd->loops[i + 1].v)
2056 && TREE_ADDRESSABLE (fd->loops[i
2057 + 1].v),
2058 NULL_TREE, false,
2059 GSI_CONTINUE_LINKING);
2060 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2062 }
2063 else
2064 collapse_bb = bb;
2065
2066 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2067
2068 if (POINTER_TYPE_P (vtype))
2069 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2070 else
2071 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2072 t = force_gimple_operand_gsi (&gsi, t,
2073 DECL_P (fd->loops[i].v)
2074 && TREE_ADDRESSABLE (fd->loops[i].v),
2075 NULL_TREE, false, GSI_CONTINUE_LINKING);
2076 stmt = gimple_build_assign (fd->loops[i].v, t);
2077 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2078
2079 if (i > 0)
2080 {
2081 t = fd->loops[i].n2;
2082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 tree v = fd->loops[i].v;
2085 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2086 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2087 false, GSI_CONTINUE_LINKING);
2088 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2089 stmt = gimple_build_cond_empty (t);
2090 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2091 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2092 expand_omp_regimplify_p, NULL, NULL)
2093 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2094 expand_omp_regimplify_p, NULL, NULL))
2095 gimple_regimplify_operands (stmt, &gsi);
2096 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2097 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2098 }
2099 else
2100 make_edge (bb, body_bb, EDGE_FALLTHRU);
2101 last_bb = bb;
2102 }
2103
2104 return collapse_bb;
2105}
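/* A minimal illustrative sketch, with hypothetical names, of the
   odometer-style update sequence generated above for collapse(2):
   bump the innermost variable first; on wrap-around reset it and
   bump the next-outer one.  */
#if 0
static void
update_vars_sketch (long *v1, long *v2,
                    long step1, long step2, long n21, long n22)
{
  *v2 += step2;      /* L10 */
  if (*v2 < n22)
    return;          /* goto BODY_BB */
  *v2 = n21;         /* L11 */
  *v1 += step1;      /* falls through to BODY_BB */
}
#endif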
2106
2107/* Expand #pragma omp ordered depend(source). */
2108
2109static void
2110expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2111 tree *counts, location_t loc)
2112{
2113 enum built_in_function source_ix
2114 = fd->iter_type == long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2116 gimple *g
2117 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2118 build_fold_addr_expr (counts[fd->ordered]));
2119 gimple_set_location (g, loc);
2120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2121}
2122
2123/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2124
2125static void
2126expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2127 tree *counts, tree c, location_t loc)
2128{
2129 auto_vec<tree, 10> args;
2130 enum built_in_function sink_ix
2131 = fd->iter_type == long_integer_type_node
2132 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2133 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2134 int i;
2135 gimple_stmt_iterator gsi2 = *gsi;
2136 bool warned_step = false;
2137
2138 for (i = 0; i < fd->ordered; i++)
2139 {
2140 tree step = NULL_TREE;
2141 off = TREE_PURPOSE (deps);
2142 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2143 {
2144 step = TREE_OPERAND (off, 1);
2145 off = TREE_OPERAND (off, 0);
2146 }
2147 if (!integer_zerop (off))
2148 {
2149 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2150 || fd->loops[i].cond_code == GT_EXPR);
2151 bool forward = fd->loops[i].cond_code == LT_EXPR;
2152 if (step)
2153 {
2154 /* Non-simple Fortran DO loops. If step is variable,
2155 we don't know even the direction at compile time, so
2156 we can't warn. */
2157 if (TREE_CODE (step) != INTEGER_CST)
2158 break;
2159 forward = tree_int_cst_sgn (step) != -1;
2160 }
2161 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2162 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2163 "waiting for lexically later iteration");
2164 break;
2165 }
2166 deps = TREE_CHAIN (deps);
2167 }
2168 /* If all offsets corresponding to the collapsed loops are zero,
2169 this depend clause can be ignored. FIXME: but there is still a
2170 flush needed. We need to emit one __sync_synchronize () for it
2171 though (perhaps conditionally)? Solve this together with the
2172 conservative dependence folding optimization.
2173 if (i >= fd->collapse)
2174 return; */
2175
2176 deps = OMP_CLAUSE_DECL (c);
2177 gsi_prev (&gsi2);
2178 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2179 edge e2 = split_block_after_labels (e1->dest);
2180
2181 gsi2 = gsi_after_labels (e1->dest);
2182 *gsi = gsi_last_bb (e1->src);
2183 for (i = 0; i < fd->ordered; i++)
2184 {
2185 tree itype = TREE_TYPE (fd->loops[i].v);
2186 tree step = NULL_TREE;
2187 tree orig_off = NULL_TREE;
2188 if (POINTER_TYPE_P (itype))
2189 itype = sizetype;
2190 if (i)
2191 deps = TREE_CHAIN (deps);
2192 off = TREE_PURPOSE (deps);
2193 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2194 {
2195 step = TREE_OPERAND (off, 1);
2196 off = TREE_OPERAND (off, 0);
2197 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2198 && integer_onep (fd->loops[i].step)
2199 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2200 }
2201 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2202 if (step)
2203 {
2204 off = fold_convert_loc (loc, itype, off);
2205 orig_off = off;
2206 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2207 }
2208
2209 if (integer_zerop (off))
2210 t = boolean_true_node;
2211 else
2212 {
2213 tree a;
2214 tree co = fold_convert_loc (loc, itype, off);
2215 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2216 {
2217 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2218 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2219 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2220 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2221 co);
2222 }
2223 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2224 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2225 fd->loops[i].v, co);
2226 else
2227 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2228 fd->loops[i].v, co);
2229 if (step)
2230 {
2231 tree t1, t2;
2232 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2233 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2234 fd->loops[i].n1);
2235 else
2236 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2237 fd->loops[i].n2);
2238 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2239 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2240 fd->loops[i].n2);
2241 else
2242 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2243 fd->loops[i].n1);
2244 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2245 step, build_int_cst (TREE_TYPE (step), 0));
2246 if (TREE_CODE (step) != INTEGER_CST)
2247 {
2248 t1 = unshare_expr (t1);
2249 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2250 false, GSI_CONTINUE_LINKING);
2251 t2 = unshare_expr (t2);
2252 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 }
2255 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2256 t, t2, t1);
2257 }
2258 else if (fd->loops[i].cond_code == LT_EXPR)
2259 {
2260 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2261 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2262 fd->loops[i].n1);
2263 else
2264 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2265 fd->loops[i].n2);
2266 }
2267 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2268 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2269 fd->loops[i].n2);
2270 else
2271 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2272 fd->loops[i].n1);
2273 }
2274 if (cond)
2275 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2276 else
2277 cond = t;
2278
2279 off = fold_convert_loc (loc, itype, off);
2280
2281 if (step
2282 || (fd->loops[i].cond_code == LT_EXPR
2283 ? !integer_onep (fd->loops[i].step)
2284 : !integer_minus_onep (fd->loops[i].step)))
2285 {
2286 if (step == NULL_TREE
2287 && TYPE_UNSIGNED (itype)
2288 && fd->loops[i].cond_code == GT_EXPR)
2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2290 fold_build1_loc (loc, NEGATE_EXPR, itype,
2291 s));
2292 else
2293 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2294 orig_off ? orig_off : off, s);
2295 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2296 build_int_cst (itype, 0));
2297 if (integer_zerop (t) && !warned_step)
2298 {
2299 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2300 "refers to iteration never in the iteration "
2301 "space");
2302 warned_step = true;
2303 }
2304 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2305 cond, t);
2306 }
2307
2308 if (i <= fd->collapse - 1 && fd->collapse > 1)
2309 t = fd->loop.v;
2310 else if (counts[i])
2311 t = counts[i];
2312 else
2313 {
2314 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2315 fd->loops[i].v, fd->loops[i].n1);
2316 t = fold_convert_loc (loc, fd->iter_type, t);
2317 }
2318 if (step)
2319 /* OFF has already been divided by STEP earlier. */;
2320 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2322 fold_build1_loc (loc, NEGATE_EXPR, itype,
2323 s));
2324 else
2325 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2326 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2327 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2328 off = fold_convert_loc (loc, fd->iter_type, off);
2329 if (i <= fd->collapse - 1 && fd->collapse > 1)
2330 {
2331 if (i)
2332 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2333 off);
2334 if (i < fd->collapse - 1)
2335 {
2336 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2337 counts[i]);
2338 continue;
2339 }
2340 }
2341 off = unshare_expr (off);
2342 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2343 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2344 true, GSI_SAME_STMT);
2345 args.safe_push (t);
2346 }
2347 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2348 gimple_set_location (g, loc);
2349 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2350
2351 cond = unshare_expr (cond);
2352 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2353 GSI_CONTINUE_LINKING);
2354 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2355 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2356 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2357 e1->probability = e3->probability.invert ();
2358 e1->flags = EDGE_TRUE_VALUE;
2359 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2360
2361 *gsi = gsi_after_labels (e2->dest);
2362}
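/* For reference, a user-level doacross loop whose ordered constructs
   the two functions above expand: the sink clause waits for the
   (i-1, j) iteration, the source clause posts completion of (i, j).
   (Illustrative only; any enclosing parallel region is omitted.)  */
#if 0
void
doacross_sketch (int n, int m, float a[n][m])
{
#pragma omp for ordered(2)
  for (int i = 0; i < n; i++)
    for (int j = 0; j < m; j++)
      {
#pragma omp ordered depend(sink: i - 1, j)
	if (i > 0)
	  a[i][j] += a[i - 1][j];
#pragma omp ordered depend(source)
      }
}
#endif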
2363
2364/* Expand all #pragma omp ordered depend(source) and
2365 #pragma omp ordered depend(sink:...) constructs in the current
2366 #pragma omp for ordered(n) region. */
2367
2368static void
2369expand_omp_ordered_source_sink (struct omp_region *region,
2370 struct omp_for_data *fd, tree *counts,
2371 basic_block cont_bb)
2372{
2373 struct omp_region *inner;
2374 int i;
2375 for (i = fd->collapse - 1; i < fd->ordered; i++)
2376 if (i == fd->collapse - 1 && fd->collapse > 1)
2377 counts[i] = NULL_TREE;
2378 else if (i >= fd->collapse && !cont_bb)
2379 counts[i] = build_zero_cst (fd->iter_type);
2380 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2381 && integer_onep (fd->loops[i].step))
2382 counts[i] = NULL_TREE;
2383 else
2384 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2385 tree atype
2386 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2387 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2388 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2389
2390 for (inner = region->inner; inner; inner = inner->next)
2391 if (inner->type == GIMPLE_OMP_ORDERED)
2392 {
2393 gomp_ordered *ord_stmt = inner->ord_stmt;
2394 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2395 location_t loc = gimple_location (ord_stmt);
2396 tree c;
2397 for (c = gimple_omp_ordered_clauses (ord_stmt);
2398 c; c = OMP_CLAUSE_CHAIN (c))
2399 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2400 break;
2401 if (c)
2402 expand_omp_ordered_source (&gsi, fd, counts, loc);
2403 for (c = gimple_omp_ordered_clauses (ord_stmt);
2404 c; c = OMP_CLAUSE_CHAIN (c))
2405 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2406 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2407 gsi_remove (&gsi, true);
2408 }
2409}
2410
2411/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2412 collapsed. */
2413
2414static basic_block
2415expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2416 basic_block cont_bb, basic_block body_bb,
2417 bool ordered_lastprivate)
2418{
2419 if (fd->ordered == fd->collapse)
2420 return cont_bb;
2421
2422 if (!cont_bb)
2423 {
2424 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2425 for (int i = fd->collapse; i < fd->ordered; i++)
2426 {
2427 tree type = TREE_TYPE (fd->loops[i].v);
2428 tree n1 = fold_convert (type, fd->loops[i].n1);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2430 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2431 size_int (i - fd->collapse + 1),
2432 NULL_TREE, NULL_TREE);
2433 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2434 }
2435 return NULL;
2436 }
2437
2438 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2439 {
2440 tree t, type = TREE_TYPE (fd->loops[i].v);
2441 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2442 expand_omp_build_assign (&gsi, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].n1));
2444 if (counts[i])
2445 expand_omp_build_assign (&gsi, counts[i],
2446 build_zero_cst (fd->iter_type));
2447 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2448 size_int (i - fd->collapse + 1),
2449 NULL_TREE, NULL_TREE);
2450 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2451 if (!gsi_end_p (gsi))
2452 gsi_prev (&gsi);
2453 else
2454 gsi = gsi_last_bb (body_bb);
2455 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2456 basic_block new_body = e1->dest;
2457 if (body_bb == cont_bb)
2458 cont_bb = new_body;
2459 edge e2 = NULL;
2460 basic_block new_header;
2461 if (EDGE_COUNT (cont_bb->preds) > 0)
2462 {
2463 gsi = gsi_last_bb (cont_bb);
2464 if (POINTER_TYPE_P (type))
2465 t = fold_build_pointer_plus (fd->loops[i].v,
2466 fold_convert (sizetype,
2467 fd->loops[i].step));
2468 else
2469 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2470 fold_convert (type, fd->loops[i].step));
2471 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2472 if (counts[i])
2473 {
2474 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2475 build_int_cst (fd->iter_type, 1));
2476 expand_omp_build_assign (&gsi, counts[i], t);
2477 t = counts[i];
2478 }
2479 else
2480 {
2481 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2482 fd->loops[i].v, fd->loops[i].n1);
2483 t = fold_convert (fd->iter_type, t);
2484 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2485 true, GSI_SAME_STMT);
2486 }
2487 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2488 size_int (i - fd->collapse + 1),
2489 NULL_TREE, NULL_TREE);
2490 expand_omp_build_assign (&gsi, aref, t);
2491 gsi_prev (&gsi);
2492 e2 = split_block (cont_bb, gsi_stmt (gsi));
2493 new_header = e2->dest;
2494 }
2495 else
2496 new_header = cont_bb;
2497 gsi = gsi_after_labels (new_header);
2498 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2499 true, GSI_SAME_STMT);
2500 tree n2
2501 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2502 true, NULL_TREE, true, GSI_SAME_STMT);
2503 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2504 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2505 edge e3 = split_block (new_header, gsi_stmt (gsi));
2506 cont_bb = e3->dest;
2507 remove_edge (e1);
2508 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2509 e3->flags = EDGE_FALSE_VALUE;
2510 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2511 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2512 e1->probability = e3->probability.invert ();
2513
2514 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2515 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2516
2517 if (e2)
2518 {
2519 class loop *loop = alloc_loop ();
2520 loop->header = new_header;
2521 loop->latch = e2->src;
2522 add_loop (loop, body_bb->loop_father);
2523 }
2524 }
2525
2526 /* If there are any lastprivate clauses and it is possible some loops
2527 might have zero iterations, ensure all the decls are initialized,
2528 otherwise we could crash evaluating C++ class iterators with lastprivate
2529 clauses. */
2530 bool need_inits = false;
2531 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2532 if (need_inits)
2533 {
2534 tree type = TREE_TYPE (fd->loops[i].v);
2535 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2536 expand_omp_build_assign (&gsi, fd->loops[i].v,
2537 fold_convert (type, fd->loops[i].n1));
2538 }
2539 else
2540 {
2541 tree type = TREE_TYPE (fd->loops[i].v);
2542 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2543 boolean_type_node,
2544 fold_convert (type, fd->loops[i].n1),
2545 fold_convert (type, fd->loops[i].n2));
2546 if (!integer_onep (this_cond))
2547 need_inits = true;
2548 }
2549
2550 return cont_bb;
2551}
2552
2553/* A subroutine of expand_omp_for. Generate code for a parallel
2554 loop with any schedule. Given parameters:
2555
2556 for (V = N1; V cond N2; V += STEP) BODY;
2557
2558 where COND is "<" or ">", we generate pseudocode
2559
2560 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2561 if (more) goto L0; else goto L3;
2562 L0:
2563 V = istart0;
2564 iend = iend0;
2565 L1:
2566 BODY;
2567 V += STEP;
2568 if (V cond iend) goto L1; else goto L2;
2569 L2:
2570 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2571 L3:
2572
2573 If this is a combined omp parallel loop, instead of the call to
2574 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2575 If this is gimple_omp_for_combined_p loop, then instead of assigning
2576 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2577 inner GIMPLE_OMP_FOR and V += STEP; and
2578 if (V cond iend) goto L1; else goto L2; are removed.
2579
2580 For collapsed loops, given parameters:
2581 collapse(3)
2582 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2583 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2584 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2585 BODY;
2586
2587 we generate pseudocode
2588
2589 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2590 if (cond3 is <)
2591 adj = STEP3 - 1;
2592 else
2593 adj = STEP3 + 1;
2594 count3 = (adj + N32 - N31) / STEP3;
2595 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2596 if (cond2 is <)
2597 adj = STEP2 - 1;
2598 else
2599 adj = STEP2 + 1;
2600 count2 = (adj + N22 - N21) / STEP2;
2601 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2602 if (cond1 is <)
2603 adj = STEP1 - 1;
2604 else
2605 adj = STEP1 + 1;
2606 count1 = (adj + N12 - N11) / STEP1;
2607 count = count1 * count2 * count3;
2608 goto Z1;
2609 Z0:
2610 count = 0;
2611 Z1:
2612 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2613 if (more) goto L0; else goto L3;
2614 L0:
2615 V = istart0;
2616 T = V;
2617 V3 = N31 + (T % count3) * STEP3;
2618 T = T / count3;
2619 V2 = N21 + (T % count2) * STEP2;
2620 T = T / count2;
2621 V1 = N11 + T * STEP1;
2622 iend = iend0;
2623 L1:
2624 BODY;
2625 V += 1;
2626 if (V < iend) goto L10; else goto L2;
2627 L10:
2628 V3 += STEP3;
2629 if (V3 cond3 N32) goto L1; else goto L11;
2630 L11:
2631 V3 = N31;
2632 V2 += STEP2;
2633 if (V2 cond2 N22) goto L1; else goto L12;
2634 L12:
2635 V2 = N21;
2636 V1 += STEP1;
2637 goto L1;
2638 L2:
2639 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2640 L3:
2641
2642 */
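/* A sketch of the driver shape described above, assuming the
   GOMP_loop_dynamic_* entry points as one possible start_fn/next_fn
   pair; declarations paraphrased from libgomp, other names
   hypothetical.  */
#if 0
extern _Bool GOMP_loop_dynamic_start (long, long, long, long,
				      long *, long *);
extern _Bool GOMP_loop_dynamic_next (long *, long *);
extern void GOMP_loop_end (void);

static void
generic_driver_sketch (long n1, long n2, long step, long chunk)
{
  long istart0, iend0;
  if (GOMP_loop_dynamic_start (n1, n2, step, chunk, &istart0, &iend0))
    do
      for (long v = istart0; v < iend0; v += step)
	/* BODY using v (L1) */;
    while (GOMP_loop_dynamic_next (&istart0, &iend0));	/* L2 */
  GOMP_loop_end ();					/* L3 */
}
#endif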
2643
2644static void
2645expand_omp_for_generic (struct omp_region *region,
2646 struct omp_for_data *fd,
2647 enum built_in_function start_fn,
2648 enum built_in_function next_fn,
2649 tree sched_arg,
2650 gimple *inner_stmt)
2651{
2652 tree type, istart0, iend0, iend;
2653 tree t, vmain, vback, bias = NULL_TREE;
2654 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2655 basic_block l2_bb = NULL, l3_bb = NULL;
2656 gimple_stmt_iterator gsi;
2657 gassign *assign_stmt;
2658 bool in_combined_parallel = is_combined_parallel (region);
2659 bool broken_loop = region->cont == NULL;
2660 edge e, ne;
2661 tree *counts = NULL;
2662 int i;
2663 bool ordered_lastprivate = false;
2664
2665 gcc_assert (!broken_loop || !in_combined_parallel);
2666 gcc_assert (fd->iter_type == long_integer_type_node
2667 || !in_combined_parallel);
2668
2669 entry_bb = region->entry;
2670 cont_bb = region->cont;
2671 collapse_bb = NULL;
2672 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2673 gcc_assert (broken_loop
2674 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2675 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2676 l1_bb = single_succ (l0_bb);
2677 if (!broken_loop)
2678 {
2679 l2_bb = create_empty_bb (cont_bb);
2680 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2681 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2682 == l1_bb));
2683 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2684 }
2685 else
2686 l2_bb = NULL;
2687 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2688 exit_bb = region->exit;
2689
2690 gsi = gsi_last_nondebug_bb (entry_bb);
2691
2692 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2693 if (fd->ordered
2694 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE_LASTPRIVATE))
2696 ordered_lastprivate = true;
2697 tree reductions = NULL_TREE;
2698 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2699 tree memv = NULL_TREE;
2700 if (fd->lastprivate_conditional)
2701 {
2702 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2703 OMP_CLAUSE__CONDTEMP_);
2704 if (fd->have_pointer_condtemp)
2705 condtemp = OMP_CLAUSE_DECL (c);
2706 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2707 cond_var = OMP_CLAUSE_DECL (c);
2708 }
2709 if (sched_arg)
2710 {
2711 if (fd->have_reductemp)
2712 {
2713 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 OMP_CLAUSE__REDUCTEMP_);
2715 reductions = OMP_CLAUSE_DECL (c);
2716 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2717 gimple *g = SSA_NAME_DEF_STMT (reductions);
2718 reductions = gimple_assign_rhs1 (g);
2719 OMP_CLAUSE_DECL (c) = reductions;
2720 entry_bb = gimple_bb (g);
2721 edge e = split_block (entry_bb, g);
2722 if (region->entry == entry_bb)
2723 region->entry = e->dest;
2724 gsi = gsi_last_bb (entry_bb);
2725 }
2726 else
2727 reductions = null_pointer_node;
2728 if (fd->have_pointer_condtemp)
2729 {
2730 tree type = TREE_TYPE (condtemp);
2731 memv = create_tmp_var (type);
2732 TREE_ADDRESSABLE (memv) = 1;
2733 unsigned HOST_WIDE_INT sz
2734 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2735 sz *= fd->lastprivate_conditional;
2736 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2737 false);
2738 mem = build_fold_addr_expr (memv);
2739 }
2740 else
2741 mem = null_pointer_node;
2742 }
2743 if (fd->collapse > 1 || fd->ordered)
2744 {
2745 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2746 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2747
2748 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2749 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2750 zero_iter1_bb, first_zero_iter1,
2751 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2752
2753 if (zero_iter1_bb)
2754 {
2755 /* Some counts[i] vars might be uninitialized if
2756 some loop has zero iterations. But the body shouldn't
2757 be executed in that case, so just avoid uninit warnings. */
2758 for (i = first_zero_iter1;
2759 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2760 if (SSA_VAR_P (counts[i]))
2761 TREE_NO_WARNING (counts[i]) = 1;
2762 gsi_prev (&gsi);
2763 e = split_block (entry_bb, gsi_stmt (gsi));
2764 entry_bb = e->dest;
2765 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2766 gsi = gsi_last_nondebug_bb (entry_bb);
2767 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2768 get_immediate_dominator (CDI_DOMINATORS,
2769 zero_iter1_bb));
2770 }
2771 if (zero_iter2_bb)
2772 {
2773 /* Some counts[i] vars might be uninitialized if
2774 some loop has zero iterations. But the body shouldn't
2775 be executed in that case, so just avoid uninit warnings. */
2776 for (i = first_zero_iter2; i < fd->ordered; i++)
2777 if (SSA_VAR_P (counts[i]))
2778 TREE_NO_WARNING (counts[i]) = 1;
2779 if (zero_iter1_bb)
2780 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2781 else
2782 {
2783 gsi_prev (&gsi);
2784 e = split_block (entry_bb, gsi_stmt (gsi));
2785 entry_bb = e->dest;
2786 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2787 gsi = gsi_last_nondebug_bb (entry_bb);
2788 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2789 get_immediate_dominator
2790 (CDI_DOMINATORS, zero_iter2_bb));
2791 }
2792 }
2793 if (fd->collapse == 1)
2794 {
2795 counts[0] = fd->loop.n2;
2796 fd->loop = fd->loops[0];
2797 }
2798 }
2799
2800 type = TREE_TYPE (fd->loop.v);
2801 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2802 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2803 TREE_ADDRESSABLE (istart0) = 1;
2804 TREE_ADDRESSABLE (iend0) = 1;
2805
2806 /* See if we need to bias by LLONG_MIN. */
2807 if (fd->iter_type == long_long_unsigned_type_node
2808 && TREE_CODE (type) == INTEGER_TYPE
2809 && !TYPE_UNSIGNED (type)
2810 && fd->ordered == 0)
2811 {
2812 tree n1, n2;
2813
2814 if (fd->loop.cond_code == LT_EXPR)
2815 {
2816 n1 = fd->loop.n1;
2817 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 }
2819 else
2820 {
2821 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2822 n2 = fd->loop.n1;
2823 }
2824 if (TREE_CODE (n1) != INTEGER_CST
2825 || TREE_CODE (n2) != INTEGER_CST
2826 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2827 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2828 }
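/* An illustrative note (hypothetical snippet, assumes <limits.h>):
   adding the bias in unsigned arithmetic maps the signed range
   monotonically onto the unsigned iter_type, so the runtime's
   unsigned comparisons remain correct:
     x = LLONG_MIN  ->  0
     x = 0          ->  0x8000000000000000
     x = LLONG_MAX  ->  ULLONG_MAX  */
#if 0
unsigned long long biased
  = (unsigned long long) x + (unsigned long long) LLONG_MIN;
#endif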
2829
2830 gimple_stmt_iterator gsif = gsi;
2831 gsi_prev (&gsif);
2832
2833 tree arr = NULL_TREE;
2834 if (in_combined_parallel)
2835 {
2836 gcc_assert (fd->ordered == 0);
2837 /* In a combined parallel loop, emit a call to
2838 GOMP_loop_foo_next. */
2839 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2840 build_fold_addr_expr (istart0),
2841 build_fold_addr_expr (iend0));
2842 }
2843 else
2844 {
2845 tree t0, t1, t2, t3, t4;
2846 /* If this is not a combined parallel loop, emit a call to
2847 GOMP_loop_foo_start in ENTRY_BB. */
2848 t4 = build_fold_addr_expr (iend0);
2849 t3 = build_fold_addr_expr (istart0);
2850 if (fd->ordered)
2851 {
2852 t0 = build_int_cst (unsigned_type_node,
2853 fd->ordered - fd->collapse + 1);
2854 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2855 fd->ordered
2856 - fd->collapse + 1),
2857 ".omp_counts");
2858 DECL_NAMELESS (arr) = 1;
2859 TREE_ADDRESSABLE (arr) = 1;
2860 TREE_STATIC (arr) = 1;
2861 vec<constructor_elt, va_gc> *v;
2862 vec_alloc (v, fd->ordered - fd->collapse + 1);
2863 int idx;
2864
2865 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2866 {
2867 tree c;
2868 if (idx == 0 && fd->collapse > 1)
2869 c = fd->loop.n2;
2870 else
2871 c = counts[idx + fd->collapse - 1];
2872 tree purpose = size_int (idx);
2873 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2874 if (TREE_CODE (c) != INTEGER_CST)
2875 TREE_STATIC (arr) = 0;
2876 }
2877
2878 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2879 if (!TREE_STATIC (arr))
2880 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2881 void_type_node, arr),
2882 true, NULL_TREE, true, GSI_SAME_STMT);
2883 t1 = build_fold_addr_expr (arr);
2884 t2 = NULL_TREE;
2885 }
2886 else
2887 {
2888 t2 = fold_convert (fd->iter_type, fd->loop.step);
2889 t1 = fd->loop.n2;
2890 t0 = fd->loop.n1;
2891 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2892 {
2893 tree innerc
2894 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2895 OMP_CLAUSE__LOOPTEMP_);
2896 gcc_assert (innerc);
2897 t0 = OMP_CLAUSE_DECL (innerc);
2898 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2899 OMP_CLAUSE__LOOPTEMP_);
2900 gcc_assert (innerc);
2901 t1 = OMP_CLAUSE_DECL (innerc);
2902 }
2903 if (POINTER_TYPE_P (TREE_TYPE (t0))
2904 && TYPE_PRECISION (TREE_TYPE (t0))
2905 != TYPE_PRECISION (fd->iter_type))
2906 {
2907 /* Avoid casting pointers to integer of a different size. */
2908 tree itype = signed_type_for (type);
2909 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2910 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2911 }
2912 else
2913 {
2914 t1 = fold_convert (fd->iter_type, t1);
2915 t0 = fold_convert (fd->iter_type, t0);
2916 }
2917 if (bias)
2918 {
2919 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2920 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2921 }
2922 }
2923 if (fd->iter_type == long_integer_type_node || fd->ordered)
2924 {
2925 if (fd->chunk_size)
2926 {
2927 t = fold_convert (fd->iter_type, fd->chunk_size);
2928 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2929 if (sched_arg)
2930 {
2931 if (fd->ordered)
2932 t = build_call_expr (builtin_decl_explicit (start_fn),
2933 8, t0, t1, sched_arg, t, t3, t4,
2934 reductions, mem);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 9, t0, t1, t2, sched_arg, t, t3, t4,
2938 reductions, mem);
2939 }
2940 else if (fd->ordered)
2941 t = build_call_expr (builtin_decl_explicit (start_fn),
2942 5, t0, t1, t, t3, t4);
2943 else
2944 t = build_call_expr (builtin_decl_explicit (start_fn),
2945 6, t0, t1, t2, t, t3, t4);
2946 }
2947 else if (fd->ordered)
2948 t = build_call_expr (builtin_decl_explicit (start_fn),
2949 4, t0, t1, t3, t4);
2950 else
2951 t = build_call_expr (builtin_decl_explicit (start_fn),
2952 5, t0, t1, t2, t3, t4);
2953 }
2954 else
2955 {
2956 tree t5;
2957 tree c_bool_type;
2958 tree bfn_decl;
2959
2960 /* The GOMP_loop_ull_*start functions have an additional boolean
2961 argument, true for < loops and false for > loops.
2962 In Fortran, the C bool type can be different from
2963 boolean_type_node. */
2964 bfn_decl = builtin_decl_explicit (start_fn);
2965 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2966 t5 = build_int_cst (c_bool_type,
2967 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2968 if (fd->chunk_size)
2969 {
2970 tree bfn_decl = builtin_decl_explicit (start_fn);
2971 t = fold_convert (fd->iter_type, fd->chunk_size);
2972 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2973 if (sched_arg)
2974 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2975 t, t3, t4, reductions, mem);
2976 else
2977 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2978 }
2979 else
2980 t = build_call_expr (builtin_decl_explicit (start_fn),
2981 6, t5, t0, t1, t2, t3, t4);
2982 }
2983 }
2984 if (TREE_TYPE (t) != boolean_type_node)
2985 t = fold_build2 (NE_EXPR, boolean_type_node,
2986 t, build_int_cst (TREE_TYPE (t), 0));
2987 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2988 true, GSI_SAME_STMT);
2989 if (arr && !TREE_STATIC (arr))
2990 {
2991 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2992 TREE_THIS_VOLATILE (clobber) = 1;
2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2994 GSI_SAME_STMT);
2995 }
2996 if (fd->have_pointer_condtemp)
2997 expand_omp_build_assign (&gsi, condtemp, memv, false);
2998 if (fd->have_reductemp)
2999 {
3000 gimple *g = gsi_stmt (gsi);
3001 gsi_remove (&gsi, true);
3002 release_ssa_name (gimple_assign_lhs (g));
3003
3004 entry_bb = region->entry;
3005 gsi = gsi_last_nondebug_bb (entry_bb);
3006
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3008 }
3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3010
3011 /* Remove the GIMPLE_OMP_FOR statement. */
3012 gsi_remove (&gsi, true);
3013
3014 if (gsi_end_p (gsif))
3015 gsif = gsi_after_labels (gsi_bb (gsif));
3016 gsi_next (&gsif);
3017
3018 /* Iteration setup for sequential loop goes in L0_BB. */
3019 tree startvar = fd->loop.v;
3020 tree endvar = NULL_TREE;
3021
3022 if (gimple_omp_for_combined_p (fd->for_stmt))
3023 {
3024 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3025 && gimple_omp_for_kind (inner_stmt)
3026 == GF_OMP_FOR_KIND_SIMD);
3027 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3028 OMP_CLAUSE__LOOPTEMP_);
3029 gcc_assert (innerc);
3030 startvar = OMP_CLAUSE_DECL (innerc);
3031 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3032 OMP_CLAUSE__LOOPTEMP_);
3033 gcc_assert (innerc);
3034 endvar = OMP_CLAUSE_DECL (innerc);
3035 }
3036
3037 gsi = gsi_start_bb (l0_bb);
3038 t = istart0;
3039 if (fd->ordered && fd->collapse == 1)
3040 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3041 fold_convert (fd->iter_type, fd->loop.step));
3042 else if (bias)
3043 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3044 if (fd->ordered && fd->collapse == 1)
3045 {
3046 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3047 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, fold_convert (sizetype, t));
3049 else
3050 {
3051 t = fold_convert (TREE_TYPE (startvar), t);
3052 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3053 fd->loop.n1, t);
3054 }
3055 }
3056 else
3057 {
3058 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3059 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3060 t = fold_convert (TREE_TYPE (startvar), t);
3061 }
3062 t = force_gimple_operand_gsi (&gsi, t,
3063 DECL_P (startvar)
3064 && TREE_ADDRESSABLE (startvar),
3065 NULL_TREE, false, GSI_CONTINUE_LINKING);
3066 assign_stmt = gimple_build_assign (startvar, t);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 if (cond_var)
3069 {
3070 tree itype = TREE_TYPE (cond_var);
3071 /* For the lastprivate(conditional:) itervar, we need an unsigned
3072 iteration counter that starts non-zero and increases.
3073 Prefer as few IVs as possible, so if we can use startvar
3074 itself, use that, or startvar + constant (those would be
3075 incremented with step), and as a last resort use s0 + 1,
3076 incremented by 1 each iteration. */
3077 if ((fd->ordered && fd->collapse == 1)
3078 || bias
3079 || POINTER_TYPE_P (type)
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3081 || fd->loop.cond_code != LT_EXPR)
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3083 build_int_cst (itype, 1));
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3085 t = fold_convert (itype, t);
3086 else
3087 {
3088 tree c = fold_convert (itype, fd->loop.n1);
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3091 }
3092 t = force_gimple_operand_gsi (&gsi, t, false,
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 assign_stmt = gimple_build_assign (cond_var, t);
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3096 }
3097
3098 t = iend0;
3099 if (fd->ordered && fd->collapse == 1)
3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3101 fold_convert (fd->iter_type, fd->loop.step));
3102 else if (bias)
3103 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3104 if (fd->ordered && fd->collapse == 1)
3105 {
3106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3107 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, fold_convert (sizetype, t));
3109 else
3110 {
3111 t = fold_convert (TREE_TYPE (startvar), t);
3112 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3113 fd->loop.n1, t);
3114 }
3115 }
3116 else
3117 {
3118 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3119 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3120 t = fold_convert (TREE_TYPE (startvar), t);
3121 }
3122 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3123 false, GSI_CONTINUE_LINKING);
3124 if (endvar)
3125 {
3126 assign_stmt = gimple_build_assign (endvar, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3129 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3130 else
3131 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3133 }
3134 /* Handle linear clause adjustments. */
3135 tree itercnt = NULL_TREE;
3136 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3137 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3138 c; c = OMP_CLAUSE_CHAIN (c))
3139 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3140 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3141 {
3142 tree d = OMP_CLAUSE_DECL (c);
3143 bool is_ref = omp_is_reference (d);
3144 tree t = d, a, dest;
3145 if (is_ref)
3146 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3147 tree type = TREE_TYPE (t);
3148 if (POINTER_TYPE_P (type))
3149 type = sizetype;
3150 dest = unshare_expr (t);
3151 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3152 expand_omp_build_assign (&gsif, v, t);
3153 if (itercnt == NULL_TREE)
3154 {
3155 itercnt = startvar;
3156 tree n1 = fd->loop.n1;
3157 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3158 {
3159 itercnt
3160 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3161 itercnt);
3162 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3163 }
3164 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3165 itercnt, n1);
3166 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3167 itercnt, fd->loop.step);
3168 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3169 NULL_TREE, false,
3170 GSI_CONTINUE_LINKING);
3171 }
3172 a = fold_build2 (MULT_EXPR, type,
3173 fold_convert (type, itercnt),
3174 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3175 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3176 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3177 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3178 false, GSI_CONTINUE_LINKING);
3179 assign_stmt = gimple_build_assign (dest, t);
3180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3181 }
3182 if (fd->collapse > 1)
3183 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3184
3185 if (fd->ordered)
3186 {
3187 /* Until now, the counts array contained the number of iterations
3188 (or the variable containing it) for the ith loop. From now on,
3189 we need those counts only for collapsed loops, and only from the
3190 2nd to the last collapsed one. Move them one element earlier;
3191 we'll use counts[fd->collapse - 1] for the first source/sink
3192 iteration counter and so on, and counts[fd->ordered]
3193 as the array holding the current counter values for
3194 depend(source). */
3195 if (fd->collapse > 1)
3196 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3197 if (broken_loop)
3198 {
3199 int i;
3200 for (i = fd->collapse; i < fd->ordered; i++)
3201 {
3202 tree type = TREE_TYPE (fd->loops[i].v);
3203 tree this_cond
3204 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3205 fold_convert (type, fd->loops[i].n1),
3206 fold_convert (type, fd->loops[i].n2));
3207 if (!integer_onep (this_cond))
3208 break;
3209 }
3210 if (i < fd->ordered)
3211 {
3212 cont_bb
3213 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3214 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3215 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3216 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3217 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3218 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3219 make_edge (cont_bb, l1_bb, 0);
3220 l2_bb = create_empty_bb (cont_bb);
3221 broken_loop = false;
3222 }
3223 }
3224 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3225 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3226 ordered_lastprivate);
3227 if (counts[fd->collapse - 1])
3228 {
3229 gcc_assert (fd->collapse == 1);
3230 gsi = gsi_last_bb (l0_bb);
3231 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3232 istart0, true);
3233 gsi = gsi_last_bb (cont_bb);
3234 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3235 build_int_cst (fd->iter_type, 1));
3236 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_zero_node, NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3240 t = counts[fd->collapse - 1];
3241 }
3242 else if (fd->collapse > 1)
3243 t = fd->loop.v;
3244 else
3245 {
3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3247 fd->loops[0].v, fd->loops[0].n1);
3248 t = fold_convert (fd->iter_type, t);
3249 }
3250 gsi = gsi_last_bb (l0_bb);
3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3252 size_zero_node, NULL_TREE, NULL_TREE);
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 expand_omp_build_assign (&gsi, aref, t, true);
3256 }
3257
3258 if (!broken_loop)
3259 {
3260 /* Code to control the increment and predicate for the sequential
3261 loop goes in the CONT_BB. */
3262 gsi = gsi_last_nondebug_bb (cont_bb);
3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3265 vmain = gimple_omp_continue_control_use (cont_stmt);
3266 vback = gimple_omp_continue_control_def (cont_stmt);
3267
3268 if (cond_var)
3269 {
3270 tree itype = TREE_TYPE (cond_var);
3271 tree t2;
3272 if ((fd->ordered && fd->collapse == 1)
3273 || bias
3274 || POINTER_TYPE_P (type)
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3276 || fd->loop.cond_code != LT_EXPR)
3277 t2 = build_int_cst (itype, 1);
3278 else
3279 t2 = fold_convert (itype, fd->loop.step);
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3282 NULL_TREE, true, GSI_SAME_STMT);
3283 assign_stmt = gimple_build_assign (cond_var, t2);
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 }
3286
3287 if (!gimple_omp_for_combined_p (fd->for_stmt))
3288 {
3289 if (POINTER_TYPE_P (type))
3290 t = fold_build_pointer_plus (vmain, fd->loop.step);
3291 else
3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3293 t = force_gimple_operand_gsi (&gsi, t,
3294 DECL_P (vback)
3295 && TREE_ADDRESSABLE (vback),
3296 NULL_TREE, true, GSI_SAME_STMT);
3297 assign_stmt = gimple_build_assign (vback, t);
3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3299
3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3301 {
3302 tree tem;
3303 if (fd->collapse > 1)
3304 tem = fd->loop.v;
3305 else
3306 {
3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3308 fd->loops[0].v, fd->loops[0].n1);
3309 tem = fold_convert (fd->iter_type, tem);
3310 }
3311 tree aref = build4 (ARRAY_REF, fd->iter_type,
3312 counts[fd->ordered], size_zero_node,
3313 NULL_TREE, NULL_TREE);
3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3315 true, GSI_SAME_STMT);
3316 expand_omp_build_assign (&gsi, aref, tem);
3317 }
3318
3319 t = build2 (fd->loop.cond_code, boolean_type_node,
3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3321 iend);
3322 gcond *cond_stmt = gimple_build_cond_empty (t);
3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3324 }
3325
3326 /* Remove GIMPLE_OMP_CONTINUE. */
3327 gsi_remove (&gsi, true);
3328
3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3331
3332 /* Emit code to get the next parallel iteration in L2_BB. */
3333 gsi = gsi_start_bb (l2_bb);
3334
3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3336 build_fold_addr_expr (istart0),
3337 build_fold_addr_expr (iend0));
3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3339 false, GSI_CONTINUE_LINKING);
3340 if (TREE_TYPE (t) != boolean_type_node)
3341 t = fold_build2 (NE_EXPR, boolean_type_node,
3342 t, build_int_cst (TREE_TYPE (t), 0));
3343 gcond *cond_stmt = gimple_build_cond_empty (t);
3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3345 }
3346
3347 /* Add the loop cleanup function. */
3348 gsi = gsi_last_nondebug_bb (exit_bb);
3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3353 else
3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3355 gcall *call_stmt = gimple_build_call (t, 0);
3356 if (fd->ordered)
3357 {
3358 tree arr = counts[fd->ordered];
3359 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3360 TREE_THIS_VOLATILE (clobber) = 1;
3361 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3362 GSI_SAME_STMT);
3363 }
3364 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3365 {
3366 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3367 if (fd->have_reductemp)
3368 {
3369 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3370 gimple_call_lhs (call_stmt));
3371 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3372 }
3373 }
3374 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3375 gsi_remove (&gsi, true);
3376
3377 /* Connect the new blocks. */
3378 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3379 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3380
3381 if (!broken_loop)
3382 {
3383 gimple_seq phis;
3384
3385 e = find_edge (cont_bb, l3_bb);
3386 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3387
3388 phis = phi_nodes (l3_bb);
3389 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3390 {
3391 gimple *phi = gsi_stmt (gsi);
3392 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3393 PHI_ARG_DEF_FROM_EDGE (phi, e));
3394 }
3395 remove_edge (e);
3396
3397 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3398 e = find_edge (cont_bb, l1_bb);
3399 if (e == NULL)
3400 {
3401 e = BRANCH_EDGE (cont_bb);
3402 gcc_assert (single_succ (e->dest) == l1_bb);
3403 }
3404 if (gimple_omp_for_combined_p (fd->for_stmt))
3405 {
3406 remove_edge (e);
3407 e = NULL;
3408 }
3409 else if (fd->collapse > 1)
3410 {
3411 remove_edge (e);
3412 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3413 }
3414 else
3415 e->flags = EDGE_TRUE_VALUE;
3416 if (e)
3417 {
3418 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3419 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3420 }
3421 else
3422 {
3423 e = find_edge (cont_bb, l2_bb);
3424 e->flags = EDGE_FALLTHRU;
3425 }
3426 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3427
3428 if (gimple_in_ssa_p (cfun))
3429 {
3430 /* Add phis to the outer loop that connect to the phis in the inner,
3431 original loop, and move the loop entry value of the inner phi to
3432 the loop entry value of the outer phi. */
3433 gphi_iterator psi;
3434 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3435 {
3436 location_t locus;
3437 gphi *nphi;
3438 gphi *exit_phi = psi.phi ();
3439
3440 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3441 continue;
3442
3443 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3444 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3445
3446 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3447 edge latch_to_l1 = find_edge (latch, l1_bb);
3448 gphi *inner_phi
3449 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3450
3451 tree t = gimple_phi_result (exit_phi);
3452 tree new_res = copy_ssa_name (t, NULL);
3453 nphi = create_phi_node (new_res, l0_bb);
3454
3455 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3456 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3457 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3458 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3459 add_phi_arg (nphi, t, entry_to_l0, locus);
3460
3461 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3462 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3463
3464 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3465 }
3466 }
3467
3468 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3469 recompute_dominator (CDI_DOMINATORS, l2_bb));
3470 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3471 recompute_dominator (CDI_DOMINATORS, l3_bb));
3472 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3473 recompute_dominator (CDI_DOMINATORS, l0_bb));
3474 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3475 recompute_dominator (CDI_DOMINATORS, l1_bb));
3476
3477 /* We enter expand_omp_for_generic with a loop. This original loop may
3478 have its own loop struct, or it may be part of an outer loop struct
3479 (which may be the fake loop). */
3480 class loop *outer_loop = entry_bb->loop_father;
3481 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3482
3483 add_bb_to_loop (l2_bb, outer_loop);
3484
3485 /* We've added a new loop around the original loop. Allocate the
3486 corresponding loop struct. */
3487 class loop *new_loop = alloc_loop ();
3488 new_loop->header = l0_bb;
3489 new_loop->latch = l2_bb;
3490 add_loop (new_loop, outer_loop);
3491
3492 /* Allocate a loop structure for the original loop unless we already
3493 had one. */
3494 if (!orig_loop_has_loop_struct
3495 && !gimple_omp_for_combined_p (fd->for_stmt))
3496 {
3497 class loop *orig_loop = alloc_loop ();
3498 orig_loop->header = l1_bb;
3499 /* The loop may have multiple latches. */
3500 add_loop (orig_loop, new_loop);
3501 }
3502 }
3503}
3504
3505/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
3506 compute the needed allocation size: for team allocations if !ALLOC,
3507 or for thread allocations if ALLOC. SZ is the initial size needed for
3508 other purposes, ALLOC_ALIGN the guaranteed alignment of the allocation
3509 in bytes, and CNT the number of elements of each array; for !ALLOC this
3510 is omp_get_num_threads (), for ALLOC the number of iterations handled
3511 by the current thread. If PTR is non-NULL, it is the start of the
3512 allocation, and this routine shall assign to OMP_CLAUSE_DECL (c) of
3513 those _scantemp_ clauses pointers to the corresponding arrays. */
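
/* For illustration, a worked instance of the sizing pass above, under
 assumed (hypothetical) inputs: one non-control, non-ALLOC _scantemp_
 clause whose pointee type is a 64-bit integer, SZ == 4 and
 ALLOC_ALIGN == 8. The array needs alignment al == 8, so SZ is first
 padded to ROUND_UP (4, 8) == 8, ELTSZ accumulates 8 bytes per element,
 and the function returns 8 * CNT + 8 as the total size. A second call
 with PTR non-NULL and the same arguments then assigns PTR + 8 to that
 clause's OMP_CLAUSE_DECL. */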
3514
3515static tree
3516expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3517 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3518 gimple_stmt_iterator *gsi, bool alloc)
3519{
3520 tree eltsz = NULL_TREE;
3521 unsigned HOST_WIDE_INT preval = 0;
3522 if (ptr && sz)
3523 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3524 ptr, size_int (sz));
3525 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3526 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3527 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3528 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3529 {
3530 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3531 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3532 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3533 {
3534 unsigned HOST_WIDE_INT szl
3535 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3536 szl = least_bit_hwi (szl);
3537 if (szl)
3538 al = MIN (al, szl);
3539 }
3540 if (ptr == NULL_TREE)
3541 {
3542 if (eltsz == NULL_TREE)
3543 eltsz = TYPE_SIZE_UNIT (pointee_type);
3544 else
3545 eltsz = size_binop (PLUS_EXPR, eltsz,
3546 TYPE_SIZE_UNIT (pointee_type));
3547 }
3548 if (preval == 0 && al <= alloc_align)
3549 {
3550 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3551 sz += diff;
3552 if (diff && ptr)
3553 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3554 ptr, size_int (diff));
3555 }
3556 else if (al > preval)
3557 {
3558 if (ptr)
3559 {
3560 ptr = fold_convert (pointer_sized_int_node, ptr);
3561 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3562 build_int_cst (pointer_sized_int_node,
3563 al - 1));
3564 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3565 build_int_cst (pointer_sized_int_node,
3566 -(HOST_WIDE_INT) al));
3567 ptr = fold_convert (ptr_type_node, ptr);
3568 }
3569 else
3570 sz += al - 1;
3571 }
3572 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3573 preval = al;
3574 else
3575 preval = 1;
3576 if (ptr)
3577 {
3578 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3579 ptr = OMP_CLAUSE_DECL (c);
3580 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3581 size_binop (MULT_EXPR, cnt,
3582 TYPE_SIZE_UNIT (pointee_type)));
3583 }
3584 }
3585
3586 if (ptr == NULL_TREE)
3587 {
3588 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3589 if (sz)
3590 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3591 return eltsz;
3592 }
3593 else
3594 return ptr;
3595}
3596
3597/* A subroutine of expand_omp_for. Generate code for a parallel
3598 loop with static schedule and no specified chunk size. Given
3599 parameters:
3600
3601 for (V = N1; V cond N2; V += STEP) BODY;
3602
3603 where COND is "<" or ">", we generate pseudocode
3604
3605 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3606 if (cond is <)
3607 adj = STEP - 1;
3608 else
3609 adj = STEP + 1;
3610 if ((__typeof (V)) -1 > 0 && cond is >)
3611 n = -(adj + N2 - N1) / -STEP;
3612 else
3613 n = (adj + N2 - N1) / STEP;
3614 q = n / nthreads;
3615 tt = n % nthreads;
3616 if (threadid < tt) goto L3; else goto L4;
3617 L3:
3618 tt = 0;
3619 q = q + 1;
3620 L4:
3621 s0 = q * threadid + tt;
3622 e0 = s0 + q;
3623 V = s0 * STEP + N1;
3624 if (s0 >= e0) goto L2; else goto L0;
3625 L0:
3626 e = e0 * STEP + N1;
3627 L1:
3628 BODY;
3629 V += STEP;
3630 if (V cond e) goto L1;
3631 L2:
3632*/
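
/* For illustration, a worked instance of the partitioning above
 (hypothetical numbers, not from the sources): with n == 10 and
 nthreads == 4, q == 2 and tt == 2; threads 0 and 1 satisfy
 threadid < tt, so they take q + 1 == 3 iterations each, covering
 [s0, e0) == [0, 3) and [3, 6), while threads 2 and 3 keep q == 2
 and cover [6, 8) and [8, 10). */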
3633
3634static void
3635expand_omp_for_static_nochunk (struct omp_region *region,
3636 struct omp_for_data *fd,
3637 gimple *inner_stmt)
3638{
3639 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3640 tree type, itype, vmain, vback;
3641 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3642 basic_block body_bb, cont_bb, collapse_bb = NULL;
3643 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3644 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
3645 gimple_stmt_iterator gsi, gsip;
3646 edge ep;
3647 bool broken_loop = region->cont == NULL;
3648 tree *counts = NULL;
3649 tree n1, n2, step;
3650 tree reductions = NULL_TREE;
3651 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3652
3653 itype = type = TREE_TYPE (fd->loop.v);
3654 if (POINTER_TYPE_P (type))
3655 itype = signed_type_for (type);
3656
3657 entry_bb = region->entry;
3658 cont_bb = region->cont;
3659 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3660 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3661 gcc_assert (broken_loop
3662 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3663 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3664 body_bb = single_succ (seq_start_bb);
3665 if (!broken_loop)
3666 {
3667 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3668 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3669 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3670 }
3671 exit_bb = region->exit;
3672
3673 /* Iteration space partitioning goes in ENTRY_BB. */
3674 gsi = gsi_last_nondebug_bb (entry_bb);
3675 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3676 gsip = gsi;
3677 gsi_prev (&gsip);
3678
3679 if (fd->collapse > 1)
3680 {
3681 int first_zero_iter = -1, dummy = -1;
3682 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3683
3684 counts = XALLOCAVEC (tree, fd->collapse);
3685 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3686 fin_bb, first_zero_iter,
3687 dummy_bb, dummy, l2_dom_bb);
3688 t = NULL_TREE;
3689 }
3690 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3691 t = integer_one_node;
3692 else
3693 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3694 fold_convert (type, fd->loop.n1),
3695 fold_convert (type, fd->loop.n2));
3696 if (fd->collapse == 1
3697 && TYPE_UNSIGNED (type)
3698 && (t == NULL_TREE || !integer_onep (t)))
3699 {
3700 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3701 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3702 true, GSI_SAME_STMT);
3703 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3704 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3705 true, GSI_SAME_STMT);
3706 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3707 NULL_TREE, NULL_TREE);
3708 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3709 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3710 expand_omp_regimplify_p, NULL, NULL)
3711 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3712 expand_omp_regimplify_p, NULL, NULL))
3713 {
3714 gsi = gsi_for_stmt (cond_stmt);
3715 gimple_regimplify_operands (cond_stmt, &gsi);
3716 }
3717 ep = split_block (entry_bb, cond_stmt);
3718 ep->flags = EDGE_TRUE_VALUE;
3719 entry_bb = ep->dest;
3720 ep->probability = profile_probability::very_likely ();
3721 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3722 ep->probability = profile_probability::very_unlikely ();
3723 if (gimple_in_ssa_p (cfun))
3724 {
3725 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3726 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3727 !gsi_end_p (gpi); gsi_next (&gpi))
3728 {
3729 gphi *phi = gpi.phi ();
3730 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3731 ep, UNKNOWN_LOCATION);
3732 }
3733 }
3734 gsi = gsi_last_bb (entry_bb);
3735 }
3736
3737 if (fd->lastprivate_conditional)
3738 {
3739 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3740 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3741 if (fd->have_pointer_condtemp)
3742 condtemp = OMP_CLAUSE_DECL (c);
3743 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3744 cond_var = OMP_CLAUSE_DECL (c);
3745 }
3746 if (fd->have_reductemp
3747 /* For scan, we don't want to reinitialize condtemp before the
3748 second loop. */
3749 || (fd->have_pointer_condtemp && !fd->have_scantemp)
3750 || fd->have_nonctrl_scantemp)
3751 {
3752 tree t1 = build_int_cst (long_integer_type_node, 0);
3753 tree t2 = build_int_cst (long_integer_type_node, 1);
3754 tree t3 = build_int_cstu (long_integer_type_node,
3755 (HOST_WIDE_INT_1U << 31) + 1);
3756 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3757 gimple_stmt_iterator gsi2 = gsi_none ();
3758 gimple *g = NULL;
3759 tree mem = null_pointer_node, memv = NULL_TREE;
3760 unsigned HOST_WIDE_INT condtemp_sz = 0;
3761 unsigned HOST_WIDE_INT alloc_align = 0;
3762 if (fd->have_reductemp)
3763 {
3764 gcc_assert (!fd->have_nonctrl_scantemp);
3765 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3766 reductions = OMP_CLAUSE_DECL (c);
3767 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3768 g = SSA_NAME_DEF_STMT (reductions);
3769 reductions = gimple_assign_rhs1 (g);
3770 OMP_CLAUSE_DECL (c) = reductions;
3771 gsi2 = gsi_for_stmt (g);
3772 }
3773 else
3774 {
3775 if (gsi_end_p (gsip))
3776 gsi2 = gsi_after_labels (region->entry);
3777 else
3778 gsi2 = gsip;
3779 reductions = null_pointer_node;
3780 }
3781 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
3782 {
3783 tree type;
3784 if (fd->have_pointer_condtemp)
3785 type = TREE_TYPE (condtemp);
3786 else
3787 type = ptr_type_node;
3788 memv = create_tmp_var (type);
3789 TREE_ADDRESSABLE (memv) = 1;
3790 unsigned HOST_WIDE_INT sz = 0;
3791 tree size = NULL_TREE;
3792 if (fd->have_pointer_condtemp)
3793 {
3794 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3795 sz *= fd->lastprivate_conditional;
3796 condtemp_sz = sz;
3797 }
3798 if (fd->have_nonctrl_scantemp)
3799 {
3800 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3801 gimple *g = gimple_build_call (nthreads, 0);
3802 nthreads = create_tmp_var (integer_type_node);
3803 gimple_call_set_lhs (g, nthreads);
3804 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3805 nthreads = fold_convert (sizetype, nthreads);
3806 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3807 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3808 alloc_align, nthreads, NULL,
3809 false);
3810 size = fold_convert (type, size);
3811 }
3812 else
3813 size = build_int_cst (type, sz);
3814 expand_omp_build_assign (&gsi2, memv, size, false);
3815 mem = build_fold_addr_expr (memv);
3816 }
3817 tree t
3818 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3819 9, t1, t2, t2, t3, t1, null_pointer_node,
3820 null_pointer_node, reductions, mem);
3821 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3822 true, GSI_SAME_STMT);
3823 if (fd->have_pointer_condtemp)
3824 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3825 if (fd->have_nonctrl_scantemp)
3826 {
3827 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3828 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3829 alloc_align, nthreads, &gsi2, false);
3830 }
3831 if (fd->have_reductemp)
3832 {
3833 gsi_remove (&gsi2, true);
3834 release_ssa_name (gimple_assign_lhs (g));
3835 }
3836 }
3837 switch (gimple_omp_for_kind (fd->for_stmt))
3838 {
3839 case GF_OMP_FOR_KIND_FOR:
3840 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3841 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3842 break;
3843 case GF_OMP_FOR_KIND_DISTRIBUTE:
3844 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3845 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3846 break;
3847 default:
3848 gcc_unreachable ();
3849 }
3850 nthreads = build_call_expr (nthreads, 0);
3851 nthreads = fold_convert (itype, nthreads);
3852 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3853 true, GSI_SAME_STMT);
3854 threadid = build_call_expr (threadid, 0);
3855 threadid = fold_convert (itype, threadid);
3856 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3857 true, GSI_SAME_STMT);
3858
3859 n1 = fd->loop.n1;
3860 n2 = fd->loop.n2;
3861 step = fd->loop.step;
3862 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3863 {
3864 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3865 OMP_CLAUSE__LOOPTEMP_);
3866 gcc_assert (innerc);
3867 n1 = OMP_CLAUSE_DECL (innerc);
3868 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3869 OMP_CLAUSE__LOOPTEMP_);
3870 gcc_assert (innerc);
3871 n2 = OMP_CLAUSE_DECL (innerc);
3872 }
3873 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3874 true, NULL_TREE, true, GSI_SAME_STMT);
3875 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3876 true, NULL_TREE, true, GSI_SAME_STMT);
3877 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3878 true, NULL_TREE, true, GSI_SAME_STMT);
3879
3880 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3881 t = fold_build2 (PLUS_EXPR, itype, step, t);
3882 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3883 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3884 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3885 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3886 fold_build1 (NEGATE_EXPR, itype, t),
3887 fold_build1 (NEGATE_EXPR, itype, step));
3888 else
3889 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3890 t = fold_convert (itype, t);
3891 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3892
3893 q = create_tmp_reg (itype, "q");
3894 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3895 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3896 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3897
3898 tt = create_tmp_reg (itype, "tt");
3899 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3900 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3901 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3902
3903 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3904 gcond *cond_stmt = gimple_build_cond_empty (t);
3905 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3906
3907 second_bb = split_block (entry_bb, cond_stmt)->dest;
3908 gsi = gsi_last_nondebug_bb (second_bb);
3909 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3910
3911 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3912 GSI_SAME_STMT);
3913 gassign *assign_stmt
3914 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3915 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3916
3917 third_bb = split_block (second_bb, assign_stmt)->dest;
3918 gsi = gsi_last_nondebug_bb (third_bb);
3919 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3920
3921 if (fd->have_nonctrl_scantemp)
3922 {
3923 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3924 tree controlp = NULL_TREE, controlb = NULL_TREE;
3925 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3926 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3927 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3928 {
3929 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3930 controlb = OMP_CLAUSE_DECL (c);
3931 else
3932 controlp = OMP_CLAUSE_DECL (c);
3933 if (controlb && controlp)
3934 break;
3935 }
3936 gcc_assert (controlp && controlb);
3937 tree cnt = create_tmp_var (sizetype);
3938 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3940 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3941 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3942 alloc_align, cnt, NULL, true);
3943 tree size = create_tmp_var (sizetype);
3944 expand_omp_build_assign (&gsi, size, sz, false);
3945 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3946 size, size_int (16384));
3947 expand_omp_build_assign (&gsi, controlb, cmp);
3948 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3949 NULL_TREE, NULL_TREE);
3950 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3951 fourth_bb = split_block (third_bb, g)->dest;
3952 gsi = gsi_last_nondebug_bb (fourth_bb);
3953 /* FIXME: Once we have allocators, this should use allocator. */
3954 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3955 gimple_call_set_lhs (g, controlp);
3956 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3957 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3958 &gsi, true);
3959 gsi_prev (&gsi);
3960 g = gsi_stmt (gsi);
3961 fifth_bb = split_block (fourth_bb, g)->dest;
3962 gsi = gsi_last_nondebug_bb (fifth_bb);
3963
3964 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3965 gimple_call_set_lhs (g, controlp);
3966 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3967 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3968 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3969 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3970 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3971 {
3972 tree tmp = create_tmp_var (sizetype);
3973 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3974 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3975 TYPE_SIZE_UNIT (pointee_type));
3976 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3977 g = gimple_build_call (alloca_decl, 2, tmp,
3978 size_int (TYPE_ALIGN (pointee_type)));
3979 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3980 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3981 }
3982
3983 sixth_bb = split_block (fifth_bb, g)->dest;
3984 gsi = gsi_last_nondebug_bb (sixth_bb);
3985 }
3986
3987 t = build2 (MULT_EXPR, itype, q, threadid);
3988 t = build2 (PLUS_EXPR, itype, t, tt);
3989 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3990
3991 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3992 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3993
3994 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3995 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3996
3997 /* Remove the GIMPLE_OMP_FOR statement. */
3998 gsi_remove (&gsi, true);
3999
4000 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4001 gsi = gsi_start_bb (seq_start_bb);
4002
4003 tree startvar = fd->loop.v;
4004 tree endvar = NULL_TREE;
4005
4006 if (gimple_omp_for_combined_p (fd->for_stmt))
4007 {
4008 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4009 ? gimple_omp_parallel_clauses (inner_stmt)
4010 : gimple_omp_for_clauses (inner_stmt);
4011 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4012 gcc_assert (innerc);
4013 startvar = OMP_CLAUSE_DECL (innerc);
4014 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4015 OMP_CLAUSE__LOOPTEMP_);
4016 gcc_assert (innerc);
4017 endvar = OMP_CLAUSE_DECL (innerc);
4018 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4019 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4020 {
4021 int i;
4022 for (i = 1; i < fd->collapse; i++)
4023 {
4024 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4025 OMP_CLAUSE__LOOPTEMP_);
4026 gcc_assert (innerc);
4027 }
4028 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4029 OMP_CLAUSE__LOOPTEMP_);
4030 if (innerc)
4031 {
4032 /* If needed (distribute parallel for with lastprivate),
4033 propagate down the total number of iterations. */
4034 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4035 fd->loop.n2);
4036 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4037 GSI_CONTINUE_LINKING);
4038 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4039 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4040 }
4041 }
4042 }
4043 t = fold_convert (itype, s0);
4044 t = fold_build2 (MULT_EXPR, itype, t, step);
4045 if (POINTER_TYPE_P (type))
4046 {
4047 t = fold_build_pointer_plus (n1, t);
4048 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4049 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4050 t = fold_convert (signed_type_for (type), t);
4051 }
4052 else
4053 t = fold_build2 (PLUS_EXPR, type, t, n1);
4054 t = fold_convert (TREE_TYPE (startvar), t);
4055 t = force_gimple_operand_gsi (&gsi, t,
4056 DECL_P (startvar)
4057 && TREE_ADDRESSABLE (startvar),
4058 NULL_TREE, false, GSI_CONTINUE_LINKING);
4059 assign_stmt = gimple_build_assign (startvar, t);
4060 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4061 if (cond_var)
4062 {
4063 tree itype = TREE_TYPE (cond_var);
4064 /* For lastprivate(conditional:) itervar, we need some iteration
4065 counter that starts at an unsigned non-zero value and increases.
4066 Prefer as few IVs as possible, so if we can use startvar
4067 itself, use that, or startvar + constant (those would be
4068 incremented with step), and as a last resort use s0 + 1, which
4069 is then incremented by 1. */
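 /* E.g. (an illustrative, assumed case): for an integral loop with
 N1 == 2 and '<' as the condition, startvar itself works, since it
 is always >= 2 and hence non-zero; for N1 == -3, the bias
 1 - N1 == 4 computed below makes the counter start at 1. */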
4070 if (POINTER_TYPE_P (type)
4071 || TREE_CODE (n1) != INTEGER_CST
4072 || fd->loop.cond_code != LT_EXPR)
4073 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4074 build_int_cst (itype, 1));
4075 else if (tree_int_cst_sgn (n1) == 1)
4076 t = fold_convert (itype, t);
4077 else
4078 {
4079 tree c = fold_convert (itype, n1);
4080 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4081 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4082 }
4083 t = force_gimple_operand_gsi (&gsi, t, false,
4084 NULL_TREE, false, GSI_CONTINUE_LINKING);
4085 assign_stmt = gimple_build_assign (cond_var, t);
4086 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4087 }
4088
4089 t = fold_convert (itype, e0);
4090 t = fold_build2 (MULT_EXPR, itype, t, step);
4091 if (POINTER_TYPE_P (type))
4092 {
4093 t = fold_build_pointer_plus (n1, t);
4094 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4095 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4096 t = fold_convert (signed_type_for (type), t);
4097 }
4098 else
4099 t = fold_build2 (PLUS_EXPR, type, t, n1);
4100 t = fold_convert (TREE_TYPE (startvar), t);
4101 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4102 false, GSI_CONTINUE_LINKING);
4103 if (endvar)
4104 {
4105 assign_stmt = gimple_build_assign (endvar, e);
4106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4107 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4108 assign_stmt = gimple_build_assign (fd->loop.v, e);
4109 else
4110 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4111 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4112 }
4113 /* Handle linear clause adjustments. */
4114 tree itercnt = NULL_TREE;
4115 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4116 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4117 c; c = OMP_CLAUSE_CHAIN (c))
4118 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4119 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4120 {
4121 tree d = OMP_CLAUSE_DECL (c);
4122 bool is_ref = omp_is_reference (d);
4123 tree t = d, a, dest;
4124 if (is_ref)
4125 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4126 if (itercnt == NULL_TREE)
4127 {
4128 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4129 {
4130 itercnt = fold_build2 (MINUS_EXPR, itype,
4131 fold_convert (itype, n1),
4132 fold_convert (itype, fd->loop.n1));
4133 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4134 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4135 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4136 NULL_TREE, false,
4137 GSI_CONTINUE_LINKING);
4138 }
4139 else
4140 itercnt = s0;
4141 }
4142 tree type = TREE_TYPE (t);
4143 if (POINTER_TYPE_P (type))
4144 type = sizetype;
4145 a = fold_build2 (MULT_EXPR, type,
4146 fold_convert (type, itercnt),
4147 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4148 dest = unshare_expr (t);
4149 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4150 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4151 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4152 false, GSI_CONTINUE_LINKING);
4153 assign_stmt = gimple_build_assign (dest, t);
4154 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4155 }
4156 if (fd->collapse > 1)
4157 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4158
4159 if (!broken_loop)
4160 {
4161 /* The code controlling the sequential loop replaces the
4162 GIMPLE_OMP_CONTINUE. */
4163 gsi = gsi_last_nondebug_bb (cont_bb);
4164 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4165 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4166 vmain = gimple_omp_continue_control_use (cont_stmt);
4167 vback = gimple_omp_continue_control_def (cont_stmt);
4168
4169 if (cond_var)
4170 {
4171 tree itype = TREE_TYPE (cond_var);
4172 tree t2;
4173 if (POINTER_TYPE_P (type)
4174 || TREE_CODE (n1) != INTEGER_CST
4175 || fd->loop.cond_code != LT_EXPR)
4176 t2 = build_int_cst (itype, 1);
4177 else
4178 t2 = fold_convert (itype, step);
4179 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4180 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4181 NULL_TREE, true, GSI_SAME_STMT);
4182 assign_stmt = gimple_build_assign (cond_var, t2);
4183 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4184 }
4185
4186 if (!gimple_omp_for_combined_p (fd->for_stmt))
4187 {
4188 if (POINTER_TYPE_P (type))
4189 t = fold_build_pointer_plus (vmain, step);
4190 else
4191 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4192 t = force_gimple_operand_gsi (&gsi, t,
4193 DECL_P (vback)
4194 && TREE_ADDRESSABLE (vback),
4195 NULL_TREE, true, GSI_SAME_STMT);
4196 assign_stmt = gimple_build_assign (vback, t);
4197 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4198
4199 t = build2 (fd->loop.cond_code, boolean_type_node,
4200 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4201 ? t : vback, e);
4202 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4203 }
4204
4205 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4206 gsi_remove (&gsi, true);
4207
4208 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4209 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4210 }
4211
4212 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4213 gsi = gsi_last_nondebug_bb (exit_bb);
4214 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4215 {
4216 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4217 if (fd->have_reductemp
4218 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4219 && !fd->have_nonctrl_scantemp))
4220 {
4221 tree fn;
4222 if (t)
4223 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4224 else
4225 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4226 gcall *g = gimple_build_call (fn, 0);
4227 if (t)
4228 {
4229 gimple_call_set_lhs (g, t);
4230 if (fd->have_reductemp)
4231 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4232 NOP_EXPR, t),
4233 GSI_SAME_STMT);
4234 }
4235 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4236 }
4237 else
4238 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4239 }
4240 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4241 && !fd->have_nonctrl_scantemp)
4242 {
4243 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4244 gcall *g = gimple_build_call (fn, 0);
4245 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4246 }
4247 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4248 {
4249 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4250 tree controlp = NULL_TREE, controlb = NULL_TREE;
4251 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4252 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4253 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4254 {
4255 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4256 controlb = OMP_CLAUSE_DECL (c);
4257 else
4258 controlp = OMP_CLAUSE_DECL (c);
4259 if (controlb && controlp)
4260 break;
4261 }
4262 gcc_assert (controlp && controlb);
4263 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4264 NULL_TREE, NULL_TREE);
4265 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4266 exit1_bb = split_block (exit_bb, g)->dest;
4267 gsi = gsi_after_labels (exit1_bb);
4268 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4269 controlp);
4270 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4271 exit2_bb = split_block (exit1_bb, g)->dest;
4272 gsi = gsi_after_labels (exit2_bb);
4273 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4274 controlp);
4275 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4276 exit3_bb = split_block (exit2_bb, g)->dest;
4277 gsi = gsi_after_labels (exit3_bb);
4278 }
4279 gsi_remove (&gsi, true);
4280
4281 /* Connect all the blocks. */
4282 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4283 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4284 ep = find_edge (entry_bb, second_bb);
4285 ep->flags = EDGE_TRUE_VALUE;
4286 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4287 if (fourth_bb)
4288 {
4289 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4290 ep->probability
4291 = profile_probability::guessed_always ().apply_scale (1, 2);
4292 ep = find_edge (third_bb, fourth_bb);
4293 ep->flags = EDGE_TRUE_VALUE;
4294 ep->probability
4295 = profile_probability::guessed_always ().apply_scale (1, 2);
4296 ep = find_edge (fourth_bb, fifth_bb);
4297 redirect_edge_and_branch (ep, sixth_bb);
4298 }
4299 else
4300 sixth_bb = third_bb;
4301 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4302 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4303 if (exit1_bb)
4304 {
4305 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4306 ep->probability
4307 = profile_probability::guessed_always ().apply_scale (1, 2);
4308 ep = find_edge (exit_bb, exit1_bb);
4309 ep->flags = EDGE_TRUE_VALUE;
4310 ep->probability
4311 = profile_probability::guessed_always ().apply_scale (1, 2);
4312 ep = find_edge (exit1_bb, exit2_bb);
4313 redirect_edge_and_branch (ep, exit3_bb);
4314 }
4315
4316 if (!broken_loop)
4317 {
4318 ep = find_edge (cont_bb, body_bb);
4319 if (ep == NULL)
4320 {
4321 ep = BRANCH_EDGE (cont_bb);
4322 gcc_assert (single_succ (ep->dest) == body_bb);
4323 }
4324 if (gimple_omp_for_combined_p (fd->for_stmt))
4325 {
4326 remove_edge (ep);
4327 ep = NULL;
4328 }
4329 else if (fd->collapse > 1)
4330 {
4331 remove_edge (ep);
4332 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4333 }
4334 else
4335 ep->flags = EDGE_TRUE_VALUE;
4336 find_edge (cont_bb, fin_bb)->flags
4337 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4338 }
4339
4340 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4341 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4342 if (fourth_bb)
4343 {
4344 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4345 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4346 }
4347 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4348
4349 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4350 recompute_dominator (CDI_DOMINATORS, body_bb));
4351 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4352 recompute_dominator (CDI_DOMINATORS, fin_bb));
4353 if (exit1_bb)
4354 {
4355 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4356 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4357 }
4358
4359 class loop *loop = body_bb->loop_father;
4360 if (loop != entry_bb->loop_father)
4361 {
4362 gcc_assert (broken_loop || loop->header == body_bb);
4363 gcc_assert (broken_loop
4364 || loop->latch == region->cont
4365 || single_pred (loop->latch) == region->cont);
4366 return;
4367 }
4368
4369 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4370 {
4371 loop = alloc_loop ();
4372 loop->header = body_bb;
4373 if (collapse_bb == NULL)
4374 loop->latch = cont_bb;
4375 add_loop (loop, body_bb->loop_father);
4376 }
4377}
4378
4379/* Return phi in E->DEST with ARG on edge E. */
4380
4381static gphi *
4382find_phi_with_arg_on_edge (tree arg, edge e)
4383{
4384 basic_block bb = e->dest;
4385
4386 for (gphi_iterator gpi = gsi_start_phis (bb);
4387 !gsi_end_p (gpi);
4388 gsi_next (&gpi))
4389 {
4390 gphi *phi = gpi.phi ();
4391 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4392 return phi;
4393 }
4394
4395 return NULL;
4396}
4397
4398/* A subroutine of expand_omp_for. Generate code for a parallel
4399 loop with static schedule and a specified chunk size. Given
4400 parameters:
4401
4402 for (V = N1; V cond N2; V += STEP) BODY;
4403
4404 where COND is "<" or ">", we generate pseudocode
4405
4406 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4407 if (cond is <)
4408 adj = STEP - 1;
4409 else
4410 adj = STEP + 1;
4411 if ((__typeof (V)) -1 > 0 && cond is >)
4412 n = -(adj + N2 - N1) / -STEP;
4413 else
4414 n = (adj + N2 - N1) / STEP;
4415 trip = 0;
4416 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4417 here so that V is defined
4418 if the loop is not entered
4419 L0:
4420 s0 = (trip * nthreads + threadid) * CHUNK;
4421 e0 = min (s0 + CHUNK, n);
4422 if (s0 < n) goto L1; else goto L4;
4423 L1:
4424 V = s0 * STEP + N1;
4425 e = e0 * STEP + N1;
4426 L2:
4427 BODY;
4428 V += STEP;
4429 if (V cond e) goto L2; else goto L3;
4430 L3:
4431 trip += 1;
4432 goto L0;
4433 L4:
4434*/
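
/* For illustration, a worked instance of the chunked schedule above
 (hypothetical numbers, not from the sources): with n == 10,
 nthreads == 2 and CHUNK == 2, trip 0 gives thread 0 the range
 [0, 2) and thread 1 the range [2, 4); trip 1 gives [4, 6) and
 [6, 8); on trip 2, thread 0 runs [8, 10) while thread 1 computes
 s0 == 10 >= n and leaves through L4. */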
4435
4436static void
4437expand_omp_for_static_chunk (struct omp_region *region,
4438 struct omp_for_data *fd, gimple *inner_stmt)
4439{
4440 tree n, s0, e0, e, t;
4441 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4442 tree type, itype, vmain, vback, vextra;
4443 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4444 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4445 gimple_stmt_iterator gsi, gsip;
4446 edge se;
4447 bool broken_loop = region->cont == NULL;
4448 tree *counts = NULL;
4449 tree n1, n2, step;
4450 tree reductions = NULL_TREE;
4451 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4452
4453 itype = type = TREE_TYPE (fd->loop.v);
4454 if (POINTER_TYPE_P (type))
4455 itype = signed_type_for (type);
4456
4457 entry_bb = region->entry;
4458 se = split_block (entry_bb, last_stmt (entry_bb));
4459 entry_bb = se->src;
4460 iter_part_bb = se->dest;
4461 cont_bb = region->cont;
4462 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4463 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4464 gcc_assert (broken_loop
4465 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4466 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4467 body_bb = single_succ (seq_start_bb);
4468 if (!broken_loop)
4469 {
4470 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4471 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4472 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4473 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4474 }
4475 exit_bb = region->exit;
4476
4477 /* Trip and adjustment setup goes in ENTRY_BB. */
4478 gsi = gsi_last_nondebug_bb (entry_bb);
4479 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4480 gsip = gsi;
4481 gsi_prev (&gsip);
4482
4483 if (fd->collapse > 1)
4484 {
4485 int first_zero_iter = -1, dummy = -1;
4486 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4487
4488 counts = XALLOCAVEC (tree, fd->collapse);
4489 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4490 fin_bb, first_zero_iter,
4491 dummy_bb, dummy, l2_dom_bb);
4492 t = NULL_TREE;
4493 }
4494 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4495 t = integer_one_node;
4496 else
4497 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4498 fold_convert (type, fd->loop.n1),
4499 fold_convert (type, fd->loop.n2));
4500 if (fd->collapse == 1
4501 && TYPE_UNSIGNED (type)
4502 && (t == NULL_TREE || !integer_onep (t)))
4503 {
4504 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4505 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4506 true, GSI_SAME_STMT);
4507 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4508 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4509 true, GSI_SAME_STMT);
4510 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4511 NULL_TREE, NULL_TREE);
4512 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4513 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4514 expand_omp_regimplify_p, NULL, NULL)
4515 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4516 expand_omp_regimplify_p, NULL, NULL))
4517 {
4518 gsi = gsi_for_stmt (cond_stmt);
4519 gimple_regimplify_operands (cond_stmt, &gsi);
4520 }
4521 se = split_block (entry_bb, cond_stmt);
4522 se->flags = EDGE_TRUE_VALUE;
4523 entry_bb = se->dest;
4524 se->probability = profile_probability::very_likely ();
4525 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4526 se->probability = profile_probability::very_unlikely ();
4527 if (gimple_in_ssa_p (cfun))
4528 {
4529 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4530 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4531 !gsi_end_p (gpi); gsi_next (&gpi))
4532 {
4533 gphi *phi = gpi.phi ();
4534 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4535 se, UNKNOWN_LOCATION);
4536 }
4537 }
4538 gsi = gsi_last_bb (entry_bb);
4539 }
4540
4541 if (fd->lastprivate_conditional)
4542 {
4543 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4544 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4545 if (fd->have_pointer_condtemp)
4546 condtemp = OMP_CLAUSE_DECL (c);
4547 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4548 cond_var = OMP_CLAUSE_DECL (c);
4549 }
4550 if (fd->have_reductemp || fd->have_pointer_condtemp)
4551 {
4552 tree t1 = build_int_cst (long_integer_type_node, 0);
4553 tree t2 = build_int_cst (long_integer_type_node, 1);
4554 tree t3 = build_int_cstu (long_integer_type_node,
4555 (HOST_WIDE_INT_1U << 31) + 1);
4556 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4557 gimple_stmt_iterator gsi2 = gsi_none ();
4558 gimple *g = NULL;
4559 tree mem = null_pointer_node, memv = NULL_TREE;
4560 if (fd->have_reductemp)
4561 {
4562 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4563 reductions = OMP_CLAUSE_DECL (c);
4564 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4565 g = SSA_NAME_DEF_STMT (reductions);
4566 reductions = gimple_assign_rhs1 (g);
4567 OMP_CLAUSE_DECL (c) = reductions;
4568 gsi2 = gsi_for_stmt (g);
4569 }
4570 else
4571 {
4572 if (gsi_end_p (gsip))
4573 gsi2 = gsi_after_labels (region->entry);
4574 else
4575 gsi2 = gsip;
4576 reductions = null_pointer_node;
4577 }
4578 if (fd->have_pointer_condtemp)
4579 {
4580 tree type = TREE_TYPE (condtemp);
4581 memv = create_tmp_var (type);
4582 TREE_ADDRESSABLE (memv) = 1;
4583 unsigned HOST_WIDE_INT sz
4584 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4585 sz *= fd->lastprivate_conditional;
4586 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4587 false);
4588 mem = build_fold_addr_expr (memv);
4589 }
4590 tree t
4591 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4592 9, t1, t2, t2, t3, t1, null_pointer_node,
4593 null_pointer_node, reductions, mem);
4594 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4595 true, GSI_SAME_STMT);
4596 if (fd->have_pointer_condtemp)
4597 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4598 if (fd->have_reductemp)
4599 {
4600 gsi_remove (&gsi2, true);
4601 release_ssa_name (gimple_assign_lhs (g));
4602 }
4603 }
4604 switch (gimple_omp_for_kind (fd->for_stmt))
4605 {
4606 case GF_OMP_FOR_KIND_FOR:
4607 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4608 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4609 break;
4610 case GF_OMP_FOR_KIND_DISTRIBUTE:
4611 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4612 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4613 break;
4614 default:
4615 gcc_unreachable ();
4616 }
4617 nthreads = build_call_expr (nthreads, 0);
4618 nthreads = fold_convert (itype, nthreads);
4619 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4620 true, GSI_SAME_STMT);
4621 threadid = build_call_expr (threadid, 0);
4622 threadid = fold_convert (itype, threadid);
4623 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4624 true, GSI_SAME_STMT);
4625
4626 n1 = fd->loop.n1;
4627 n2 = fd->loop.n2;
4628 step = fd->loop.step;
4629 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4630 {
4631 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4632 OMP_CLAUSE__LOOPTEMP_);
4633 gcc_assert (innerc);
4634 n1 = OMP_CLAUSE_DECL (innerc);
4635 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4636 OMP_CLAUSE__LOOPTEMP_);
4637 gcc_assert (innerc);
4638 n2 = OMP_CLAUSE_DECL (innerc);
4639 }
4640 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4641 true, NULL_TREE, true, GSI_SAME_STMT);
4642 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4643 true, NULL_TREE, true, GSI_SAME_STMT);
4644 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4645 true, NULL_TREE, true, GSI_SAME_STMT);
4646 tree chunk_size = fold_convert (itype, fd->chunk_size);
4647 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4648 chunk_size
4649 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4650 GSI_SAME_STMT);
4651
4652 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4653 t = fold_build2 (PLUS_EXPR, itype, step, t);
4654 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4655 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4656 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4657 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4658 fold_build1 (NEGATE_EXPR, itype, t),
4659 fold_build1 (NEGATE_EXPR, itype, step));
4660 else
4661 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4662 t = fold_convert (itype, t);
4663 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4664 true, GSI_SAME_STMT);
4665
4666 trip_var = create_tmp_reg (itype, ".trip");
4667 if (gimple_in_ssa_p (cfun))
4668 {
4669 trip_init = make_ssa_name (trip_var);
4670 trip_main = make_ssa_name (trip_var);
4671 trip_back = make_ssa_name (trip_var);
4672 }
4673 else
4674 {
4675 trip_init = trip_var;
4676 trip_main = trip_var;
4677 trip_back = trip_var;
4678 }
4679
4680 gassign *assign_stmt
4681 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4682 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4683
4684 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4685 t = fold_build2 (MULT_EXPR, itype, t, step);
4686 if (POINTER_TYPE_P (type))
4687 t = fold_build_pointer_plus (n1, t);
4688 else
4689 t = fold_build2 (PLUS_EXPR, type, t, n1);
4690 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4691 true, GSI_SAME_STMT);
4692
4693 /* Remove the GIMPLE_OMP_FOR. */
4694 gsi_remove (&gsi, true);
4695
4696 gimple_stmt_iterator gsif = gsi;
4697
4698 /* Iteration space partitioning goes in ITER_PART_BB. */
4699 gsi = gsi_last_bb (iter_part_bb);
4700
4701 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4702 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4703 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4704 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4705 false, GSI_CONTINUE_LINKING);
4706
4707 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4708 t = fold_build2 (MIN_EXPR, itype, t, n);
4709 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4710 false, GSI_CONTINUE_LINKING);
4711
4712 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4713 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4714
4715 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4716 gsi = gsi_start_bb (seq_start_bb);
4717
4718 tree startvar = fd->loop.v;
4719 tree endvar = NULL_TREE;
4720
4721 if (gimple_omp_for_combined_p (fd->for_stmt))
4722 {
4723 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4724 ? gimple_omp_parallel_clauses (inner_stmt)
4725 : gimple_omp_for_clauses (inner_stmt);
4726 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4727 gcc_assert (innerc);
4728 startvar = OMP_CLAUSE_DECL (innerc);
4729 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4730 OMP_CLAUSE__LOOPTEMP_);
4731 gcc_assert (innerc);
4732 endvar = OMP_CLAUSE_DECL (innerc);
4733 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4734 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4735 {
4736 int i;
4737 for (i = 1; i < fd->collapse; i++)
4738 {
4739 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4740 OMP_CLAUSE__LOOPTEMP_);
4741 gcc_assert (innerc);
4742 }
4743 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4744 OMP_CLAUSE__LOOPTEMP_);
4745 if (innerc)
4746 {
4747 /* If needed (distribute parallel for with lastprivate),
4748 propagate down the total number of iterations. */
4749 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4750 fd->loop.n2);
4751 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4752 GSI_CONTINUE_LINKING);
4753 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4754 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4755 }
4756 }
4757 }
4758
4759 t = fold_convert (itype, s0);
4760 t = fold_build2 (MULT_EXPR, itype, t, step);
4761 if (POINTER_TYPE_P (type))
4762 {
4763 t = fold_build_pointer_plus (n1, t);
4764 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4765 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4766 t = fold_convert (signed_type_for (type), t);
4767 }
4768 else
4769 t = fold_build2 (PLUS_EXPR, type, t, n1);
4770 t = fold_convert (TREE_TYPE (startvar), t);
4771 t = force_gimple_operand_gsi (&gsi, t,
4772 DECL_P (startvar)
4773 && TREE_ADDRESSABLE (startvar),
4774 NULL_TREE, false, GSI_CONTINUE_LINKING);
4775 assign_stmt = gimple_build_assign (startvar, t);
4776 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4777 if (cond_var)
4778 {
4779 tree itype = TREE_TYPE (cond_var);
4780 /* For lastprivate(conditional:) itervar, we need some iteration
4781 counter that starts at an unsigned non-zero value and increases.
4782 Prefer as few IVs as possible, so if we can use startvar
4783 itself, use that, or startvar + constant (those would be
4784 incremented with step), and as a last resort use s0 + 1, which
4785 is then incremented by 1. */
4786 if (POINTER_TYPE_P (type)
4787 || TREE_CODE (n1) != INTEGER_CST
4788 || fd->loop.cond_code != LT_EXPR)
4789 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4790 build_int_cst (itype, 1));
4791 else if (tree_int_cst_sgn (n1) == 1)
4792 t = fold_convert (itype, t);
4793 else
4794 {
4795 tree c = fold_convert (itype, n1);
4796 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4797 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4798 }
4799 t = force_gimple_operand_gsi (&gsi, t, false,
4800 NULL_TREE, false, GSI_CONTINUE_LINKING);
4801 assign_stmt = gimple_build_assign (cond_var, t);
4802 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4803 }
4804
4805 t = fold_convert (itype, e0);
4806 t = fold_build2 (MULT_EXPR, itype, t, step);
4807 if (POINTER_TYPE_P (type))
4808 {
4809 t = fold_build_pointer_plus (n1, t);
4810 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4811 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4812 t = fold_convert (signed_type_for (type), t);
4813 }
4814 else
4815 t = fold_build2 (PLUS_EXPR, type, t, n1);
4816 t = fold_convert (TREE_TYPE (startvar), t);
4817 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4818 false, GSI_CONTINUE_LINKING);
4819 if (endvar)
4820 {
4821 assign_stmt = gimple_build_assign (endvar, e);
4822 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4823 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4824 assign_stmt = gimple_build_assign (fd->loop.v, e);
4825 else
4826 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4827 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4828 }
4829 /* Handle linear clause adjustments. */
4830 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4831 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4832 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4833 c; c = OMP_CLAUSE_CHAIN (c))
4834 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4835 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4836 {
4837 tree d = OMP_CLAUSE_DECL (c);
4838 bool is_ref = omp_is_reference (d);
4839 tree t = d, a, dest;
4840 if (is_ref)
4841 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4842 tree type = TREE_TYPE (t);
4843 if (POINTER_TYPE_P (type))
4844 type = sizetype;
4845 dest = unshare_expr (t);
4846 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4847 expand_omp_build_assign (&gsif, v, t);
4848 if (itercnt == NULL_TREE)
4849 {
4850 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4851 {
4852 itercntbias
4853 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4854 fold_convert (itype, fd->loop.n1));
4855 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4856 itercntbias, step);
4857 itercntbias
4858 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4859 NULL_TREE, true,
4860 GSI_SAME_STMT);
4861 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4862 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4863 NULL_TREE, false,
4864 GSI_CONTINUE_LINKING);
4865 }
4866 else
4867 itercnt = s0;
4868 }
4869 a = fold_build2 (MULT_EXPR, type,
4870 fold_convert (type, itercnt),
4871 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4872 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4873 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4874 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4875 false, GSI_CONTINUE_LINKING);
4876 assign_stmt = gimple_build_assign (dest, t);
4877 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4878 }
4879 if (fd->collapse > 1)
4880 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4881
4882 if (!broken_loop)
4883 {
4884 /* The code controlling the sequential loop goes in CONT_BB,
4885 replacing the GIMPLE_OMP_CONTINUE. */
4886 gsi = gsi_last_nondebug_bb (cont_bb);
4887 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4888 vmain = gimple_omp_continue_control_use (cont_stmt);
4889 vback = gimple_omp_continue_control_def (cont_stmt);
4890
4891 if (cond_var)
4892 {
4893 tree itype = TREE_TYPE (cond_var);
4894 tree t2;
4895 if (POINTER_TYPE_P (type)
4896 || TREE_CODE (n1) != INTEGER_CST
4897 || fd->loop.cond_code != LT_EXPR)
4898 t2 = build_int_cst (itype, 1);
4899 else
4900 t2 = fold_convert (itype, step);
4901 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4902 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4903 NULL_TREE, true, GSI_SAME_STMT);
4904 assign_stmt = gimple_build_assign (cond_var, t2);
4905 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4906 }
4907
4908 if (!gimple_omp_for_combined_p (fd->for_stmt))
4909 {
4910 if (POINTER_TYPE_P (type))
4911 t = fold_build_pointer_plus (vmain, step);
4912 else
4913 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4914 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4915 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4916 true, GSI_SAME_STMT);
4917 assign_stmt = gimple_build_assign (vback, t);
4918 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4919
4920 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4921 t = build2 (EQ_EXPR, boolean_type_node,
4922 build_int_cst (itype, 0),
4923 build_int_cst (itype, 1));
4924 else
4925 t = build2 (fd->loop.cond_code, boolean_type_node,
4926 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4927 ? t : vback, e);
4928 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4929 }
4930
4931 /* Remove GIMPLE_OMP_CONTINUE. */
4932 gsi_remove (&gsi, true);
4933
4934 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4935 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4936
4937 /* Trip update code goes into TRIP_UPDATE_BB. */
4938 gsi = gsi_start_bb (trip_update_bb);
4939
4940 t = build_int_cst (itype, 1);
4941 t = build2 (PLUS_EXPR, itype, trip_main, t);
4942 assign_stmt = gimple_build_assign (trip_back, t);
4943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4944 }
4945
4946 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4947 gsi = gsi_last_nondebug_bb (exit_bb);
4948 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4949 {
4950 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4951 if (fd->have_reductemp || fd->have_pointer_condtemp)
4952 {
4953 tree fn;
4954 if (t)
4955 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4956 else
4957 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4958 gcall *g = gimple_build_call (fn, 0);
4959 if (t)
4960 {
4961 gimple_call_set_lhs (g, t);
4962 if (fd->have_reductemp)
4963 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4964 NOP_EXPR, t),
4965 GSI_SAME_STMT);
4966 }
4967 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4968 }
4969 else
4970 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4971 }
4972 else if (fd->have_pointer_condtemp)
4973 {
4974 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4975 gcall *g = gimple_build_call (fn, 0);
4976 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4977 }
4978 gsi_remove (&gsi, true);
4979
4980 /* Connect the new blocks. */
4981 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4982 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4983
4984 if (!broken_loop)
4985 {
4986 se = find_edge (cont_bb, body_bb);
4987 if (se == NULL)
4988 {
4989 se = BRANCH_EDGE (cont_bb);
4990 gcc_assert (single_succ (se->dest) == body_bb);
4991 }
4992 if (gimple_omp_for_combined_p (fd->for_stmt))
4993 {
4994 remove_edge (se);
4995 se = NULL;
4996 }
4997 else if (fd->collapse > 1)
4998 {
4999 remove_edge (se);
5000 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5001 }
5002 else
5003 se->flags = EDGE_TRUE_VALUE;
5004 find_edge (cont_bb, trip_update_bb)->flags
5005 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5006
5007 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5008 iter_part_bb);
5009 }
5010
5011 if (gimple_in_ssa_p (cfun))
5012 {
5013 gphi_iterator psi;
5014 gphi *phi;
5015 edge re, ene;
5016 edge_var_map *vm;
5017 size_t i;
5018
5019 gcc_assert (fd->collapse == 1 && !broken_loop);
5020
5021 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5022 remove arguments of the phi nodes in fin_bb. We need to create
5023 appropriate phi nodes in iter_part_bb instead. */
5024 se = find_edge (iter_part_bb, fin_bb);
5025 re = single_succ_edge (trip_update_bb);
5026 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5027 ene = single_succ_edge (entry_bb);
5028
5029 psi = gsi_start_phis (fin_bb);
5030 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5031 gsi_next (&psi), ++i)
5032 {
5033 gphi *nphi;
5034 location_t locus;
5035
5036 phi = psi.phi ();
5037 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5038 redirect_edge_var_map_def (vm), 0))
5039 continue;
5040
5041 t = gimple_phi_result (phi);
5042 gcc_assert (t == redirect_edge_var_map_result (vm));
5043
5044 if (!single_pred_p (fin_bb))
5045 t = copy_ssa_name (t, phi);
5046
5047 nphi = create_phi_node (t, iter_part_bb);
5048
5049 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5050 locus = gimple_phi_arg_location_from_edge (phi, se);
5051
5052 /* A special case -- fd->loop.v is not yet computed in
5053 iter_part_bb; we need to use vextra instead. */
5054 if (t == fd->loop.v)
5055 t = vextra;
5056 add_phi_arg (nphi, t, ene, locus);
5057 locus = redirect_edge_var_map_location (vm);
5058 tree back_arg = redirect_edge_var_map_def (vm);
5059 add_phi_arg (nphi, back_arg, re, locus);
5060 edge ce = find_edge (cont_bb, body_bb);
5061 if (ce == NULL)
5062 {
5063 ce = BRANCH_EDGE (cont_bb);
5064 gcc_assert (single_succ (ce->dest) == body_bb);
5065 ce = single_succ_edge (ce->dest);
5066 }
5067 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5068 gcc_assert (inner_loop_phi != NULL);
5069 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5070 find_edge (seq_start_bb, body_bb), locus);
5071
5072 if (!single_pred_p (fin_bb))
5073 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5074 }
5075 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5076 redirect_edge_var_map_clear (re);
5077 if (single_pred_p (fin_bb))
5078 while (1)
5079 {
5080 psi = gsi_start_phis (fin_bb);
5081 if (gsi_end_p (psi))
5082 break;
5083 remove_phi_node (&psi, false);
5084 }
5085
5086 /* Make phi node for trip. */
5087 phi = create_phi_node (trip_main, iter_part_bb);
5088 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5089 UNKNOWN_LOCATION);
5090 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5091 UNKNOWN_LOCATION);
5092 }
5093
5094 if (!broken_loop)
5095 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5096 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5097 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5098 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5099 recompute_dominator (CDI_DOMINATORS, fin_bb));
5100 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5101 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5102 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5103 recompute_dominator (CDI_DOMINATORS, body_bb));
5104
5105 if (!broken_loop)
5106 {
5107 class loop *loop = body_bb->loop_father;
5108 class loop *trip_loop = alloc_loop ();
5109 trip_loop->header = iter_part_bb;
5110 trip_loop->latch = trip_update_bb;
5111 add_loop (trip_loop, iter_part_bb->loop_father);
5112
5113 if (loop != entry_bb->loop_father)
5114 {
5115 gcc_assert (loop->header == body_bb);
5116 gcc_assert (loop->latch == region->cont
5117 || single_pred (loop->latch) == region->cont);
5118 trip_loop->inner = loop;
5119 return;
5120 }
5121
5122 if (!gimple_omp_for_combined_p (fd->for_stmt))
5123 {
5124 loop = alloc_loop ();
5125 loop->header = body_bb;
5126 if (collapse_bb == NULL)
5127 loop->latch = cont_bb;
5128 add_loop (loop, trip_loop);
5129 }
5130 }
5131}
5132
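/* Illustration (not part of omp-expand.c): a minimal sketch, with
   hypothetical names, of user code whose worksharing loop reaches the
   chunked static-schedule expansion above (invoked as
   expand_omp_for_static_chunk from expand_omp_for below):

       #include <stdio.h>

       int
       main (void)
       {
         int a[100];
       #pragma omp parallel for schedule(static, 4)
         for (int i = 0; i < 100; i++)
           a[i] = 2 * i;
         printf ("%d\n", a[99]);
         return 0;
       }

   Each thread iterates over successive 4-iteration chunks; the trip
   counter maintained in TRIP_UPDATE_BB above advances by one per
   chunk until all chunks are exhausted.  */
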
5133/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5134 loop. Given parameters:
5135
5136 for (V = N1; V cond N2; V += STEP) BODY;
5137
5138 where COND is "<" or ">", we generate pseudocode
5139
5140 V = N1;
5141 goto L1;
5142 L0:
5143 BODY;
5144 V += STEP;
5145 L1:
5146 if (V cond N2) goto L0; else goto L2;
5147 L2:
5148
5149 For collapsed loops, given parameters:
5150 collapse(3)
5151 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5152 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5153 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5154 BODY;
5155
5156 we generate pseudocode
5157
5158 if (cond3 is <)
5159 adj = STEP3 - 1;
5160 else
5161 adj = STEP3 + 1;
5162 count3 = (adj + N32 - N31) / STEP3;
5163 if (cond2 is <)
5164 adj = STEP2 - 1;
5165 else
5166 adj = STEP2 + 1;
5167 count2 = (adj + N22 - N21) / STEP2;
5168 if (cond1 is <)
5169 adj = STEP1 - 1;
5170 else
5171 adj = STEP1 + 1;
5172 count1 = (adj + N12 - N11) / STEP1;
5173 count = count1 * count2 * count3;
5174 V = 0;
5175 V1 = N11;
5176 V2 = N21;
5177 V3 = N31;
5178 goto L1;
5179 L0:
5180 BODY;
5181 V += 1;
5182 V3 += STEP3;
5183 V2 += (V3 cond3 N32) ? 0 : STEP2;
5184 V3 = (V3 cond3 N32) ? V3 : N31;
5185 V1 += (V2 cond2 N22) ? 0 : STEP1;
5186 V2 = (V2 cond2 N22) ? V2 : N21;
5187 L1:
5188 if (V < count) goto L0; else goto L2;
5189 L2:
5190
5191 */
5192
5193static void
5194expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5195{
5196 tree type, t;
5197 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5198 gimple_stmt_iterator gsi;
5199 gimple *stmt;
5200 gcond *cond_stmt;
5201 bool broken_loop = region->cont == NULL;
5202 edge e, ne;
5203 tree *counts = NULL;
5204 int i;
5205 int safelen_int = INT_MAX;
5206 bool dont_vectorize = false;
5207 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5208 OMP_CLAUSE_SAFELEN);
5209 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5210 OMP_CLAUSE__SIMDUID_);
5211 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5212 OMP_CLAUSE_IF);
5213 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5214 OMP_CLAUSE_SIMDLEN);
5215 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5216 OMP_CLAUSE__CONDTEMP_);
5217 tree n1, n2;
5218 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5219
5220 if (safelen)
5221 {
5222 poly_uint64 val;
5223 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5224 if (!poly_int_tree_p (safelen, &val))
5225 safelen_int = 0;
5226 else
5227 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5228 if (safelen_int == 1)
5229 safelen_int = 0;
5230 }
5231 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5232 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5233 {
5234 safelen_int = 0;
5235 dont_vectorize = true;
5236 }
5237 type = TREE_TYPE (fd->loop.v);
5238 entry_bb = region->entry;
5239 cont_bb = region->cont;
5240 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5241 gcc_assert (broken_loop
5242 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5243 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5244 if (!broken_loop)
5245 {
5246 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5247 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5248 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5249 l2_bb = BRANCH_EDGE (entry_bb)->dest;
5250 }
5251 else
5252 {
5253 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5254 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5255 l2_bb = single_succ (l1_bb);
5256 }
5257 exit_bb = region->exit;
5258 l2_dom_bb = NULL;
5259
5260 gsi = gsi_last_nondebug_bb (entry_bb);
5261
5262 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5263 /* Not needed in SSA form right now. */
5264 gcc_assert (!gimple_in_ssa_p (cfun));
5265 if (fd->collapse > 1)
5266 {
5267 int first_zero_iter = -1, dummy = -1;
5268 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5269
5270 counts = XALLOCAVEC (tree, fd->collapse);
5271 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5272 zero_iter_bb, first_zero_iter,
5273 dummy_bb, dummy, l2_dom_bb);
5274 }
5275 if (l2_dom_bb == NULL)
5276 l2_dom_bb = l1_bb;
5277
5278 n1 = fd->loop.n1;
5279 n2 = fd->loop.n2;
5280 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5281 {
5282 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5283 OMP_CLAUSE__LOOPTEMP_);
5284 gcc_assert (innerc);
5285 n1 = OMP_CLAUSE_DECL (innerc);
5286 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5287 OMP_CLAUSE__LOOPTEMP_);
5288 gcc_assert (innerc);
5289 n2 = OMP_CLAUSE_DECL (innerc);
5290 }
5291 tree step = fd->loop.step;
5292
5293 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5294 OMP_CLAUSE__SIMT_);
5295 if (is_simt)
5296 {
5297 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5298 is_simt = safelen_int > 1;
5299 }
5300 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5301 if (is_simt)
5302 {
5303 simt_lane = create_tmp_var (unsigned_type_node);
5304 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5305 gimple_call_set_lhs (g, simt_lane);
5306 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5307 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5308 fold_convert (TREE_TYPE (step), simt_lane));
5309 n1 = fold_convert (type, n1);
5310 if (POINTER_TYPE_P (type))
5311 n1 = fold_build_pointer_plus (n1, offset);
5312 else
5313 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5314
5315 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5316 if (fd->collapse > 1)
5317 simt_maxlane = build_one_cst (unsigned_type_node);
5318 else if (safelen_int < omp_max_simt_vf ())
5319 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5320 tree vf
5321 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5322 unsigned_type_node, 0);
5323 if (simt_maxlane)
5324 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5325 vf = fold_convert (TREE_TYPE (step), vf);
5326 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5327 }
5328
5329 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5330 if (fd->collapse > 1)
5331 {
5332 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5333 {
5334 gsi_prev (&gsi);
5335 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5336 gsi_next (&gsi);
5337 }
5338 else
5339 for (i = 0; i < fd->collapse; i++)
5340 {
5341 tree itype = TREE_TYPE (fd->loops[i].v);
5342 if (POINTER_TYPE_P (itype))
5343 itype = signed_type_for (itype);
5344 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5345 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5346 }
5347 }
5348 if (cond_var)
5349 {
5350 if (POINTER_TYPE_P (type)
5351 || TREE_CODE (n1) != INTEGER_CST
5352 || fd->loop.cond_code != LT_EXPR
5353 || tree_int_cst_sgn (n1) != 1)
5354 expand_omp_build_assign (&gsi, cond_var,
5355 build_one_cst (TREE_TYPE (cond_var)));
5356 else
5357 expand_omp_build_assign (&gsi, cond_var,
5358 fold_convert (TREE_TYPE (cond_var), n1));
5359 }
5360
5361 /* Remove the GIMPLE_OMP_FOR statement. */
5362 gsi_remove (&gsi, true);
5363
5364 if (!broken_loop)
5365 {
5366 /* Code to control the increment goes in the CONT_BB. */
5367 gsi = gsi_last_nondebug_bb (cont_bb);
5368 stmt = gsi_stmt (gsi);
5369 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5370
5371 if (POINTER_TYPE_P (type))
5372 t = fold_build_pointer_plus (fd->loop.v, step);
5373 else
5374 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5375 expand_omp_build_assign (&gsi, fd->loop.v, t);
5376
5377 if (fd->collapse > 1)
5378 {
5379 i = fd->collapse - 1;
5380 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5381 {
5382 t = fold_convert (sizetype, fd->loops[i].step);
5383 t = fold_build_pointer_plus (fd->loops[i].v, t);
5384 }
5385 else
5386 {
5387 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5388 fd->loops[i].step);
5389 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5390 fd->loops[i].v, t);
5391 }
5392 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5393
5394 for (i = fd->collapse - 1; i > 0; i--)
5395 {
5396 tree itype = TREE_TYPE (fd->loops[i].v);
5397 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5398 if (POINTER_TYPE_P (itype2))
5399 itype2 = signed_type_for (itype2);
5400 t = fold_convert (itype2, fd->loops[i - 1].step);
5401 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5402 GSI_SAME_STMT);
5403 t = build3 (COND_EXPR, itype2,
5404 build2 (fd->loops[i].cond_code, boolean_type_node,
5405 fd->loops[i].v,
5406 fold_convert (itype, fd->loops[i].n2)),
5407 build_int_cst (itype2, 0), t);
5408 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5409 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5410 else
5411 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5412 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5413
5414 t = fold_convert (itype, fd->loops[i].n1);
5415 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5416 GSI_SAME_STMT);
5417 t = build3 (COND_EXPR, itype,
5418 build2 (fd->loops[i].cond_code, boolean_type_node,
5419 fd->loops[i].v,
5420 fold_convert (itype, fd->loops[i].n2)),
5421 fd->loops[i].v, t);
5422 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5423 }
5424 }
5425 if (cond_var)
5426 {
5427 if (POINTER_TYPE_P (type)
5428 || TREE_CODE (n1) != INTEGER_CST
5429 || fd->loop.cond_code != LT_EXPR
5430 || tree_int_cst_sgn (n1) != 1)
5431 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5432 build_one_cst (TREE_TYPE (cond_var)));
5433 else
5434 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5435 fold_convert (TREE_TYPE (cond_var), step));
5436 expand_omp_build_assign (&gsi, cond_var, t);
5437 }
5438
5439 /* Remove GIMPLE_OMP_CONTINUE. */
5440 gsi_remove (&gsi, true);
5441 }
5442
5443 /* Emit the condition in L1_BB. */
5444 gsi = gsi_start_bb (l1_bb);
5445
5446 t = fold_convert (type, n2);
5447 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5448 false, GSI_CONTINUE_LINKING);
5449 tree v = fd->loop.v;
5450 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5451 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5452 false, GSI_CONTINUE_LINKING);
5453 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5454 cond_stmt = gimple_build_cond_empty (t);
5455 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5456 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5457 NULL, NULL)
5458 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5459 NULL, NULL))
5460 {
5461 gsi = gsi_for_stmt (cond_stmt);
5462 gimple_regimplify_operands (cond_stmt, &gsi);
5463 }
5464
5465 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5466 if (is_simt)
5467 {
5468 gsi = gsi_start_bb (l2_bb);
5469 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5470 if (POINTER_TYPE_P (type))
5471 t = fold_build_pointer_plus (fd->loop.v, step);
5472 else
5473 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5474 expand_omp_build_assign (&gsi, fd->loop.v, t);
5475 }
5476
5477 /* Remove GIMPLE_OMP_RETURN. */
5478 gsi = gsi_last_nondebug_bb (exit_bb);
5479 gsi_remove (&gsi, true);
5480
5481 /* Connect the new blocks. */
5482 remove_edge (FALLTHRU_EDGE (entry_bb));
5483
5484 if (!broken_loop)
5485 {
5486 remove_edge (BRANCH_EDGE (entry_bb));
5487 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5488
5489 e = BRANCH_EDGE (l1_bb);
5490 ne = FALLTHRU_EDGE (l1_bb);
5491 e->flags = EDGE_TRUE_VALUE;
5492 }
5493 else
5494 {
5495 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5496
5497 ne = single_succ_edge (l1_bb);
5498 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5499
5500 }
5501 ne->flags = EDGE_FALSE_VALUE;
5502 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5503 ne->probability = e->probability.invert ();
5504
5505 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5506 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5507
5508 if (simt_maxlane)
5509 {
5510 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5511 NULL_TREE, NULL_TREE);
5512 gsi = gsi_last_bb (entry_bb);
5513 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5514 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5515 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5516 FALLTHRU_EDGE (entry_bb)->probability
5517 = profile_probability::guessed_always ().apply_scale (7, 8);
5518 BRANCH_EDGE (entry_bb)->probability
5519 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5520 l2_dom_bb = entry_bb;
5521 }
5522 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5523
5524 if (!broken_loop)
5525 {
5526 class loop *loop = alloc_loop ();
5527 loop->header = l1_bb;
5528 loop->latch = cont_bb;
5529 add_loop (loop, l1_bb->loop_father);
5530 loop->safelen = safelen_int;
5531 if (simduid)
5532 {
5533 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5534 cfun->has_simduid_loops = true;
5535 }
5536 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5537 the loop. */
5538 if ((flag_tree_loop_vectorize
5539 || !global_options_set.x_flag_tree_loop_vectorize)
5540 && flag_tree_loop_optimize
5541 && loop->safelen > 1)
5542 {
5543 loop->force_vectorize = true;
5544 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5545 {
5546 unsigned HOST_WIDE_INT v
5547 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5548 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5549 loop->simdlen = v;
5550 }
5551 cfun->has_force_vectorize_loops = true;
5552 }
5553 else if (dont_vectorize)
5554 loop->dont_vectorize = true;
5555 }
5556 else if (simduid)
5557 cfun->has_simduid_loops = true;
5558}
5559
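/* Illustration (not part of omp-expand.c): a minimal sketch, with
   hypothetical names, of a simd loop handled by expand_omp_simd above:

       void
       saxpy (int n, float a, const float *restrict x, float *restrict y)
       {
       #pragma omp simd safelen(8)
         for (int i = 0; i < n; i++)
           y[i] = a * x[i] + y[i];
       }

   Here safelen(8) ends up as loop->safelen = 8, and unless loop
   vectorization is explicitly disabled the loop is also marked
   force_vectorize, so the vectorizer treats it as explicitly
   vectorizable.  */
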
5560/* Taskloop construct is represented after gimplification with
5561 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5562 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5563 which should just compute all the needed loop temporaries
5564 for GIMPLE_OMP_TASK. */
5565
5566static void
5567expand_omp_taskloop_for_outer (struct omp_region *region,
5568 struct omp_for_data *fd,
5569 gimple *inner_stmt)
5570{
5571 tree type, bias = NULL_TREE;
5572 basic_block entry_bb, cont_bb, exit_bb;
5573 gimple_stmt_iterator gsi;
5574 gassign *assign_stmt;
5575 tree *counts = NULL;
5576 int i;
5577
5578 gcc_assert (inner_stmt);
5579 gcc_assert (region->cont);
5580 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5581 && gimple_omp_task_taskloop_p (inner_stmt));
5582 type = TREE_TYPE (fd->loop.v);
5583
5584 /* See if we need to bias by LLONG_MIN. */
5585 if (fd->iter_type == long_long_unsigned_type_node
5586 && TREE_CODE (type) == INTEGER_TYPE
5587 && !TYPE_UNSIGNED (type))
5588 {
5589 tree n1, n2;
5590
5591 if (fd->loop.cond_code == LT_EXPR)
5592 {
5593 n1 = fd->loop.n1;
5594 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5595 }
5596 else
5597 {
5598 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5599 n2 = fd->loop.n1;
5600 }
5601 if (TREE_CODE (n1) != INTEGER_CST
5602 || TREE_CODE (n2) != INTEGER_CST
5603 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5604 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5605 }
5606
5607 entry_bb = region->entry;
5608 cont_bb = region->cont;
5609 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5610 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5611 exit_bb = region->exit;
5612
5613 gsi = gsi_last_nondebug_bb (entry_bb);
5614 gimple *for_stmt = gsi_stmt (gsi);
5615 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5616 if (fd->collapse > 1)
5617 {
5618 int first_zero_iter = -1, dummy = -1;
5619 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5620
5621 counts = XALLOCAVEC (tree, fd->collapse);
5622 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5623 zero_iter_bb, first_zero_iter,
5624 dummy_bb, dummy, l2_dom_bb);
5625
5626 if (zero_iter_bb)
5627 {
5628 /* Some counts[i] vars might be uninitialized if
5629 some loop has zero iterations. But the body shouldn't
5630 be executed in that case, so just avoid uninit warnings. */
5631 for (i = first_zero_iter; i < fd->collapse; i++)
5632 if (SSA_VAR_P (counts[i]))
5633 TREE_NO_WARNING (counts[i]) = 1;
5634 gsi_prev (&gsi);
5635 edge e = split_block (entry_bb, gsi_stmt (gsi));
5636 entry_bb = e->dest;
5637 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5638 gsi = gsi_last_bb (entry_bb);
5639 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5640 get_immediate_dominator (CDI_DOMINATORS,
5641 zero_iter_bb));
5642 }
5643 }
5644
5645 tree t0, t1;
5646 t1 = fd->loop.n2;
5647 t0 = fd->loop.n1;
5648 if (POINTER_TYPE_P (TREE_TYPE (t0))
5649 && TYPE_PRECISION (TREE_TYPE (t0))
5650 != TYPE_PRECISION (fd->iter_type))
5651 {
5652 /* Avoid casting pointers to an integer of a different size. */
5653 tree itype = signed_type_for (type);
5654 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5655 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5656 }
5657 else
5658 {
5659 t1 = fold_convert (fd->iter_type, t1);
5660 t0 = fold_convert (fd->iter_type, t0);
5661 }
5662 if (bias)
5663 {
5664 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5665 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5666 }
5667
5668 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5669 OMP_CLAUSE__LOOPTEMP_);
5670 gcc_assert (innerc);
5671 tree startvar = OMP_CLAUSE_DECL (innerc);
5672 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5673 gcc_assert (innerc);
5674 tree endvar = OMP_CLAUSE_DECL (innerc);
5675 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5676 {
5677 gcc_assert (innerc);
5678 for (i = 1; i < fd->collapse; i++)
5679 {
5680 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5681 OMP_CLAUSE__LOOPTEMP_);
5682 gcc_assert (innerc);
5683 }
5684 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5685 OMP_CLAUSE__LOOPTEMP_);
5686 if (innerc)
5687 {
5688 /* If needed (inner taskloop has lastprivate clause), propagate
5689 down the total number of iterations. */
5690 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5691 NULL_TREE, false,
5692 GSI_CONTINUE_LINKING);
5693 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5694 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5695 }
5696 }
5697
5698 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5699 GSI_CONTINUE_LINKING);
5700 assign_stmt = gimple_build_assign (startvar, t0);
5701 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5702
5703 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5704 GSI_CONTINUE_LINKING);
5705 assign_stmt = gimple_build_assign (endvar, t1);
5706 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5707 if (fd->collapse > 1)
5708 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5709
5710 /* Remove the GIMPLE_OMP_FOR statement. */
5711 gsi = gsi_for_stmt (for_stmt);
5712 gsi_remove (&gsi, true);
5713
5714 gsi = gsi_last_nondebug_bb (cont_bb);
5715 gsi_remove (&gsi, true);
5716
5717 gsi = gsi_last_nondebug_bb (exit_bb);
5718 gsi_remove (&gsi, true);
5719
5720 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5721 remove_edge (BRANCH_EDGE (entry_bb));
5722 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5723 remove_edge (BRANCH_EDGE (cont_bb));
5724 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5725 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5726 recompute_dominator (CDI_DOMINATORS, region->entry));
5727}
5728
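/* Illustration (not part of omp-expand.c): a minimal sketch, with
   hypothetical names, of a taskloop whose outer GIMPLE_OMP_FOR is
   expanded by expand_omp_taskloop_for_outer above:

       void
       process (int n, double *v)
       {
       #pragma omp taskloop grainsize(64)
         for (int i = 0; i < n; i++)
           v[i] *= 2.0;
       }

   The outer expansion only stores the loop bounds into the _looptemp_
   temporaries of the enclosing GIMPLE_OMP_TASK; the GOMP_taskloop
   runtime call then carves [0, n) into tasks of roughly grainsize
   iterations each.  */
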
5729/* Taskloop construct is represented after gimplification with
5730 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5731 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5732 GOMP_taskloop{,_ull} function arranges for each task to be given just
5733 a single range of iterations. */
5734
5735static void
5736expand_omp_taskloop_for_inner (struct omp_region *region,
5737 struct omp_for_data *fd,
5738 gimple *inner_stmt)
5739{
5740 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5741 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5742 basic_block fin_bb;
5743 gimple_stmt_iterator gsi;
5744 edge ep;
5745 bool broken_loop = region->cont == NULL;
5746 tree *counts = NULL;
5747 tree n1, n2, step;
5748
5749 itype = type = TREE_TYPE (fd->loop.v);
5750 if (POINTER_TYPE_P (type))
5751 itype = signed_type_for (type);
5752
5753 /* See if we need to bias by LLONG_MIN. */
5754 if (fd->iter_type == long_long_unsigned_type_node
5755 && TREE_CODE (type) == INTEGER_TYPE
5756 && !TYPE_UNSIGNED (type))
5757 {
5758 tree n1, n2;
5759
5760 if (fd->loop.cond_code == LT_EXPR)
5761 {
5762 n1 = fd->loop.n1;
5763 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5764 }
5765 else
5766 {
5767 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5768 n2 = fd->loop.n1;
5769 }
5770 if (TREE_CODE (n1) != INTEGER_CST
5771 || TREE_CODE (n2) != INTEGER_CST
5772 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5773 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5774 }
5775
5776 entry_bb = region->entry;
5777 cont_bb = region->cont;
5778 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5779 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5780 gcc_assert (broken_loop
5781 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5782 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5783 if (!broken_loop)
5784 {
5785 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5786 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5787 }
5788 exit_bb = region->exit;
5789
5790 /* Iteration space partitioning goes in ENTRY_BB. */
5791 gsi = gsi_last_nondebug_bb (entry_bb);
5792 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5793
5794 if (fd->collapse > 1)
5795 {
5796 int first_zero_iter = -1, dummy = -1;
5797 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5798
5799 counts = XALLOCAVEC (tree, fd->collapse);
5800 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5801 fin_bb, first_zero_iter,
5802 dummy_bb, dummy, l2_dom_bb);
5803 t = NULL_TREE;
5804 }
5805 else
5806 t = integer_one_node;
5807
5808 step = fd->loop.step;
5809 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5810 OMP_CLAUSE__LOOPTEMP_);
5811 gcc_assert (innerc);
5812 n1 = OMP_CLAUSE_DECL (innerc);
5813 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5814 gcc_assert (innerc);
5815 n2 = OMP_CLAUSE_DECL (innerc);
5816 if (bias)
5817 {
5818 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5819 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5820 }
5821 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5822 true, NULL_TREE, true, GSI_SAME_STMT);
5823 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5824 true, NULL_TREE, true, GSI_SAME_STMT);
5825 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5826 true, NULL_TREE, true, GSI_SAME_STMT);
5827
5828 tree startvar = fd->loop.v;
5829 tree endvar = NULL_TREE;
5830
5831 if (gimple_omp_for_combined_p (fd->for_stmt))
5832 {
5833 tree clauses = gimple_omp_for_clauses (inner_stmt);
5834 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5835 gcc_assert (innerc);
5836 startvar = OMP_CLAUSE_DECL (innerc);
5837 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5838 OMP_CLAUSE__LOOPTEMP_);
5839 gcc_assert (innerc);
5840 endvar = OMP_CLAUSE_DECL (innerc);
5841 }
5842 t = fold_convert (TREE_TYPE (startvar), n1);
5843 t = force_gimple_operand_gsi (&gsi, t,
5844 DECL_P (startvar)
5845 && TREE_ADDRESSABLE (startvar),
5846 NULL_TREE, false, GSI_CONTINUE_LINKING);
5847 gimple *assign_stmt = gimple_build_assign (startvar, t);
5848 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5849
5850 t = fold_convert (TREE_TYPE (startvar), n2);
5851 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5852 false, GSI_CONTINUE_LINKING);
5853 if (endvar)
5854 {
5855 assign_stmt = gimple_build_assign (endvar, e);
5856 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5857 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5858 assign_stmt = gimple_build_assign (fd->loop.v, e);
5859 else
5860 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5861 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5862 }
5863 if (fd->collapse > 1)
5864 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5865
5866 if (!broken_loop)
5867 {
5868 /* The code controlling the sequential loop replaces the
5869 GIMPLE_OMP_CONTINUE. */
5870 gsi = gsi_last_nondebug_bb (cont_bb);
5871 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5872 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5873 vmain = gimple_omp_continue_control_use (cont_stmt);
5874 vback = gimple_omp_continue_control_def (cont_stmt);
5875
5876 if (!gimple_omp_for_combined_p (fd->for_stmt))
5877 {
5878 if (POINTER_TYPE_P (type))
5879 t = fold_build_pointer_plus (vmain, step);
5880 else
5881 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5882 t = force_gimple_operand_gsi (&gsi, t,
5883 DECL_P (vback)
5884 && TREE_ADDRESSABLE (vback),
5885 NULL_TREE, true, GSI_SAME_STMT);
5886 assign_stmt = gimple_build_assign (vback, t);
5887 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5888
5889 t = build2 (fd->loop.cond_code, boolean_type_node,
5890 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5891 ? t : vback, e);
5892 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5893 }
5894
5895 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5896 gsi_remove (&gsi, true);
5897
5898 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5899 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5900 }
5901
5902 /* Remove the GIMPLE_OMP_FOR statement. */
5903 gsi = gsi_for_stmt (fd->for_stmt);
5904 gsi_remove (&gsi, true);
5905
5906 /* Remove the GIMPLE_OMP_RETURN statement. */
5907 gsi = gsi_last_nondebug_bb (exit_bb);
5908 gsi_remove (&gsi, true);
5909
5910 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5911 if (!broken_loop)
5912 remove_edge (BRANCH_EDGE (entry_bb));
5913 else
5914 {
5915 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5916 region->outer->cont = NULL;
5917 }
5918
5919 /* Connect all the blocks. */
5920 if (!broken_loop)
5921 {
5922 ep = find_edge (cont_bb, body_bb);
5923 if (gimple_omp_for_combined_p (fd->for_stmt))
5924 {
5925 remove_edge (ep);
5926 ep = NULL;
5927 }
5928 else if (fd->collapse > 1)
5929 {
5930 remove_edge (ep);
5931 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5932 }
5933 else
5934 ep->flags = EDGE_TRUE_VALUE;
5935 find_edge (cont_bb, fin_bb)->flags
5936 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5937 }
5938
5939 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5940 recompute_dominator (CDI_DOMINATORS, body_bb));
5941 if (!broken_loop)
5942 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5943 recompute_dominator (CDI_DOMINATORS, fin_bb));
5944
5945 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5946 {
5947 class loop *loop = alloc_loop ();
5948 loop->header = body_bb;
5949 if (collapse_bb == NULL)
5950 loop->latch = cont_bb;
5951 add_loop (loop, body_bb->loop_father);
5952 }
5953}
5954
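/* Illustration (not part of omp-expand.c): conceptually, after the
   inner expansion above, each spawned task executes a plain sequential
   loop over the single [start, end) range it received from
   GOMP_taskloop{,_ull}; a sketch with hypothetical names:

       void
       task_body (long start, long end, double *v)
       {
         for (long i = start; i < end; i++)
           v[i] *= 2.0;
       }
*/
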
5955/* A subroutine of expand_omp_for. Generate code for an OpenACC
5956 partitioned loop. The lowering here is abstracted, in that the
5957 loop parameters are passed through internal functions, which are
5958 further lowered by oacc_device_lower, once we get to the target
5959 compiler. The loop is of the form:
5960
5961 for (V = B; V LTGT E; V += S) {BODY}
5962
5963 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5964 (constant 0 for no chunking) and we will have a GWV partitioning
5965 mask, specifying dimensions over which the loop is to be
5966 partitioned (see note below). We generate code that looks like
5967 (this ignores tiling):
5968
5969 <entry_bb> [incoming FALL->body, BRANCH->exit]
5970 typedef signedintify (typeof (V)) T; // underlying signed integral type
5971 T range = E - B;
5972 T chunk_no = 0;
5973 T DIR = LTGT == '<' ? +1 : -1;
5974 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5975 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5976
5977 <head_bb> [created by splitting end of entry_bb]
5978 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5979 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5980 if (!(offset LTGT bound)) goto bottom_bb;
5981
5982 <body_bb> [incoming]
5983 V = B + offset;
5984 {BODY}
5985
5986 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5987 offset += step;
5988 if (offset LTGT bound) goto body_bb; [*]
5989
5990 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5991 chunk_no++;
5992 if (chunk < chunk_max) goto head_bb;
5993
5994 <exit_bb> [incoming]
5995 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5996
5997 [*] Needed if V live at end of loop. */
5998
5999static void
6000expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6001{
6002 tree v = fd->loop.v;
6003 enum tree_code cond_code = fd->loop.cond_code;
6004 enum tree_code plus_code = PLUS_EXPR;
6005
6006 tree chunk_size = integer_minus_one_node;
6007 tree gwv = integer_zero_node;
6008 tree iter_type = TREE_TYPE (v);
6009 tree diff_type = iter_type;
6010 tree plus_type = iter_type;
6011 struct oacc_collapse *counts = NULL;
6012
6013 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6014 == GF_OMP_FOR_KIND_OACC_LOOP);
6015 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6016 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6017
6018 if (POINTER_TYPE_P (iter_type))
6019 {
6020 plus_code = POINTER_PLUS_EXPR;
6021 plus_type = sizetype;
6022 }
6023 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6024 diff_type = signed_type_for (diff_type);
6025 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6026 diff_type = integer_type_node;
6027
6028 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6029 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6030 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6031 basic_block bottom_bb = NULL;
6032
6033 /* entry_bb has two successors; the branch edge is to the exit
6034 block, fallthrough edge to body. */
6035 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6036 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6037
6038 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6039 body_bb, or to a block whose only successor is the body_bb. Its
6040 fallthrough successor is the final block (same as the branch
6041 successor of the entry_bb). */
6042 if (cont_bb)
6043 {
6044 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6045 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6046
6047 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6048 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6049 }
6050 else
6051 gcc_assert (!gimple_in_ssa_p (cfun));
6052
6053 /* The exit block only has entry_bb and cont_bb as predecessors. */
6054 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6055
6056 tree chunk_no;
6057 tree chunk_max = NULL_TREE;
6058 tree bound, offset;
6059 tree step = create_tmp_var (diff_type, ".step");
6060 bool up = cond_code == LT_EXPR;
6061 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6062 bool chunking = !gimple_in_ssa_p (cfun);
6063 bool negating;
6064
6065 /* Tiling vars. */
6066 tree tile_size = NULL_TREE;
6067 tree element_s = NULL_TREE;
6068 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6069 basic_block elem_body_bb = NULL;
6070 basic_block elem_cont_bb = NULL;
6071
6072 /* SSA instances. */
6073 tree offset_incr = NULL_TREE;
6074 tree offset_init = NULL_TREE;
6075
6076 gimple_stmt_iterator gsi;
6077 gassign *ass;
6078 gcall *call;
6079 gimple *stmt;
6080 tree expr;
6081 location_t loc;
6082 edge split, be, fte;
6083
6084 /* Split the end of entry_bb to create head_bb. */
6085 split = split_block (entry_bb, last_stmt (entry_bb));
6086 basic_block head_bb = split->dest;
6087 entry_bb = split->src;
6088
6089 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6090 gsi = gsi_last_nondebug_bb (entry_bb);
6091 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6092 loc = gimple_location (for_stmt);
6093
6094 if (gimple_in_ssa_p (cfun))
6095 {
6096 offset_init = gimple_omp_for_index (for_stmt, 0);
6097 gcc_assert (integer_zerop (fd->loop.n1));
6098 /* The SSA parallelizer does gang parallelism. */
6099 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6100 }
6101
6102 if (fd->collapse > 1 || fd->tiling)
6103 {
6104 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6105 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6106 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6107 TREE_TYPE (fd->loop.n2), loc);
6108
6109 if (SSA_VAR_P (fd->loop.n2))
6110 {
6111 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6112 true, GSI_SAME_STMT);
6113 ass = gimple_build_assign (fd->loop.n2, total);
6114 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6115 }
6116 }
6117
6118 tree b = fd->loop.n1;
6119 tree e = fd->loop.n2;
6120 tree s = fd->loop.step;
6121
6122 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6123 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6124
6125 /* Convert the step, avoiding possible unsigned->signed overflow. */
6126 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6127 if (negating)
6128 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6129 s = fold_convert (diff_type, s);
6130 if (negating)
6131 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6132 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6133
6134 if (!chunking)
6135 chunk_size = integer_zero_node;
6136 expr = fold_convert (diff_type, chunk_size);
6137 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6138 NULL_TREE, true, GSI_SAME_STMT);
6139
6140 if (fd->tiling)
6141 {
6142 /* Determine the tile size and element step,
6143 modify the outer loop step size. */
6144 tile_size = create_tmp_var (diff_type, ".tile_size");
6145 expr = build_int_cst (diff_type, 1);
6146 for (int ix = 0; ix < fd->collapse; ix++)
6147 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6148 expr = force_gimple_operand_gsi (&gsi, expr, true,
6149 NULL_TREE, true, GSI_SAME_STMT);
6150 ass = gimple_build_assign (tile_size, expr);
6151 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6152
6153 element_s = create_tmp_var (diff_type, ".element_s");
6154 ass = gimple_build_assign (element_s, s);
6155 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6156
6157 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6158 s = force_gimple_operand_gsi (&gsi, expr, true,
6159 NULL_TREE, true, GSI_SAME_STMT);
6160 }
6161
6162 /* Determine the range, avoiding possible unsigned->signed overflow. */
6163 negating = !up && TYPE_UNSIGNED (iter_type);
6164 expr = fold_build2 (MINUS_EXPR, plus_type,
6165 fold_convert (plus_type, negating ? b : e),
6166 fold_convert (plus_type, negating ? e : b));
6167 expr = fold_convert (diff_type, expr);
6168 if (negating)
6169 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6170 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6171 NULL_TREE, true, GSI_SAME_STMT);
6172
6173 chunk_no = build_int_cst (diff_type, 0);
6174 if (chunking)
6175 {
6176 gcc_assert (!gimple_in_ssa_p (cfun));
6177
6178 expr = chunk_no;
6179 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6180 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6181
6182 ass = gimple_build_assign (chunk_no, expr);
6183 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6184
6185 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6186 build_int_cst (integer_type_node,
6187 IFN_GOACC_LOOP_CHUNKS),
6188 dir, range, s, chunk_size, gwv);
6189 gimple_call_set_lhs (call, chunk_max);
6190 gimple_set_location (call, loc);
6191 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6192 }
6193 else
6194 chunk_size = chunk_no;
6195
6196 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6197 build_int_cst (integer_type_node,
6198 IFN_GOACC_LOOP_STEP),
6199 dir, range, s, chunk_size, gwv);
6200 gimple_call_set_lhs (call, step);
6201 gimple_set_location (call, loc);
6202 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6203
6204 /* Remove the GIMPLE_OMP_FOR. */
6205 gsi_remove (&gsi, true);
6206
6207 /* Fixup edges from head_bb. */
6208 be = BRANCH_EDGE (head_bb);
6209 fte = FALLTHRU_EDGE (head_bb);
6210 be->flags |= EDGE_FALSE_VALUE;
6211 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6212
6213 basic_block body_bb = fte->dest;
6214
6215 if (gimple_in_ssa_p (cfun))
6216 {
6217 gsi = gsi_last_nondebug_bb (cont_bb);
6218 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6219
6220 offset = gimple_omp_continue_control_use (cont_stmt);
6221 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6222 }
6223 else
6224 {
6225 offset = create_tmp_var (diff_type, ".offset");
6226 offset_init = offset_incr = offset;
6227 }
6228 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6229
6230 /* Loop offset & bound go into head_bb. */
6231 gsi = gsi_start_bb (head_bb);
6232
6233 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6234 build_int_cst (integer_type_node,
6235 IFN_GOACC_LOOP_OFFSET),
6236 dir, range, s,
6237 chunk_size, gwv, chunk_no);
6238 gimple_call_set_lhs (call, offset_init);
6239 gimple_set_location (call, loc);
6240 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6241
6242 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6243 build_int_cst (integer_type_node,
6244 IFN_GOACC_LOOP_BOUND),
6245 dir, range, s,
6246 chunk_size, gwv, offset_init);
6247 gimple_call_set_lhs (call, bound);
6248 gimple_set_location (call, loc);
6249 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6250
6251 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6252 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6253 GSI_CONTINUE_LINKING);
6254
6255 /* V assignment goes into body_bb. */
6256 if (!gimple_in_ssa_p (cfun))
6257 {
6258 gsi = gsi_start_bb (body_bb);
6259
6260 expr = build2 (plus_code, iter_type, b,
6261 fold_convert (plus_type, offset));
6262 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6263 true, GSI_SAME_STMT);
6264 ass = gimple_build_assign (v, expr);
6265 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6266
6267 if (fd->collapse > 1 || fd->tiling)
6268 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
6269
6270 if (fd->tiling)
6271 {
6272 /* Determine the range of the element loop -- usually simply
6273 the tile_size, but could be smaller if the final
6274 iteration of the outer loop is a partial tile. */
6275 tree e_range = create_tmp_var (diff_type, ".e_range");
6276
6277 expr = build2 (MIN_EXPR, diff_type,
6278 build2 (MINUS_EXPR, diff_type, bound, offset),
6279 build2 (MULT_EXPR, diff_type, tile_size,
6280 element_s));
6281 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6282 true, GSI_SAME_STMT);
6283 ass = gimple_build_assign (e_range, expr);
6284 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6285
6286 /* Determine bound, offset & step of inner loop. */
6287 e_bound = create_tmp_var (diff_type, ".e_bound");
6288 e_offset = create_tmp_var (diff_type, ".e_offset");
6289 e_step = create_tmp_var (diff_type, ".e_step");
6290
6291 /* Mark these as element loops. */
6292 tree t, e_gwv = integer_minus_one_node;
6293 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6294
6295 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6296 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6297 element_s, chunk, e_gwv, chunk);
6298 gimple_call_set_lhs (call, e_offset);
6299 gimple_set_location (call, loc);
6300 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6301
6302 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6303 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6304 element_s, chunk, e_gwv, e_offset);
6305 gimple_call_set_lhs (call, e_bound);
6306 gimple_set_location (call, loc);
6307 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6308
6309 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6310 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6311 element_s, chunk, e_gwv);
6312 gimple_call_set_lhs (call, e_step);
6313 gimple_set_location (call, loc);
6314 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6315
6316 /* Add test and split block. */
6317 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6318 stmt = gimple_build_cond_empty (expr);
6319 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6320 split = split_block (body_bb, stmt);
6321 elem_body_bb = split->dest;
6322 if (cont_bb == body_bb)
6323 cont_bb = elem_body_bb;
6324 body_bb = split->src;
6325
6326 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6327
6328 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6329 if (cont_bb == NULL)
6330 {
6331 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6332 e->probability = profile_probability::even ();
6333 split->probability = profile_probability::even ();
6334 }
6335
6336 /* Initialize the user's loop vars. */
6337 gsi = gsi_start_bb (elem_body_bb);
6338 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6339 }
6340 }
6341
6342 /* Loop increment goes into cont_bb. If this is not a loop, we
6343 will have spawned threads as if it was, and each one will
6344 execute one iteration. The specification is not explicit about
6345 whether such constructs are ill-formed or not, and they can
6346 occur, especially when noreturn routines are involved. */
6347 if (cont_bb)
6348 {
6349 gsi = gsi_last_nondebug_bb (cont_bb);
6350 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6351 loc = gimple_location (cont_stmt);
6352
6353 if (fd->tiling)
6354 {
6355 /* Insert element loop increment and test. */
6356 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6357 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6358 true, GSI_SAME_STMT);
6359 ass = gimple_build_assign (e_offset, expr);
6360 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6361 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6362
6363 stmt = gimple_build_cond_empty (expr);
6364 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6365 split = split_block (cont_bb, stmt);
6366 elem_cont_bb = split->src;
6367 cont_bb = split->dest;
6368
6369 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6370 split->probability = profile_probability::unlikely ().guessed ();
6371 edge latch_edge
6372 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6373 latch_edge->probability = profile_probability::likely ().guessed ();
6374
6375 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6376 skip_edge->probability = profile_probability::unlikely ().guessed ();
6377 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6378 loop_entry_edge->probability
6379 = profile_probability::likely ().guessed ();
6380
6381 gsi = gsi_for_stmt (cont_stmt);
6382 }
6383
6384 /* Increment offset. */
6385 if (gimple_in_ssa_p (cfun))
02889d23
CLT
6386 expr = build2 (plus_code, iter_type, offset,
6387 fold_convert (plus_type, step));
6388 else
6389 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6390 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6391 true, GSI_SAME_STMT);
6392 ass = gimple_build_assign (offset_incr, expr);
6393 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6394 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6395 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6396
6397 /* Remove the GIMPLE_OMP_CONTINUE. */
6398 gsi_remove (&gsi, true);
6399
6400 /* Fixup edges from cont_bb. */
6401 be = BRANCH_EDGE (cont_bb);
6402 fte = FALLTHRU_EDGE (cont_bb);
6403 be->flags |= EDGE_TRUE_VALUE;
6404 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6405
6406 if (chunking)
6407 {
6408 /* Split the beginning of exit_bb to make bottom_bb. We
6409 need to insert a nop at the start, because splitting is
6410 after a stmt, not before. */
6411 gsi = gsi_start_bb (exit_bb);
6412 stmt = gimple_build_nop ();
6413 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6414 split = split_block (exit_bb, stmt);
6415 bottom_bb = split->src;
6416 exit_bb = split->dest;
6417 gsi = gsi_last_bb (bottom_bb);
6418
6419 /* Chunk increment and test goes into bottom_bb. */
6420 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6421 build_int_cst (diff_type, 1));
6422 ass = gimple_build_assign (chunk_no, expr);
6423 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6424
6425 /* Chunk test at end of bottom_bb. */
6426 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6427 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6428 GSI_CONTINUE_LINKING);
6429
6430 /* Fixup edges from bottom_bb. */
6431 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6432 split->probability = profile_probability::unlikely ().guessed ();
6433 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6434 latch_edge->probability = profile_probability::likely ().guessed ();
6435 }
6436 }
6437
6438 gsi = gsi_last_nondebug_bb (exit_bb);
6439 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6440 loc = gimple_location (gsi_stmt (gsi));
6441
6442 if (!gimple_in_ssa_p (cfun))
6443 {
6444 /* Insert the final value of V, in case it is live. This is the
6445 value for the only thread that survives past the join. */
6446 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6447 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6448 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6449 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6450 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6451 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6452 true, GSI_SAME_STMT);
6453 ass = gimple_build_assign (v, expr);
6454 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6455 }
6456
6457 /* Remove the OMP_RETURN. */
6458 gsi_remove (&gsi, true);
6459
6460 if (cont_bb)
6461 {
6462 /* We now have one, two or three nested loops. Update the loop
6463 structures. */
6464 class loop *parent = entry_bb->loop_father;
6465 class loop *body = body_bb->loop_father;
6466
6467 if (chunking)
6468 {
6469 class loop *chunk_loop = alloc_loop ();
6470 chunk_loop->header = head_bb;
6471 chunk_loop->latch = bottom_bb;
6472 add_loop (chunk_loop, parent);
6473 parent = chunk_loop;
6474 }
6475 else if (parent != body)
6476 {
6477 gcc_assert (body->header == body_bb);
6478 gcc_assert (body->latch == cont_bb
6479 || single_pred (body->latch) == cont_bb);
6480 parent = NULL;
6481 }
6482
6483 if (parent)
6484 {
6485 class loop *body_loop = alloc_loop ();
6486 body_loop->header = body_bb;
6487 body_loop->latch = cont_bb;
6488 add_loop (body_loop, parent);
6489
6490 if (fd->tiling)
6491 {
6492 /* Insert tiling's element loop. */
6493 class loop *inner_loop = alloc_loop ();
6494 inner_loop->header = elem_body_bb;
6495 inner_loop->latch = elem_cont_bb;
6496 add_loop (inner_loop, body_loop);
6497 }
6498 }
6499 }
6500}
6501
6502/* Expand the OMP loop defined by REGION. */
6503
6504static void
6505expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6506{
6507 struct omp_for_data fd;
6508 struct omp_for_data_loop *loops;
6509
6510 loops
6511 = (struct omp_for_data_loop *)
6512 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6513 * sizeof (struct omp_for_data_loop));
6514 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6515 &fd, loops);
6516 region->sched_kind = fd.sched_kind;
6517 region->sched_modifiers = fd.sched_modifiers;
6518 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6519
6520 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6521 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6522 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6523 if (region->cont)
6524 {
6525 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6526 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6527 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6528 }
6529 else
6530 /* If there isn't a continue then this is a degenerate case where
6531 the introduction of abnormal edges during lowering will prevent
6532 original loops from being detected. Fix that up. */
6533 loops_state_set (LOOPS_NEED_FIXUP);
6534
6535 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6536 expand_omp_simd (region, &fd);
6537 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6538 {
6539 gcc_assert (!inner_stmt);
6540 expand_oacc_for (region, &fd);
6541 }
6542 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6543 {
6544 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6545 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6546 else
6547 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6548 }
6549 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6550 && !fd.have_ordered)
6551 {
6552 if (fd.chunk_size == NULL)
6553 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6554 else
6555 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6556 }
6557 else
6558 {
6559 int fn_index, start_ix, next_ix;
6560 unsigned HOST_WIDE_INT sched = 0;
6561 tree sched_arg = NULL_TREE;
6562
6563 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6564 == GF_OMP_FOR_KIND_FOR);
6565 if (fd.chunk_size == NULL
6566 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6567 fd.chunk_size = integer_zero_node;
6568 switch (fd.sched_kind)
6569 {
6570 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6571 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6572 && fd.lastprivate_conditional == 0)
6573 {
6574 gcc_assert (!fd.have_ordered);
6575 fn_index = 6;
6576 sched = 4;
6577 }
6578 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6579 && !fd.have_ordered
6580 && fd.lastprivate_conditional == 0)
6581 fn_index = 7;
6582 else
6583 {
6584 fn_index = 3;
6585 sched = (HOST_WIDE_INT_1U << 31);
6586 }
6587 break;
6588 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6589 case OMP_CLAUSE_SCHEDULE_GUIDED:
6590 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6591 && !fd.have_ordered
6592 && fd.lastprivate_conditional == 0)
6593 {
6594 fn_index = 3 + fd.sched_kind;
6595 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6596 break;
6597 }
6598 fn_index = fd.sched_kind;
6599 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6600 sched += (HOST_WIDE_INT_1U << 31);
6601 break;
6602 case OMP_CLAUSE_SCHEDULE_STATIC:
6603 gcc_assert (fd.have_ordered);
6604 fn_index = 0;
6605 sched = (HOST_WIDE_INT_1U << 31) + 1;
6606 break;
6607 default:
6608 gcc_unreachable ();
6609 }
6610 if (!fd.ordered)
6611 fn_index += fd.have_ordered * 8;
6612 if (fd.ordered)
6613 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6614 else
6615 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6616 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6617 if (fd.have_reductemp || fd.have_pointer_condtemp)
6618 {
6619 if (fd.ordered)
6620 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6621 else if (fd.have_ordered)
6622 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6623 else
6624 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6625 sched_arg = build_int_cstu (long_integer_type_node, sched);
6626 if (!fd.chunk_size)
6627 fd.chunk_size = integer_zero_node;
6628 }
6629 if (fd.iter_type == long_long_unsigned_type_node)
6630 {
6631 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6632 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6633 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6634 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6635 }
6636 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6637 (enum built_in_function) next_ix, sched_arg,
6638 inner_stmt);
6639 }
6640
6641 if (gimple_in_ssa_p (cfun))
6642 update_ssa (TODO_update_ssa_only_virtuals);
6643}
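/* For illustration, a minimal sketch of the dispatch above: a loop such as

     #pragma omp for schedule(static)
     for (i = 0; i < n; i++)
       body (i);

   has no chunk size and no ordered clause, so it is expanded by
   expand_omp_for_static_nochunk, while "schedule(static, 8)" goes through
   expand_omp_for_static_chunk; the remaining schedules fall through to
   the generic GOMP_loop_*_start/GOMP_loop_*_next library-call expansion.  */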
6644
6645/* Expand code for an OpenMP sections directive. In pseudo code, we generate
6646
6647 v = GOMP_sections_start (n);
6648 L0:
6649 switch (v)
6650 {
6651 case 0:
6652 goto L2;
6653 case 1:
6654 section 1;
6655 goto L1;
6656 case 2:
6657 ...
6658 case n:
6659 ...
6660 default:
6661 abort ();
6662 }
6663 L1:
6664 v = GOMP_sections_next ();
6665 goto L0;
6666 L2:
6667 reduction;
6668
6669 If this is a combined parallel sections, replace the call to
6670 GOMP_sections_start with a call to GOMP_sections_next. */
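/* For illustration, the pseudo code above corresponds roughly to a source
   construct such as

     #pragma omp sections
     {
       #pragma omp section
       work1 ();
       #pragma omp section
       work2 ();
     }

   where each section body becomes one case of the switch and
   GOMP_sections_next returns the index of the next unexecuted section,
   or 0 when no work remains.  */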
6671
6672static void
6673expand_omp_sections (struct omp_region *region)
6674{
6675 tree t, u, vin = NULL, vmain, vnext, l2;
6676 unsigned len;
6677 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6678 gimple_stmt_iterator si, switch_si;
6679 gomp_sections *sections_stmt;
6680 gimple *stmt;
6681 gomp_continue *cont;
6682 edge_iterator ei;
6683 edge e;
6684 struct omp_region *inner;
6685 unsigned i, casei;
6686 bool exit_reachable = region->cont != NULL;
6687
6688 gcc_assert (region->exit != NULL);
6689 entry_bb = region->entry;
6690 l0_bb = single_succ (entry_bb);
6691 l1_bb = region->cont;
6692 l2_bb = region->exit;
6693 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6694 l2 = gimple_block_label (l2_bb);
6695 else
6696 {
6697 /* This can happen if there are reductions. */
6698 len = EDGE_COUNT (l0_bb->succs);
6699 gcc_assert (len > 0);
6700 e = EDGE_SUCC (l0_bb, len - 1);
6701 si = gsi_last_nondebug_bb (e->dest);
6702 l2 = NULL_TREE;
6703 if (gsi_end_p (si)
6704 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6705 l2 = gimple_block_label (e->dest);
6706 else
6707 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6708 {
6709 si = gsi_last_nondebug_bb (e->dest);
6710 if (gsi_end_p (si)
6711 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6712 {
6713 l2 = gimple_block_label (e->dest);
6714 break;
6715 }
6716 }
6717 }
6718 if (exit_reachable)
6719 default_bb = create_empty_bb (l1_bb->prev_bb);
6720 else
6721 default_bb = create_empty_bb (l0_bb);
6722
6723 /* We will build a switch() with enough cases for all the
6724 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6725 and a default case to abort if something goes wrong. */
6726 len = EDGE_COUNT (l0_bb->succs);
6727
6728 /* Use vec::quick_push on label_vec throughout, since we know the size
6729 in advance. */
6730 auto_vec<tree> label_vec (len);
6731
6732 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6733 GIMPLE_OMP_SECTIONS statement. */
6734 si = gsi_last_nondebug_bb (entry_bb);
6735 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6736 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6737 vin = gimple_omp_sections_control (sections_stmt);
6738 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6739 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6740 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6741 tree cond_var = NULL_TREE;
6742 if (reductmp || condtmp)
6743 {
6744 tree reductions = null_pointer_node, mem = null_pointer_node;
6745 tree memv = NULL_TREE, condtemp = NULL_TREE;
6746 gimple_stmt_iterator gsi = gsi_none ();
6747 gimple *g = NULL;
6748 if (reductmp)
6749 {
6750 reductions = OMP_CLAUSE_DECL (reductmp);
6751 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6752 g = SSA_NAME_DEF_STMT (reductions);
6753 reductions = gimple_assign_rhs1 (g);
6754 OMP_CLAUSE_DECL (reductmp) = reductions;
6755 gsi = gsi_for_stmt (g);
6756 }
6757 else
6758 gsi = si;
6759 if (condtmp)
6760 {
6761 condtemp = OMP_CLAUSE_DECL (condtmp);
6762 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6763 OMP_CLAUSE__CONDTEMP_);
6764 cond_var = OMP_CLAUSE_DECL (c);
6765 tree type = TREE_TYPE (condtemp);
6766 memv = create_tmp_var (type);
6767 TREE_ADDRESSABLE (memv) = 1;
6768 unsigned cnt = 0;
6769 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6770 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6771 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6772 ++cnt;
6773 unsigned HOST_WIDE_INT sz
6774 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6775 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6776 false);
6777 mem = build_fold_addr_expr (memv);
6778 }
6779 t = build_int_cst (unsigned_type_node, len - 1);
6780 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6781 stmt = gimple_build_call (u, 3, t, reductions, mem);
6782 gimple_call_set_lhs (stmt, vin);
6783 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6784 if (condtmp)
6785 {
6786 expand_omp_build_assign (&gsi, condtemp, memv, false);
6787 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6788 vin, build_one_cst (TREE_TYPE (cond_var)));
6789 expand_omp_build_assign (&gsi, cond_var, t, false);
6790 }
6791 if (reductmp)
6792 {
6793 gsi_remove (&gsi, true);
6794 release_ssa_name (gimple_assign_lhs (g));
6795 }
6796 }
6797 else if (!is_combined_parallel (region))
6798 {
6799 /* If we are not inside a combined parallel+sections region,
6800 call GOMP_sections_start. */
6801 t = build_int_cst (unsigned_type_node, len - 1);
6802 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6803 stmt = gimple_build_call (u, 1, t);
6804 }
6805 else
6806 {
6807 /* Otherwise, call GOMP_sections_next. */
6808 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6809 stmt = gimple_build_call (u, 0);
6810 }
6811 if (!reductmp && !condtmp)
6812 {
6813 gimple_call_set_lhs (stmt, vin);
6814 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6815 }
6816 gsi_remove (&si, true);
6817
6818 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6819 L0_BB. */
6820 switch_si = gsi_last_nondebug_bb (l0_bb);
6821 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6822 if (exit_reachable)
6823 {
6824 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6825 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6826 vmain = gimple_omp_continue_control_use (cont);
6827 vnext = gimple_omp_continue_control_def (cont);
6828 }
6829 else
6830 {
6831 vmain = vin;
6832 vnext = NULL_TREE;
6833 }
6834
6835 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6836 label_vec.quick_push (t);
6837 i = 1;
6838
6839 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6840 for (inner = region->inner, casei = 1;
6841 inner;
6842 inner = inner->next, i++, casei++)
6843 {
6844 basic_block s_entry_bb, s_exit_bb;
6845
6846 /* Skip optional reduction region. */
6847 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6848 {
6849 --i;
6850 --casei;
6851 continue;
6852 }
6853
6854 s_entry_bb = inner->entry;
6855 s_exit_bb = inner->exit;
6856
6857 t = gimple_block_label (s_entry_bb);
6858 u = build_int_cst (unsigned_type_node, casei);
6859 u = build_case_label (u, NULL, t);
6860 label_vec.quick_push (u);
6861
6862 si = gsi_last_nondebug_bb (s_entry_bb);
6863 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6864 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6865 gsi_remove (&si, true);
6866 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6867
6868 if (s_exit_bb == NULL)
6869 continue;
6870
6871 si = gsi_last_nondebug_bb (s_exit_bb);
6872 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6873 gsi_remove (&si, true);
6874
6875 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6876 }
6877
6878 /* Error handling code goes in DEFAULT_BB. */
6879 t = gimple_block_label (default_bb);
6880 u = build_case_label (NULL, NULL, t);
6881 make_edge (l0_bb, default_bb, 0);
6882 add_bb_to_loop (default_bb, current_loops->tree_root);
6883
6884 stmt = gimple_build_switch (vmain, u, label_vec);
6885 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6886 gsi_remove (&switch_si, true);
6887
6888 si = gsi_start_bb (default_bb);
6889 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6890 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6891
6892 if (exit_reachable)
6893 {
6894 tree bfn_decl;
6895
6896 /* Code to get the next section goes in L1_BB. */
6897 si = gsi_last_nondebug_bb (l1_bb);
6898 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6899
6900 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6901 stmt = gimple_build_call (bfn_decl, 0);
6902 gimple_call_set_lhs (stmt, vnext);
6903 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6904 if (cond_var)
6905 {
6906 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6907 vnext, build_one_cst (TREE_TYPE (cond_var)));
6908 expand_omp_build_assign (&si, cond_var, t, false);
6909 }
6910 gsi_remove (&si, true);
6911
6912 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6913 }
6914
6915 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6916 si = gsi_last_nondebug_bb (l2_bb);
6917 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6918 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6919 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6920 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6921 else
6922 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6923 stmt = gimple_build_call (t, 0);
6924 if (gimple_omp_return_lhs (gsi_stmt (si)))
6925 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6926 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6927 gsi_remove (&si, true);
6928
6929 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6930}
6931
6932/* Expand code for an OpenMP single directive. We've already expanded
6933 much of the code, here we simply place the GOMP_barrier call. */
6934
6935static void
6936expand_omp_single (struct omp_region *region)
6937{
6938 basic_block entry_bb, exit_bb;
6939 gimple_stmt_iterator si;
6940
6941 entry_bb = region->entry;
6942 exit_bb = region->exit;
6943
6944 si = gsi_last_nondebug_bb (entry_bb);
6945 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6946 gsi_remove (&si, true);
6947 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6948
6949 si = gsi_last_nondebug_bb (exit_bb);
6950 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6951 {
6952 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6953 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6954 }
6955 gsi_remove (&si, true);
6956 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6957}
6958
6959/* Generic expansion for OpenMP synchronization directives: master,
6960 ordered and critical. All we need to do here is remove the entry
6961 and exit markers for REGION. */
6962
6963static void
6964expand_omp_synch (struct omp_region *region)
6965{
6966 basic_block entry_bb, exit_bb;
6967 gimple_stmt_iterator si;
6968
6969 entry_bb = region->entry;
6970 exit_bb = region->exit;
6971
6972 si = gsi_last_nondebug_bb (entry_bb);
6973 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6977 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6978 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6979 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6980 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6981 {
6982 expand_omp_taskreg (region);
6983 return;
6984 }
6985 gsi_remove (&si, true);
6986 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6987
6988 if (exit_bb)
6989 {
6990 si = gsi_last_nondebug_bb (exit_bb);
6991 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6992 gsi_remove (&si, true);
6993 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6994 }
6995}
6996
6997/* Translate enum omp_memory_order to enum memmodel. The two enums
6998 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6999 is 0. */
7000
7001static enum memmodel
7002omp_memory_order_to_memmodel (enum omp_memory_order mo)
7003{
7004 switch (mo)
7005 {
7006 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7007 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7008 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7009 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7010 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7011 default: gcc_unreachable ();
7012 }
7013}
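/* For example, omp_memory_order_to_memmodel (OMP_MEMORY_ORDER_SEQ_CST)
   returns MEMMODEL_SEQ_CST, which is the constant a "#pragma omp atomic
   seq_cst" access ultimately passes to the __atomic_* builtins used by
   the expanders below.  */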
7014
7015/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7016 operation as a normal volatile load. */
7017
7018static bool
7019expand_omp_atomic_load (basic_block load_bb, tree addr,
7020 tree loaded_val, int index)
7021{
7022 enum built_in_function tmpbase;
7023 gimple_stmt_iterator gsi;
7024 basic_block store_bb;
7025 location_t loc;
7026 gimple *stmt;
7027 tree decl, call, type, itype;
7028
7029 gsi = gsi_last_nondebug_bb (load_bb);
7030 stmt = gsi_stmt (gsi);
7031 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7032 loc = gimple_location (stmt);
7033
7034 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7035 is smaller than word size, then expand_atomic_load assumes that the load
7036 is atomic. We could avoid the builtin entirely in this case. */
7037
7038 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7039 decl = builtin_decl_explicit (tmpbase);
7040 if (decl == NULL_TREE)
7041 return false;
7042
7043 type = TREE_TYPE (loaded_val);
7044 itype = TREE_TYPE (TREE_TYPE (decl));
7045
7046 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7047 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7048 call = build_call_expr_loc (loc, decl, 2, addr, mo);
7049 if (!useless_type_conversion_p (type, itype))
7050 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7051 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7052
7053 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7054 gsi_remove (&gsi, true);
7055
7056 store_bb = single_succ (load_bb);
7057 gsi = gsi_last_nondebug_bb (store_bb);
7058 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7059 gsi_remove (&gsi, true);
7060
7061 if (gimple_in_ssa_p (cfun))
7062 update_ssa (TODO_update_ssa_no_phi);
7063
7064 return true;
7065}
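/* A minimal sketch of the result, assuming a 4-byte integer type:
   "#pragma omp atomic read" on "v = *addr" is emitted roughly as

     v = __atomic_load_4 (addr, mo);

   with a VIEW_CONVERT_EXPR wrapped around the call whenever the
   builtin's return type differs from the type of V.  */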
7066
7067/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7068 operation as a normal volatile store. */
7069
7070static bool
7071expand_omp_atomic_store (basic_block load_bb, tree addr,
7072 tree loaded_val, tree stored_val, int index)
7073{
7074 enum built_in_function tmpbase;
7075 gimple_stmt_iterator gsi;
7076 basic_block store_bb = single_succ (load_bb);
7077 location_t loc;
7078 gimple *stmt;
7079 tree decl, call, type, itype;
7080 machine_mode imode;
7081 bool exchange;
7082
7083 gsi = gsi_last_nondebug_bb (load_bb);
7084 stmt = gsi_stmt (gsi);
7085 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7086
7087 /* If the load value is needed, then this isn't a store but an exchange. */
7088 exchange = gimple_omp_atomic_need_value_p (stmt);
7089
7090 gsi = gsi_last_nondebug_bb (store_bb);
7091 stmt = gsi_stmt (gsi);
7092 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7093 loc = gimple_location (stmt);
7094
7095 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7096 is smaller than word size, then expand_atomic_store assumes that the store
7097 is atomic. We could avoid the builtin entirely in this case. */
7098
7099 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7100 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7101 decl = builtin_decl_explicit (tmpbase);
7102 if (decl == NULL_TREE)
7103 return false;
7104
7105 type = TREE_TYPE (stored_val);
7106
7107 /* Dig out the type of the function's second argument. */
7108 itype = TREE_TYPE (decl);
7109 itype = TYPE_ARG_TYPES (itype);
7110 itype = TREE_CHAIN (itype);
7111 itype = TREE_VALUE (itype);
7112 imode = TYPE_MODE (itype);
7113
7114 if (exchange && !can_atomic_exchange_p (imode, true))
7115 return false;
7116
7117 if (!useless_type_conversion_p (itype, type))
7118 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7119 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7120 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7121 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7122 if (exchange)
7123 {
7124 if (!useless_type_conversion_p (type, itype))
7125 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7126 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7127 }
7128
7129 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7130 gsi_remove (&gsi, true);
7131
7132 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
7133 gsi = gsi_last_nondebug_bb (load_bb);
7134 gsi_remove (&gsi, true);
7135
7136 if (gimple_in_ssa_p (cfun))
7137 update_ssa (TODO_update_ssa_no_phi);
7138
7139 return true;
7140}
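/* A minimal sketch of the result: "#pragma omp atomic write" on
   "*addr = x" becomes roughly __atomic_store_N (addr, x, mo); when the
   old value is live (a capture form), the exchange builtin

     v = __atomic_exchange_N (addr, x, mo);

   is emitted instead, provided can_atomic_exchange_p approves the mode.  */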
7141
7142/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7143 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
7144 size of the data type, and thus usable to find the index of the builtin
7145 decl. Returns false if the expression is not of the proper form. */
7146
7147static bool
7148expand_omp_atomic_fetch_op (basic_block load_bb,
7149 tree addr, tree loaded_val,
7150 tree stored_val, int index)
7151{
7152 enum built_in_function oldbase, newbase, tmpbase;
7153 tree decl, itype, call;
7154 tree lhs, rhs;
7155 basic_block store_bb = single_succ (load_bb);
7156 gimple_stmt_iterator gsi;
7157 gimple *stmt;
7158 location_t loc;
7159 enum tree_code code;
7160 bool need_old, need_new;
7161 machine_mode imode;
7162
7163 /* We expect to find the following sequences:
7164
7165 load_bb:
7166 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7167
7168 store_bb:
7169 val = tmp OP something; (or: something OP tmp)
7170 GIMPLE_OMP_STORE (val)
7171
7172 ???FIXME: Allow a more flexible sequence.
7173 Perhaps use data flow to pick the statements.
7174
7175 */
7176
7177 gsi = gsi_after_labels (store_bb);
7178 stmt = gsi_stmt (gsi);
7179 if (is_gimple_debug (stmt))
7180 {
7181 gsi_next_nondebug (&gsi);
7182 if (gsi_end_p (gsi))
7183 return false;
7184 stmt = gsi_stmt (gsi);
7185 }
7186 loc = gimple_location (stmt);
7187 if (!is_gimple_assign (stmt))
7188 return false;
7189 gsi_next_nondebug (&gsi);
7190 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7191 return false;
7192 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7193 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7194 enum omp_memory_order omo
7195 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7196 enum memmodel mo = omp_memory_order_to_memmodel (omo);
7197 gcc_checking_assert (!need_old || !need_new);
7198
7199 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7200 return false;
7201
7202 /* Check for one of the supported fetch-op operations. */
7203 code = gimple_assign_rhs_code (stmt);
7204 switch (code)
7205 {
7206 case PLUS_EXPR:
7207 case POINTER_PLUS_EXPR:
7208 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7209 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7210 break;
7211 case MINUS_EXPR:
7212 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7213 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7214 break;
7215 case BIT_AND_EXPR:
7216 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7217 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7218 break;
7219 case BIT_IOR_EXPR:
7220 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7221 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7222 break;
7223 case BIT_XOR_EXPR:
7224 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7225 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7226 break;
7227 default:
7228 return false;
7229 }
7230
7231 /* Make sure the expression is of the proper form. */
7232 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7233 rhs = gimple_assign_rhs2 (stmt);
7234 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7235 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7236 rhs = gimple_assign_rhs1 (stmt);
7237 else
7238 return false;
7239
7240 tmpbase = ((enum built_in_function)
7241 ((need_new ? newbase : oldbase) + index + 1));
7242 decl = builtin_decl_explicit (tmpbase);
7243 if (decl == NULL_TREE)
7244 return false;
7245 itype = TREE_TYPE (TREE_TYPE (decl));
7246 imode = TYPE_MODE (itype);
7247
7248 /* We could test all of the various optabs involved, but the fact of the
7249 matter is that (with the exception of i486 vs i586 and xadd) all targets
7250 that support any atomic operation optab also implement compare-and-swap.
7251 Let optabs.c take care of expanding any compare-and-swap loop. */
7252 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
7253 return false;
7254
7255 gsi = gsi_last_nondebug_bb (load_bb);
7256 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7257
7258 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
7259 It only requires that the operation happen atomically. Thus we can
7260 use the RELAXED memory model. */
7261 call = build_call_expr_loc (loc, decl, 3, addr,
7262 fold_convert_loc (loc, itype, rhs),
7263 build_int_cst (NULL, mo));
7264
7265 if (need_old || need_new)
7266 {
7267 lhs = need_old ? loaded_val : stored_val;
7268 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7269 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7270 }
7271 else
7272 call = fold_convert_loc (loc, void_type_node, call);
7273 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7274 gsi_remove (&gsi, true);
7275
7276 gsi = gsi_last_nondebug_bb (store_bb);
7277 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7278 gsi_remove (&gsi, true);
7279 gsi = gsi_last_nondebug_bb (store_bb);
7280 stmt = gsi_stmt (gsi);
7281 gsi_remove (&gsi, true);
7282
7283 if (gimple_in_ssa_p (cfun))
7284 {
7285 release_defs (stmt);
7286 update_ssa (TODO_update_ssa_no_phi);
7287 }
7288
7289 return true;
7290}
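/* The canonical match for the pattern above is, for illustration,

     #pragma omp atomic
     x += 1;

   which hits the PLUS_EXPR arm and is emitted roughly as
   __atomic_fetch_add_N (&x, 1, mo), or as the corresponding
   __atomic_add_fetch_N call when the updated value is needed.  */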
7291
7292/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7293
7294 oldval = *addr;
7295 repeat:
7296 newval = rhs; // with oldval replacing *addr in rhs
7297 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7298 if (oldval != newval)
7299 goto repeat;
7300
7301 INDEX is log2 of the size of the data type, and thus usable to find the
7302 index of the builtin decl. */
7303
7304static bool
7305expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7306 tree addr, tree loaded_val, tree stored_val,
7307 int index)
7308{
7309 tree loadedi, storedi, initial, new_storedi, old_vali;
7310 tree type, itype, cmpxchg, iaddr, atype;
7311 gimple_stmt_iterator si;
7312 basic_block loop_header = single_succ (load_bb);
7313 gimple *phi, *stmt;
7314 edge e;
7315 enum built_in_function fncode;
7316
7317 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7318 order to use the RELAXED memory model effectively. */
7319 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7320 + index + 1);
7321 cmpxchg = builtin_decl_explicit (fncode);
7322 if (cmpxchg == NULL_TREE)
7323 return false;
7324 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7325 atype = type;
7326 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7327
7328 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7329 || !can_atomic_load_p (TYPE_MODE (itype)))
7330 return false;
7331
7332 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
7333 si = gsi_last_nondebug_bb (load_bb);
7334 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7335
7336 /* For floating-point values, we'll need to view-convert them to integers
7337 so that we can perform the atomic compare and swap. Simplify the
7338 following code by always setting up the "i"ntegral variables. */
7339 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7340 {
7341 tree iaddr_val;
7342
7343 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7344 true));
7345 atype = itype;
7346 iaddr_val
7347 = force_gimple_operand_gsi (&si,
7348 fold_convert (TREE_TYPE (iaddr), addr),
7349 false, NULL_TREE, true, GSI_SAME_STMT);
7350 stmt = gimple_build_assign (iaddr, iaddr_val);
7351 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7352 loadedi = create_tmp_var (itype);
7353 if (gimple_in_ssa_p (cfun))
7354 loadedi = make_ssa_name (loadedi);
7355 }
7356 else
7357 {
7358 iaddr = addr;
7359 loadedi = loaded_val;
7360 }
7361
7362 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7363 tree loaddecl = builtin_decl_explicit (fncode);
7364 if (loaddecl)
7365 initial
7366 = fold_convert (atype,
7367 build_call_expr (loaddecl, 2, iaddr,
7368 build_int_cst (NULL_TREE,
7369 MEMMODEL_RELAXED)));
7370 else
7371 {
7372 tree off
7373 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7374 true), 0);
7375 initial = build2 (MEM_REF, atype, iaddr, off);
7376 }
7377
7378 initial
7379 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7380 GSI_SAME_STMT);
7381
7382 /* Move the value to the LOADEDI temporary. */
7383 if (gimple_in_ssa_p (cfun))
7384 {
7385 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7386 phi = create_phi_node (loadedi, loop_header);
7387 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7388 initial);
7389 }
7390 else
7391 gsi_insert_before (&si,
7392 gimple_build_assign (loadedi, initial),
7393 GSI_SAME_STMT);
7394 if (loadedi != loaded_val)
7395 {
7396 gimple_stmt_iterator gsi2;
7397 tree x;
7398
7399 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7400 gsi2 = gsi_start_bb (loop_header);
7401 if (gimple_in_ssa_p (cfun))
7402 {
7403 gassign *stmt;
7404 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7405 true, GSI_SAME_STMT);
7406 stmt = gimple_build_assign (loaded_val, x);
7407 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7408 }
7409 else
7410 {
7411 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7412 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7413 true, GSI_SAME_STMT);
7414 }
7415 }
7416 gsi_remove (&si, true);
7417
7418 si = gsi_last_nondebug_bb (store_bb);
7419 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7420
7421 if (iaddr == addr)
7422 storedi = stored_val;
7423 else
7424 storedi
7425 = force_gimple_operand_gsi (&si,
7426 build1 (VIEW_CONVERT_EXPR, itype,
7427 stored_val), true, NULL_TREE, true,
7428 GSI_SAME_STMT);
7429
7430 /* Build the compare&swap statement. */
7431 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7432 new_storedi = force_gimple_operand_gsi (&si,
7433 fold_convert (TREE_TYPE (loadedi),
7434 new_storedi),
7435 true, NULL_TREE,
7436 true, GSI_SAME_STMT);
7437
7438 if (gimple_in_ssa_p (cfun))
7439 old_vali = loadedi;
7440 else
7441 {
7442 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7443 stmt = gimple_build_assign (old_vali, loadedi);
7444 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7445
7446 stmt = gimple_build_assign (loadedi, new_storedi);
7447 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7448 }
7449
7450 /* Note that we always perform the comparison as an integer, even for
7451 floating point. This allows the atomic operation to properly
7452 succeed even with NaNs and -0.0. */
7453 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7454 stmt = gimple_build_cond_empty (ne);
7455 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7456
7457 /* Update cfg. */
7458 e = single_succ_edge (store_bb);
7459 e->flags &= ~EDGE_FALLTHRU;
7460 e->flags |= EDGE_FALSE_VALUE;
7461 /* Expect no looping. */
7462 e->probability = profile_probability::guessed_always ();
7463
7464 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7465 e->probability = profile_probability::guessed_never ();
7466
7467 /* Copy the new value to loadedi (we already did that before the condition
7468 if we are not in SSA). */
7469 if (gimple_in_ssa_p (cfun))
7470 {
7471 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7472 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7473 }
7474
7475 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7476 gsi_remove (&si, true);
7477
7478 class loop *loop = alloc_loop ();
7479 loop->header = loop_header;
7480 loop->latch = store_bb;
7481 add_loop (loop, loop_header->loop_father);
7482
7483 if (gimple_in_ssa_p (cfun))
7484 update_ssa (TODO_update_ssa_no_phi);
7485
7486 return true;
7487}
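/* A rough sketch of the emitted loop for a float operand (the exact
   gimple differs): the "i"ntegral shadow variables make the
   compare-and-swap operate on the bit pattern,

     loadedi = *(int *) iaddr;
   repeat:
     storedi = VIEW_CONVERT_EXPR<int> (updated float value);
     newi = __sync_val_compare_and_swap_4 (iaddr, loadedi, storedi);
     if (newi != loadedi) { loadedi = newi; goto repeat; }

   so NaNs and -0.0 compare bitwise and cannot make the loop spin
   forever.  */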
7488
7489/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7490
7491 GOMP_atomic_start ();
7492 *addr = rhs;
7493 GOMP_atomic_end ();
7494
7495 The result is not globally atomic, but works so long as all parallel
7496 references are within #pragma omp atomic directives. According to
7497 responses received from omp@openmp.org, this appears to be within spec.
7498 Which makes sense, since that's how several other compilers handle
7499 this situation as well.
7500 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7501 expanding. STORED_VAL is the operand of the matching
7502 GIMPLE_OMP_ATOMIC_STORE.
7503
7504 We replace
7505 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7506 loaded_val = *addr;
7507
7508 and replace
7509 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7510 *addr = stored_val;
7511*/
7512
7513static bool
7514expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7515 tree addr, tree loaded_val, tree stored_val)
7516{
7517 gimple_stmt_iterator si;
7518 gassign *stmt;
7519 tree t;
7520
7521 si = gsi_last_nondebug_bb (load_bb);
7522 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7523
7524 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7525 t = build_call_expr (t, 0);
7526 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7527
7528 tree mem = build_simple_mem_ref (addr);
7529 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7530 TREE_OPERAND (mem, 1)
7531 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7532 true),
7533 TREE_OPERAND (mem, 1));
7534 stmt = gimple_build_assign (loaded_val, mem);
7535 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7536 gsi_remove (&si, true);
7537
7538 si = gsi_last_nondebug_bb (store_bb);
7539 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7540
7541 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7542 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7543
7544 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7545 t = build_call_expr (t, 0);
7546 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7547 gsi_remove (&si, true);
7548
7549 if (gimple_in_ssa_p (cfun))
7550 update_ssa (TODO_update_ssa_no_phi);
7551 return true;
7552}
7553
7554/* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
7555 using expand_omp_atomic_fetch_op. If that fails, we try to
7556 call expand_omp_atomic_pipeline, and if it fails too, the
7557 ultimate fallback is wrapping the operation in a mutex
7558 (expand_omp_atomic_mutex). REGION is the atomic region built
7559 by build_omp_regions_1(). */
7560
7561static void
7562expand_omp_atomic (struct omp_region *region)
7563{
7564 basic_block load_bb = region->entry, store_bb = region->exit;
7565 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7566 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7567 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7568 tree addr = gimple_omp_atomic_load_rhs (load);
7569 tree stored_val = gimple_omp_atomic_store_val (store);
7570 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7571 HOST_WIDE_INT index;
7572
7573 /* Make sure the type is one of the supported sizes. */
7574 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7575 index = exact_log2 (index);
7576 if (index >= 0 && index <= 4)
7577 {
7578 unsigned int align = TYPE_ALIGN_UNIT (type);
7579
7580 /* __sync builtins require strict data alignment. */
7581 if (exact_log2 (align) >= index)
7582 {
7583 /* Atomic load. */
7584 scalar_mode smode;
7585 if (loaded_val == stored_val
7586 && (is_int_mode (TYPE_MODE (type), &smode)
7587 || is_float_mode (TYPE_MODE (type), &smode))
7588 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7589 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7590 return;
7591
7592 /* Atomic store. */
7593 if ((is_int_mode (TYPE_MODE (type), &smode)
7594 || is_float_mode (TYPE_MODE (type), &smode))
7595 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7596 && store_bb == single_succ (load_bb)
7597 && first_stmt (store_bb) == store
7598 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7599 stored_val, index))
7600 return;
7601
7602 /* When possible, use specialized atomic update functions. */
7603 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7604 && store_bb == single_succ (load_bb)
7605 && expand_omp_atomic_fetch_op (load_bb, addr,
7606 loaded_val, stored_val, index))
7607 return;
7608
7609 /* If we don't have specialized __sync builtins, try and implement
7610 as a compare and swap loop. */
7611 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7612 loaded_val, stored_val, index))
7613 return;
7614 }
7615 }
7616
7617 /* The ultimate fallback is wrapping the operation in a mutex. */
7618 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7619}
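/* Worked example: for a 4-byte int, TYPE_SIZE_UNIT is 4, so
   index = exact_log2 (4) = 2 and the *_4 builtin variants are selected;
   a type whose size is not a power of two (exact_log2 returns -1) or
   whose alignment is insufficient skips all the fast paths above and
   drops straight to the GOMP_atomic_start/GOMP_atomic_end mutex
   fallback.  */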
7620
7621/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7622 at REGION_EXIT. */
7623
7624static void
7625mark_loops_in_oacc_kernels_region (basic_block region_entry,
7626 basic_block region_exit)
7627{
7628 class loop *outer = region_entry->loop_father;
7629 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7630
7631 /* Don't parallelize the kernels region if it contains more than one outer
7632 loop. */
7633 unsigned int nr_outer_loops = 0;
7634 class loop *single_outer = NULL;
7635 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
7636 {
7637 gcc_assert (loop_outer (loop) == outer);
7638
7639 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7640 continue;
7641
7642 if (region_exit != NULL
7643 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7644 continue;
7645
7646 nr_outer_loops++;
7647 single_outer = loop;
7648 }
7649 if (nr_outer_loops != 1)
7650 return;
7651
7652 for (class loop *loop = single_outer->inner;
7653 loop != NULL;
7654 loop = loop->inner)
7655 if (loop->next)
7656 return;
7657
7658 /* Mark the loops in the region. */
7659 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
7660 loop->in_oacc_kernels_region = true;
7661}
7662
7663/* Types used to pass grid and workgroup sizes to kernel invocation. */
7664
7665struct GTY(()) grid_launch_attributes_trees
7666{
7667 tree kernel_dim_array_type;
7668 tree kernel_lattrs_dimnum_decl;
7669 tree kernel_lattrs_grid_decl;
7670 tree kernel_lattrs_group_decl;
7671 tree kernel_launch_attributes_type;
7672};
7673
7674static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7675
7676/* Create types used to pass kernel launch attributes to target. */
7677
7678static void
7679grid_create_kernel_launch_attr_types (void)
7680{
7681 if (grid_attr_trees)
7682 return;
7683 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7684
7685 tree dim_arr_index_type
7686 = build_index_type (build_int_cst (integer_type_node, 2));
7687 grid_attr_trees->kernel_dim_array_type
7688 = build_array_type (uint32_type_node, dim_arr_index_type);
7689
7690 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7691 grid_attr_trees->kernel_lattrs_dimnum_decl
7692 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7693 uint32_type_node);
7694 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7695
7696 grid_attr_trees->kernel_lattrs_grid_decl
7697 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7698 grid_attr_trees->kernel_dim_array_type);
7699 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7700 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7701 grid_attr_trees->kernel_lattrs_group_decl
7702 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7703 grid_attr_trees->kernel_dim_array_type);
7704 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7705 = grid_attr_trees->kernel_lattrs_grid_decl;
7706 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7707 "__gomp_kernel_launch_attributes",
7708 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7709}
7710
7711/* Insert before the current statement in GSI a store of VALUE to INDEX of
7712 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7713 of type uint32_type_node. */
7714
7715static void
7716grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7717 tree fld_decl, int index, tree value)
7718{
7719 tree ref = build4 (ARRAY_REF, uint32_type_node,
7720 build3 (COMPONENT_REF,
7721 grid_attr_trees->kernel_dim_array_type,
7722 range_var, fld_decl, NULL_TREE),
7723 build_int_cst (integer_type_node, index),
7724 NULL_TREE, NULL_TREE);
7725 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7726}
7727
7728/* Return a tree representation of a pointer to a structure with grid and
7729 work-group size information. Statements filling that information will be
7730 inserted before GSI, TGT_STMT is the target statement which has the
7731 necessary information in it. */
7732
7733static tree
7734grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7735 gomp_target *tgt_stmt)
7736{
7737 grid_create_kernel_launch_attr_types ();
7738 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7739 "__kernel_launch_attrs");
7740
7741 unsigned max_dim = 0;
7742 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7743 clause;
7744 clause = OMP_CLAUSE_CHAIN (clause))
7745 {
7746 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7747 continue;
7748
7749 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7750 max_dim = MAX (dim, max_dim);
7751
7752 grid_insert_store_range_dim (gsi, lattrs,
7753 grid_attr_trees->kernel_lattrs_grid_decl,
7754 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7755 grid_insert_store_range_dim (gsi, lattrs,
7756 grid_attr_trees->kernel_lattrs_group_decl,
7757 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7758 }
7759
7760 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7761 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7762 gcc_checking_assert (max_dim <= 2);
7763 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7764 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7765 GSI_SAME_STMT);
7766 TREE_ADDRESSABLE (lattrs) = 1;
7767 return build_fold_addr_expr (lattrs);
7768}
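/* For illustration: a single _griddim_ clause with dimension 0, grid
   size 64 and group size 8 makes the code above store 64 into
   grid_size[0] and 8 into group_size[0] of __kernel_launch_attrs, set
   ndim to 1, and return the address of that record.  */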
7769
7770/* Build target argument identifier from the DEVICE identifier, value
7771 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7772
7773static tree
7774get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7775{
7776 tree t = build_int_cst (integer_type_node, device);
7777 if (subseqent_param)
7778 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7779 build_int_cst (integer_type_node,
7780 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7781 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7782 build_int_cst (integer_type_node, id));
7783 return t;
7784}
7785
7786/* Like above but return it in type that can be directly stored as an element
7787 of the argument array. */
7788
7789static tree
7790get_target_argument_identifier (int device, bool subseqent_param, int id)
7791{
7792 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7793 return fold_convert (ptr_type_node, t);
7794}
7795
7796/* Return a target argument consisting of DEVICE identifier, value identifier
7797 ID, and the actual VALUE. */
7798
7799static tree
7800get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7801 tree value)
7802{
7803 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7804 fold_convert (integer_type_node, value),
7805 build_int_cst (unsigned_type_node,
7806 GOMP_TARGET_ARG_VALUE_SHIFT));
7807 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7808 get_target_argument_identifier_1 (device, false, id));
7809 t = fold_convert (ptr_type_node, t);
7810 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7811}
7812
7813/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7814 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
7815 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7816 arguments. */
7817
7818static void
7819push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7820 int id, tree value, vec <tree> *args)
7821{
7822 if (tree_fits_shwi_p (value)
7823 && tree_to_shwi (value) > -(1 << 15)
7824 && tree_to_shwi (value) < (1 << 15))
7825 args->quick_push (get_target_argument_value (gsi, device, id, value));
7826 else
7827 {
7828 args->quick_push (get_target_argument_identifier (device, true, id));
7829 value = fold_convert (ptr_type_node, value);
7830 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7831 GSI_SAME_STMT);
7832 args->quick_push (value);
7833 }
7834}
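/* Encoding example: a compile-time num_teams value of 4 fits into the
   signed 16-bit payload, so a single element encoding

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
     | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   is pushed; a value only known at run time instead pushes an
   identifier element with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set,
   followed by the value itself as a separate pointer-sized element.  */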
7835
7836/* Create an array of arguments that is then passed to GOMP_target. */
7837
7838static tree
7839get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7840{
7841 auto_vec <tree, 6> args;
7842 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7843 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7844 if (c)
7845 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7846 else
7847 t = integer_minus_one_node;
7848 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7849 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7850
7851 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7852 if (c)
7853 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7854 else
7855 t = integer_minus_one_node;
7856 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7857 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7858 &args);
7859
7860 /* Add HSA-specific grid sizes, if available. */
7861 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7862 OMP_CLAUSE__GRIDDIM_))
7863 {
7864 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7865 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7866 args.quick_push (t);
7867 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7868 }
7869
7870 /* Produce more, perhaps device specific, arguments here. */
7871
7872 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7873 args.length () + 1),
7874 ".omp_target_args");
7875 for (unsigned i = 0; i < args.length (); i++)
7876 {
7877 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7878 build_int_cst (integer_type_node, i),
7879 NULL_TREE, NULL_TREE);
7880 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7881 GSI_SAME_STMT);
7882 }
7883 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7884 build_int_cst (integer_type_node, args.length ()),
7885 NULL_TREE, NULL_TREE);
7886 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7887 GSI_SAME_STMT);
7888 TREE_ADDRESSABLE (argarray) = 1;
7889 return build_fold_addr_expr (argarray);
7890}
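/* A sketch of the resulting layout: for a bare "#pragma omp target"
   with neither num_teams nor thread_limit, the array is roughly

     .omp_target_args = { <device-all | num-teams | -1>,
                          <device-all | thread-limit | -1>, NULL };

   i.e. both defaults encode the value -1, and the trailing NULL
   terminates the list handed to GOMP_target.  */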
7891
7892/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7893
7894static void
7895expand_omp_target (struct omp_region *region)
7896{
7897 basic_block entry_bb, exit_bb, new_bb;
7898 struct function *child_cfun;
7899 tree child_fn, block, t;
7900 gimple_stmt_iterator gsi;
7901 gomp_target *entry_stmt;
7902 gimple *stmt;
7903 edge e;
7904 bool offloaded, data_region;
7905
7906 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7907 new_bb = region->entry;
7908
7909 offloaded = is_gimple_omp_offloaded (entry_stmt);
7910 switch (gimple_omp_target_kind (entry_stmt))
7911 {
7912 case GF_OMP_TARGET_KIND_REGION:
7913 case GF_OMP_TARGET_KIND_UPDATE:
7914 case GF_OMP_TARGET_KIND_ENTER_DATA:
7915 case GF_OMP_TARGET_KIND_EXIT_DATA:
7916 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7917 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7918 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7919 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7920 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7921 data_region = false;
7922 break;
7923 case GF_OMP_TARGET_KIND_DATA:
7924 case GF_OMP_TARGET_KIND_OACC_DATA:
7925 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7926 data_region = true;
7927 break;
7928 default:
7929 gcc_unreachable ();
7930 }
7931
7932 child_fn = NULL_TREE;
7933 child_cfun = NULL;
7934 if (offloaded)
7935 {
7936 child_fn = gimple_omp_target_child_fn (entry_stmt);
7937 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7938 }
7939
7940 /* Supported by expand_omp_taskreg, but not here. */
7941 if (child_cfun != NULL)
7942 gcc_checking_assert (!child_cfun->cfg);
7943 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7944
7945 entry_bb = region->entry;
7946 exit_bb = region->exit;
7947
7948 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7949 {
7950 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7951
7952 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7953 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7954 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7955 DECL_ATTRIBUTES (child_fn)
7956 = tree_cons (get_identifier ("oacc kernels"),
7957 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7958 }
7959
7960 if (offloaded)
7961 {
7962 unsigned srcidx, dstidx, num;
7963
7964 /* If the offloading region needs data sent from the parent
7965 function, then the very first statement (except possible
7966 tree profile counter updates) of the offloading body
7967 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7968 &.OMP_DATA_O is passed as an argument to the child function,
7969 we need to replace it with the argument as seen by the child
7970 function.
7971
7972 In most cases, this will end up being the identity assignment
7973 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7974 a function call that has been inlined, the original PARM_DECL
7975 .OMP_DATA_I may have been converted into a different local
7976 variable, in which case we need to keep the assignment. */
7977 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7978 if (data_arg)
7979 {
7980 basic_block entry_succ_bb = single_succ (entry_bb);
7981 gimple_stmt_iterator gsi;
7982 tree arg;
7983 gimple *tgtcopy_stmt = NULL;
7984 tree sender = TREE_VEC_ELT (data_arg, 0);
7985
7986 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7987 {
7988 gcc_assert (!gsi_end_p (gsi));
7989 stmt = gsi_stmt (gsi);
7990 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7991 continue;
7992
7993 if (gimple_num_ops (stmt) == 2)
7994 {
7995 tree arg = gimple_assign_rhs1 (stmt);
7996
7997 /* We're ignoring the subcode because we're
7998 effectively doing a STRIP_NOPS. */
7999
8000 if (TREE_CODE (arg) == ADDR_EXPR
8001 && TREE_OPERAND (arg, 0) == sender)
8002 {
8003 tgtcopy_stmt = stmt;
8004 break;
8005 }
8006 }
8007 }
8008
8009 gcc_assert (tgtcopy_stmt != NULL);
8010 arg = DECL_ARGUMENTS (child_fn);
8011
8012 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8013 gsi_remove (&gsi, true);
8014 }
8015
8016 /* Declare local variables needed in CHILD_CFUN. */
8017 block = DECL_INITIAL (child_fn);
8018 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8019 /* The gimplifier could record temporaries in the offloading block
8020 rather than in containing function's local_decls chain,
8021 which would mean cgraph missed finalizing them. Do it now. */
8022 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8023 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8024 varpool_node::finalize_decl (t);
8025 DECL_SAVED_TREE (child_fn) = NULL;
8026 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8027 gimple_set_body (child_fn, NULL);
8028 TREE_USED (block) = 1;
8029
8030 /* Reset DECL_CONTEXT on function arguments. */
8031 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8032 DECL_CONTEXT (t) = child_fn;
8033
8034 /* Split ENTRY_BB at GIMPLE_*,
8035 so that it can be moved to the child function. */
8036 gsi = gsi_last_nondebug_bb (entry_bb);
8037 stmt = gsi_stmt (gsi);
8038 gcc_assert (stmt
8039 && gimple_code (stmt) == gimple_code (entry_stmt));
8040 e = split_block (entry_bb, stmt);
8041 gsi_remove (&gsi, true);
8042 entry_bb = e->dest;
8043 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8044
8045 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8046 if (exit_bb)
8047 {
8048 gsi = gsi_last_nondebug_bb (exit_bb);
8049 gcc_assert (!gsi_end_p (gsi)
8050 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8051 stmt = gimple_build_return (NULL);
8052 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8053 gsi_remove (&gsi, true);
8054 }
8055
8056 /* Move the offloading region into CHILD_CFUN. */
8057
8058 block = gimple_block (entry_stmt);
8059
8060 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8061 if (exit_bb)
8062 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8063 /* When the OMP expansion process cannot guarantee an up-to-date
8064 loop tree arrange for the child function to fixup loops. */
8065 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8066 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8067
8068 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8069 num = vec_safe_length (child_cfun->local_decls);
8070 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8071 {
8072 t = (*child_cfun->local_decls)[srcidx];
8073 if (DECL_CONTEXT (t) == cfun->decl)
8074 continue;
8075 if (srcidx != dstidx)
8076 (*child_cfun->local_decls)[dstidx] = t;
8077 dstidx++;
8078 }
8079 if (dstidx != num)
8080 vec_safe_truncate (child_cfun->local_decls, dstidx);
8081
8082 /* Inform the callgraph about the new function. */
8083 child_cfun->curr_properties = cfun->curr_properties;
8084 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8085 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8086 cgraph_node *node = cgraph_node::get_create (child_fn);
8087 node->parallelized_function = 1;
8088 cgraph_node::add_new_function (child_fn, true);
8089
8090 /* Add the new function to the offload table. */
8091 if (ENABLE_OFFLOADING)
8092 {
8093 if (in_lto_p)
8094 DECL_PRESERVE_P (child_fn) = 1;
8095 vec_safe_push (offload_funcs, child_fn);
8096 }
8097
8098 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8099 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8100
8101 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8102 fixed in a following pass. */
8103 push_cfun (child_cfun);
8104 if (need_asm)
8105 assign_assembler_name_if_needed (child_fn);
8106 cgraph_edge::rebuild_edges ();
8107
8108 /* Some EH regions might become dead, see PR34608. If
8109 pass_cleanup_cfg isn't the first pass to happen with the
8110 new child, these dead EH edges might cause problems.
8111 Clean them up now. */
8112 if (flag_exceptions)
8113 {
8114 basic_block bb;
8115 bool changed = false;
8116
8117 FOR_EACH_BB_FN (bb, cfun)
8118 changed |= gimple_purge_dead_eh_edges (bb);
8119 if (changed)
8120 cleanup_tree_cfg ();
8121 }
8122 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8123 verify_loop_structure ();
8124 pop_cfun ();
8125
8126 if (dump_file && !gimple_in_ssa_p (cfun))
8127 {
8128 omp_any_child_fn_dumped = true;
8129 dump_function_header (dump_file, child_fn, dump_flags);
8130 dump_function_to_file (child_fn, dump_file, dump_flags);
8131 }
8132
8133 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
8134 }
8135
8136 /* Emit a library call to launch the offloading region, or do data
8137 transfers. */
8138 tree t1, t2, t3, t4, depend, c, clauses;
8139 enum built_in_function start_ix;
8140 unsigned int flags_i = 0;
8141
8142 switch (gimple_omp_target_kind (entry_stmt))
8143 {
8144 case GF_OMP_TARGET_KIND_REGION:
8145 start_ix = BUILT_IN_GOMP_TARGET;
8146 break;
8147 case GF_OMP_TARGET_KIND_DATA:
8148 start_ix = BUILT_IN_GOMP_TARGET_DATA;
8149 break;
8150 case GF_OMP_TARGET_KIND_UPDATE:
8151 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8152 break;
8153 case GF_OMP_TARGET_KIND_ENTER_DATA:
8154 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8155 break;
8156 case GF_OMP_TARGET_KIND_EXIT_DATA:
8157 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8158 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8159 break;
8160 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8161 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8162 start_ix = BUILT_IN_GOACC_PARALLEL;
8163 break;
8164 case GF_OMP_TARGET_KIND_OACC_DATA:
8165 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8166 start_ix = BUILT_IN_GOACC_DATA_START;
8167 break;
8168 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8169 start_ix = BUILT_IN_GOACC_UPDATE;
8170 break;
8171 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8172 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8173 break;
8174 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8175 start_ix = BUILT_IN_GOACC_DECLARE;
8176 break;
8177 default:
8178 gcc_unreachable ();
8179 }
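  /* For a plain 'omp target' region, the library call built at the end
     of this function has roughly the shape below (a hedged sketch;
     BUILT_IN_GOMP_TARGET resolves to a libgomp entry point such as
     GOMP_target_ext, per omp-builtins.def, and the exact prototype may
     differ between releases):

	 GOMP_target_ext (device, child_fn, num_maps,
			  .omp_data_arr, .omp_data_sizes, .omp_data_kinds,
			  flags, depend, args);

     The remainder of this function computes those operands.  */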
8180
8181 clauses = gimple_omp_target_clauses (entry_stmt);
8182
8183 tree device = NULL_TREE;
8184 location_t device_loc = UNKNOWN_LOCATION;
8185 tree goacc_flags = NULL_TREE;
8186 if (is_gimple_omp_oacc (entry_stmt))
8187 {
8188 /* By default, no GOACC_FLAGs are set. */
8189 goacc_flags = integer_zero_node;
8190 }
8191 else
8192 {
8193 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8194 if (c)
8195 {
8196 device = OMP_CLAUSE_DEVICE_ID (c);
8197 device_loc = OMP_CLAUSE_LOCATION (c);
8198 }
8199 else
8200 {
8201 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8202 library choose). */
8203 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8204 device_loc = gimple_location (entry_stmt);
8205 }
8206
8207 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8208 if (c)
8209 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8210 }
8211
8212 /* By default, there is no conditional. */
8213 tree cond = NULL_TREE;
8214 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8215 if (c)
8216 cond = OMP_CLAUSE_IF_EXPR (c);
8217 /* If we found the clause 'if (cond)', build:
8218 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
8219 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
8220 if (cond)
8221 {
8222 tree *tp;
8223 if (is_gimple_omp_oacc (entry_stmt))
8224 tp = &goacc_flags;
8225 else
8226 {
8227 /* Ensure 'device' is of the correct type. */
8228 device = fold_convert_loc (device_loc, integer_type_node, device);
8229
8230 tp = &device;
8231 }
8232
8233 cond = gimple_boolify (cond);
8234
8235 basic_block cond_bb, then_bb, else_bb;
8236 edge e;
8237 tree tmp_var;
8238
8239 tmp_var = create_tmp_var (TREE_TYPE (*tp));
8240 if (offloaded)
8241 e = split_block_after_labels (new_bb);
8242 else
8243 {
8244 gsi = gsi_last_nondebug_bb (new_bb);
8245 gsi_prev (&gsi);
8246 e = split_block (new_bb, gsi_stmt (gsi));
8247 }
8248 cond_bb = e->src;
8249 new_bb = e->dest;
8250 remove_edge (e);
8251
8252 then_bb = create_empty_bb (cond_bb);
8253 else_bb = create_empty_bb (then_bb);
8254 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8255 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8256
8257 stmt = gimple_build_cond_empty (cond);
8258 gsi = gsi_last_bb (cond_bb);
8259 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8260
8261 gsi = gsi_start_bb (then_bb);
8262 stmt = gimple_build_assign (tmp_var, *tp);
8263 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8264
8265 gsi = gsi_start_bb (else_bb);
8266 if (is_gimple_omp_oacc (entry_stmt))
8267 stmt = gimple_build_assign (tmp_var,
8268 BIT_IOR_EXPR,
8269 *tp,
8270 build_int_cst (integer_type_node,
8271 GOACC_FLAG_HOST_FALLBACK));
8272 else
8273 stmt = gimple_build_assign (tmp_var,
8274 build_int_cst (integer_type_node,
8275 GOMP_DEVICE_HOST_FALLBACK));
8276 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8277
8278 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8279 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8280 add_bb_to_loop (then_bb, cond_bb->loop_father);
8281 add_bb_to_loop (else_bb, cond_bb->loop_father);
8282 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8283 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8284
8285 *tp = tmp_var;
8286
8287 gsi = gsi_last_nondebug_bb (new_bb);
8288 }
8289 else
8290 {
8291 gsi = gsi_last_nondebug_bb (new_bb);
8292
8293 if (device != NULL_TREE)
8294 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8295 true, GSI_SAME_STMT);
8296 }
8297
8298 t = gimple_omp_target_data_arg (entry_stmt);
8299 if (t == NULL)
8300 {
8301 t1 = size_zero_node;
8302 t2 = build_zero_cst (ptr_type_node);
8303 t3 = t2;
8304 t4 = t2;
8305 }
8306 else
8307 {
8308 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8309 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8310 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8311 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8312 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8313 }
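  /* Gloss (an assumption about the TREE_VEC layout produced by target
     lowering): element 0 holds the host addresses, element 1 the sizes
     and element 2 the map kinds, so T1 becomes the number of map entries
     and T2..T4 the addresses of the .omp_data_arr, .omp_data_sizes and
     .omp_data_kinds artificial arrays.  */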
8314
8315 gimple *g;
8316 bool tagging = false;
8317 /* Sized for the maximum number of arguments used by any start_ix, not counting varargs. */
8318 auto_vec<tree, 11> args;
8319 if (is_gimple_omp_oacc (entry_stmt))
8320 {
8321 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8322 TREE_TYPE (goacc_flags), goacc_flags);
8323 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8324 NULL_TREE, true,
8325 GSI_SAME_STMT);
8326 args.quick_push (goacc_flags_m);
8327 }
8328 else
8329 args.quick_push (device);
8330 if (offloaded)
8331 args.quick_push (build_fold_addr_expr (child_fn));
8332 args.quick_push (t1);
8333 args.quick_push (t2);
8334 args.quick_push (t3);
8335 args.quick_push (t4);
8336 switch (start_ix)
8337 {
8338 case BUILT_IN_GOACC_DATA_START:
8339 case BUILT_IN_GOACC_DECLARE:
8340 case BUILT_IN_GOMP_TARGET_DATA:
8341 break;
8342 case BUILT_IN_GOMP_TARGET:
8343 case BUILT_IN_GOMP_TARGET_UPDATE:
8344 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8345 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8346 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8347 if (c)
8348 depend = OMP_CLAUSE_DECL (c);
8349 else
8350 depend = build_int_cst (ptr_type_node, 0);
8351 args.quick_push (depend);
8352 if (start_ix == BUILT_IN_GOMP_TARGET)
8353 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8354 break;
8355 case BUILT_IN_GOACC_PARALLEL:
8356 oacc_set_fn_attrib (child_fn, clauses, &args);
8357 tagging = true;
8358 /* FALLTHRU */
8359 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8360 case BUILT_IN_GOACC_UPDATE:
8361 {
8362 tree t_async = NULL_TREE;
8363
8364 /* If present, use the value specified by the respective
8365 clause, making sure that it is of the correct type. */
8366 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8367 if (c)
8368 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8369 integer_type_node,
8370 OMP_CLAUSE_ASYNC_EXPR (c));
8371 else if (!tagging)
8372 /* Default values for t_async. */
8373 t_async = fold_convert_loc (gimple_location (entry_stmt),
8374 integer_type_node,
8375 build_int_cst (integer_type_node,
8376 GOMP_ASYNC_SYNC));
8377 if (tagging && t_async)
8378 {
8379 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8380
8381 if (TREE_CODE (t_async) == INTEGER_CST)
8382 {
8383 /* See if we can pack the async arg into the tag's
8384 operand. */
8385 i_async = TREE_INT_CST_LOW (t_async);
8386 if (i_async < GOMP_LAUNCH_OP_MAX)
8387 t_async = NULL_TREE;
8388 else
8389 i_async = GOMP_LAUNCH_OP_MAX;
8390 }
8391 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8392 i_async));
8393 }
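	    /* Design note: a small constant async id is packed straight
	       into the operand field of the GOMP_LAUNCH_ASYNC tag (the
	       i_async < GOMP_LAUNCH_OP_MAX test above), so the common
	       case needs no separate vararg.  */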
8394 if (t_async)
8395 args.safe_push (t_async);
8396
8397 /* Save the argument index, and ... */
8398 unsigned t_wait_idx = args.length ();
8399 unsigned num_waits = 0;
8400 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8401 if (!tagging || c)
8402 /* ... push a placeholder. */
8403 args.safe_push (integer_zero_node);
8404
8405 for (; c; c = OMP_CLAUSE_CHAIN (c))
8406 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8407 {
8408 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8409 integer_type_node,
8410 OMP_CLAUSE_WAIT_EXPR (c)));
8411 num_waits++;
8412 }
8413
8414 if (!tagging || num_waits)
8415 {
8416 tree len;
8417
8418 /* Now that we know the number, update the placeholder. */
8419 if (tagging)
8420 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8421 else
8422 len = build_int_cst (integer_type_node, num_waits);
8423 len = fold_convert_loc (gimple_location (entry_stmt),
8424 unsigned_type_node, len);
8425 args[t_wait_idx] = len;
8426 }
8427 }
8428 break;
8429 default:
8430 gcc_unreachable ();
8431 }
8432 if (tagging)
8433 /* Push terminal marker - zero. */
8434 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
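  /* For OpenACC the result is a varargs launch call; a hedged sketch
     (BUILT_IN_GOACC_PARALLEL resolves to a libgomp entry point such as
     GOACC_parallel_keyed):

	 GOACC_parallel_keyed (goacc_flags, child_fn, num_maps,
			       hostaddrs, sizes, kinds,
			       <launch tags>..., 0);

     where the trailing zero is the terminal marker pushed above.  */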
8435
8436 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8437 gimple_set_location (g, gimple_location (entry_stmt));
8438 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8439 if (!offloaded)
8440 {
8441 g = gsi_stmt (gsi);
8442 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8443 gsi_remove (&gsi, true);
8444 }
8445 if (data_region && region->exit)
8446 {
8447 gsi = gsi_last_nondebug_bb (region->exit);
8448 g = gsi_stmt (gsi);
8449 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8450 gsi_remove (&gsi, true);
8451 }
8452}
8453
8454 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only,
8455 with the iteration variable derived from the thread number. INTRA_GROUP
8456 means this is an expansion of a loop iterating over work-items within
8457 a separate iteration over groups. */
8458
8459static void
8460grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8461{
8462 gimple_stmt_iterator gsi;
8463 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8464 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8465 == GF_OMP_FOR_KIND_GRID_LOOP);
8466 size_t collapse = gimple_omp_for_collapse (for_stmt);
8467 struct omp_for_data_loop *loops
8468 = XALLOCAVEC (struct omp_for_data_loop,
8469 gimple_omp_for_collapse (for_stmt));
8470 struct omp_for_data fd;
8471
8472 remove_edge (BRANCH_EDGE (kfor->entry));
8473 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8474
8475 gcc_assert (kfor->cont);
8476 omp_extract_for_data (for_stmt, &fd, loops);
8477
8478 gsi = gsi_start_bb (body_bb);
8479
8480 for (size_t dim = 0; dim < collapse; dim++)
8481 {
8482 tree type, itype;
8483 itype = type = TREE_TYPE (fd.loops[dim].v);
8484 if (POINTER_TYPE_P (type))
8485 itype = signed_type_for (type);
8486
8487 tree n1 = fd.loops[dim].n1;
8488 tree step = fd.loops[dim].step;
8489 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8490 true, NULL_TREE, true, GSI_SAME_STMT);
8491 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8492 true, NULL_TREE, true, GSI_SAME_STMT);
8493 tree threadid;
8494 if (gimple_omp_for_grid_group_iter (for_stmt))
8495 {
8496 gcc_checking_assert (!intra_group);
8497 threadid = build_call_expr (builtin_decl_explicit
8498 (BUILT_IN_HSA_WORKGROUPID), 1,
8499 build_int_cstu (unsigned_type_node, dim));
8500 }
8501 else if (intra_group)
8502 threadid = build_call_expr (builtin_decl_explicit
8503 (BUILT_IN_HSA_WORKITEMID), 1,
8504 build_int_cstu (unsigned_type_node, dim));
8505 else
8506 threadid = build_call_expr (builtin_decl_explicit
8507 (BUILT_IN_HSA_WORKITEMABSID), 1,
8508 build_int_cstu (unsigned_type_node, dim));
8509 threadid = fold_convert (itype, threadid);
8510 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8511 true, GSI_SAME_STMT);
8512
8513 tree startvar = fd.loops[dim].v;
8514 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8515 if (POINTER_TYPE_P (type))
8516 t = fold_build_pointer_plus (n1, t);
8517 else
8518 t = fold_build2 (PLUS_EXPR, type, t, n1);
8519 t = fold_convert (type, t);
8520 t = force_gimple_operand_gsi (&gsi, t,
8521 DECL_P (startvar)
8522 && TREE_ADDRESSABLE (startvar),
8523 NULL_TREE, true, GSI_SAME_STMT);
8524 gassign *assign_stmt = gimple_build_assign (startvar, t);
8525 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8526 }
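  /* In effect, each dimension's induction variable has just been seeded
     directly from a hardware thread id:
	 v[dim] = n1[dim] + threadid (dim) * step[dim];
     (via fold_build_pointer_plus for pointer-typed variables), so no
     explicit iteration remains in the kernel body.  */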
8527 /* Remove the omp for statement. */
8528 gsi = gsi_last_nondebug_bb (kfor->entry);
8529 gsi_remove (&gsi, true);
8530
8531 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8532 gsi = gsi_last_nondebug_bb (kfor->cont);
8533 gcc_assert (!gsi_end_p (gsi)
8534 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8535 gsi_remove (&gsi, true);
8536
8537 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8538 gsi = gsi_last_nondebug_bb (kfor->exit);
8539 gcc_assert (!gsi_end_p (gsi)
8540 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8541 if (intra_group)
8542 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8543 gsi_remove (&gsi, true);
8544
8545 /* Fixup the much simpler CFG. */
8546 remove_edge (find_edge (kfor->cont, body_bb));
8547
8548 if (kfor->cont != body_bb)
8549 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8550 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8551}
8552
8553/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8554 argument_decls. */
8555
8556struct grid_arg_decl_map
8557{
8558 tree old_arg;
8559 tree new_arg;
8560};
8561
8562 /* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
8563 pertaining to the kernel function. */
8564
8565static tree
8566grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8567{
8568 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8569 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8570 tree t = *tp;
8571
8572 if (t == adm->old_arg)
8573 *tp = adm->new_arg;
8574 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8575 return NULL_TREE;
8576}
8577
8578 /* If the TARGET region contains a gridified kernel body, remove its region
8579 from the TARGET and expand it in HSA gridified kernel fashion. */
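/* Informal example (an assumed source construct, not taken from this
   file): for a gridifiable

     #pragma omp target teams distribute parallel for

   the group iteration ends up using BUILT_IN_HSA_WORKGROUPID and the
   intra-group work-item iteration BUILT_IN_HSA_WORKITEMID, as arranged
   by grid_expand_omp_for_loop above.  */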
8580
8581static void
8582grid_expand_target_grid_body (struct omp_region *target)
8583{
8584 if (!hsa_gen_requested_p ())
8585 return;
8586
8587 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8588 struct omp_region **pp;
8589
8590 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8591 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8592 break;
8593
8594 struct omp_region *gpukernel = *pp;
8595
8596 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8597 if (!gpukernel)
8598 {
8599 /* HSA cannot handle OACC stuff. */
8600 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8601 return;
8602 gcc_checking_assert (orig_child_fndecl);
8603 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8604 OMP_CLAUSE__GRIDDIM_));
8605 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8606
8607 hsa_register_kernel (n);
8608 return;
8609 }
8610
8611 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8612 OMP_CLAUSE__GRIDDIM_));
8613 tree inside_block
8614 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8615 *pp = gpukernel->next;
8616 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8617 if ((*pp)->type == GIMPLE_OMP_FOR)
8618 break;
8619
8620 struct omp_region *kfor = *pp;
8621 gcc_assert (kfor);
8622 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8623 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8624 *pp = kfor->next;
8625 if (kfor->inner)
8626 {
8627 if (gimple_omp_for_grid_group_iter (for_stmt))
8628 {
8629 struct omp_region **next_pp;
8630 for (pp = &kfor->inner; *pp; pp = next_pp)
8631 {
8632 next_pp = &(*pp)->next;
8633 if ((*pp)->type != GIMPLE_OMP_FOR)
8634 continue;
8635 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8636 gcc_assert (gimple_omp_for_kind (inner)
8637 == GF_OMP_FOR_KIND_GRID_LOOP);
8638 grid_expand_omp_for_loop (*pp, true);
8639 *pp = (*pp)->next;
8640 next_pp = pp;
8641 }
8642 }
8643 expand_omp (kfor->inner);
8644 }
8645 if (gpukernel->inner)
8646 expand_omp (gpukernel->inner);
8647
8648 tree kern_fndecl = copy_node (orig_child_fndecl);
8649 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8650 "kernel");
8651 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8652 tree tgtblock = gimple_block (tgt_stmt);
8653 tree fniniblock = make_node (BLOCK);
8654 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8655 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8656 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8657 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8658 DECL_INITIAL (kern_fndecl) = fniniblock;
8659 push_struct_function (kern_fndecl);
8660 cfun->function_end_locus = gimple_location (tgt_stmt);
8661 init_tree_ssa (cfun);
8662 pop_cfun ();
8663
8664 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8665 gcc_assert (!DECL_CHAIN (old_parm_decl));
8666 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8667 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8668 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8669 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8670 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8671 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8672 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8673 kern_cfun->curr_properties = cfun->curr_properties;
8674
8675 grid_expand_omp_for_loop (kfor, false);
8676
8677 /* Remove the omp for statement. */
8678 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8679 gsi_remove (&gsi, true);
8680 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8681 return. */
65f4b875 8682 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8683 gcc_assert (!gsi_end_p (gsi)
8684 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8685 gimple *ret_stmt = gimple_build_return (NULL);
8686 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8687 gsi_remove (&gsi, true);
8688
8689 /* Statements in the first BB in the target construct have been produced by
8690 target lowering and must be copied inside the GPUKERNEL, with the two
8691 exceptions of the first OMP statement and the OMP_DATA assignment
8692 statement. */
8693 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8694 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8695 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8696 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8697 !gsi_end_p (tsi); gsi_next (&tsi))
8698 {
8699 gimple *stmt = gsi_stmt (tsi);
8700 if (is_gimple_omp (stmt))
8701 break;
8702 if (sender
8703 && is_gimple_assign (stmt)
8704 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8705 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8706 continue;
8707 gimple *copy = gimple_copy (stmt);
8708 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8709 gimple_set_block (copy, fniniblock);
8710 }
8711
8712 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8713 gpukernel->exit, inside_block);
8714
8715 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8716 kcn->mark_force_output ();
8717 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8718
8719 hsa_register_kernel (kcn, orig_child);
8720
8721 cgraph_node::add_new_function (kern_fndecl, true);
8722 push_cfun (kern_cfun);
8723 cgraph_edge::rebuild_edges ();
8724
8725 /* Re-map any mention of the PARM_DECL of the original function to the
8726 PARM_DECL of the new one.
8727
8728 TODO: It would be great if lowering produced references into the GPU
8729 kernel decl straight away and we did not have to do this. */
8730 struct grid_arg_decl_map adm;
8731 adm.old_arg = old_parm_decl;
8732 adm.new_arg = new_parm_decl;
8733 basic_block bb;
8734 FOR_EACH_BB_FN (bb, kern_cfun)
8735 {
8736 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8737 {
8738 gimple *stmt = gsi_stmt (gsi);
8739 struct walk_stmt_info wi;
8740 memset (&wi, 0, sizeof (wi));
8741 wi.info = &adm;
8742 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8743 }
8744 }
8745 pop_cfun ();
8746
8747 return;
8748}
8749
8750/* Expand the parallel region tree rooted at REGION. Expansion
8751 proceeds in depth-first order. Innermost regions are expanded
8752 first. This way, parallel regions that require a new function to
8753 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8754 internal dependencies in their body. */
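/* Note: the loop below expands REGION->INNER before dispatching on
   REGION->TYPE, so by the time e.g. expand_omp_taskreg outlines a
   parallel body into a new function, every construct nested in that
   body has already been rewritten into plain runtime calls.  */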
8755
8756static void
8757expand_omp (struct omp_region *region)
8758{
8759 omp_any_child_fn_dumped = false;
8760 while (region)
8761 {
8762 location_t saved_location;
8763 gimple *inner_stmt = NULL;
8764
8765 /* First, determine whether this is a combined parallel+workshare
8766 region. */
8767 if (region->type == GIMPLE_OMP_PARALLEL)
8768 determine_parallel_type (region);
8769 else if (region->type == GIMPLE_OMP_TARGET)
8770 grid_expand_target_grid_body (region);
8771
8772 if (region->type == GIMPLE_OMP_FOR
8773 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8774 inner_stmt = last_stmt (region->inner->entry);
8775
8776 if (region->inner)
8777 expand_omp (region->inner);
8778
8779 saved_location = input_location;
8780 if (gimple_has_location (last_stmt (region->entry)))
8781 input_location = gimple_location (last_stmt (region->entry));
8782
8783 switch (region->type)
8784 {
8785 case GIMPLE_OMP_PARALLEL:
8786 case GIMPLE_OMP_TASK:
8787 expand_omp_taskreg (region);
8788 break;
8789
8790 case GIMPLE_OMP_FOR:
8791 expand_omp_for (region, inner_stmt);
8792 break;
8793
8794 case GIMPLE_OMP_SECTIONS:
8795 expand_omp_sections (region);
8796 break;
8797
8798 case GIMPLE_OMP_SECTION:
8799 /* Individual omp sections are handled together with their
8800 parent GIMPLE_OMP_SECTIONS region. */
8801 break;
8802
8803 case GIMPLE_OMP_SINGLE:
8804 expand_omp_single (region);
8805 break;
8806
8807 case GIMPLE_OMP_ORDERED:
8808 {
8809 gomp_ordered *ord_stmt
8810 = as_a <gomp_ordered *> (last_stmt (region->entry));
8811 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8812 OMP_CLAUSE_DEPEND))
8813 {
8814 /* We'll expand these when expanding corresponding
8815 worksharing region with ordered(n) clause. */
8816 gcc_assert (region->outer
8817 && region->outer->type == GIMPLE_OMP_FOR);
8818 region->ord_stmt = ord_stmt;
8819 break;
8820 }
8821 }
8822 /* FALLTHRU */
8823 case GIMPLE_OMP_MASTER:
8824 case GIMPLE_OMP_TASKGROUP:
8825 case GIMPLE_OMP_CRITICAL:
8826 case GIMPLE_OMP_TEAMS:
8827 expand_omp_synch (region);
8828 break;
8829
8830 case GIMPLE_OMP_ATOMIC_LOAD:
8831 expand_omp_atomic (region);
8832 break;
8833
8834 case GIMPLE_OMP_TARGET:
8835 expand_omp_target (region);
8836 break;
8837
8838 default:
8839 gcc_unreachable ();
8840 }
8841
8842 input_location = saved_location;
8843 region = region->next;
8844 }
8845 if (omp_any_child_fn_dumped)
8846 {
8847 if (dump_file)
8848 dump_function_header (dump_file, current_function_decl, dump_flags);
8849 omp_any_child_fn_dumped = false;
8850 }
8851}
8852
8853/* Helper for build_omp_regions. Scan the dominator tree starting at
8854 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8855 true, the function ends once a single tree is built (otherwise, a whole
8856 forest of OMP constructs may be built). */
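/* Informal example (an assumed input): for

     #pragma omp parallel
       #pragma omp for

   the scan creates a GIMPLE_OMP_PARALLEL region whose 'inner' child is
   a GIMPLE_OMP_FOR region, while stand-alone directives such as 'omp
   target update' or 'omp ordered depend' produce no region at all.  */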
8857
8858static void
8859build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8860 bool single_tree)
8861{
8862 gimple_stmt_iterator gsi;
8863 gimple *stmt;
8864 basic_block son;
8865
8866 gsi = gsi_last_nondebug_bb (bb);
8867 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8868 {
8869 struct omp_region *region;
8870 enum gimple_code code;
8871
8872 stmt = gsi_stmt (gsi);
8873 code = gimple_code (stmt);
8874 if (code == GIMPLE_OMP_RETURN)
8875 {
8876 /* STMT is the return point out of region PARENT. Mark it
8877 as the exit point and make PARENT the immediately
8878 enclosing region. */
8879 gcc_assert (parent);
8880 region = parent;
8881 region->exit = bb;
8882 parent = parent->outer;
8883 }
8884 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8885 {
8886 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8887 GIMPLE_OMP_RETURN, but matches with
8888 GIMPLE_OMP_ATOMIC_LOAD. */
8889 gcc_assert (parent);
8890 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8891 region = parent;
8892 region->exit = bb;
8893 parent = parent->outer;
8894 }
8895 else if (code == GIMPLE_OMP_CONTINUE)
8896 {
8897 gcc_assert (parent);
8898 parent->cont = bb;
8899 }
8900 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8901 {
8902 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8903 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8904 }
8905 else
8906 {
8907 region = new_omp_region (bb, code, parent);
8908 /* Otherwise... */
8909 if (code == GIMPLE_OMP_TARGET)
8910 {
8911 switch (gimple_omp_target_kind (stmt))
8912 {
8913 case GF_OMP_TARGET_KIND_REGION:
8914 case GF_OMP_TARGET_KIND_DATA:
8915 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8916 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8917 case GF_OMP_TARGET_KIND_OACC_DATA:
8918 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8919 break;
8920 case GF_OMP_TARGET_KIND_UPDATE:
8921 case GF_OMP_TARGET_KIND_ENTER_DATA:
8922 case GF_OMP_TARGET_KIND_EXIT_DATA:
8923 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8924 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8925 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8926 /* ..., other than for those stand-alone directives... */
8927 region = NULL;
8928 break;
8929 default:
8930 gcc_unreachable ();
8931 }
8932 }
8933 else if (code == GIMPLE_OMP_ORDERED
8934 && omp_find_clause (gimple_omp_ordered_clauses
8935 (as_a <gomp_ordered *> (stmt)),
8936 OMP_CLAUSE_DEPEND))
8937 /* #pragma omp ordered depend is also just a stand-alone
8938 directive. */
8939 region = NULL;
8940 else if (code == GIMPLE_OMP_TASK
8941 && gimple_omp_task_taskwait_p (stmt))
8942 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8943 region = NULL;
8944 /* ..., this directive becomes the parent for a new region. */
8945 if (region)
8946 parent = region;
8947 }
8948 }
8949
8950 if (single_tree && !parent)
8951 return;
8952
8953 for (son = first_dom_son (CDI_DOMINATORS, bb);
8954 son;
8955 son = next_dom_son (CDI_DOMINATORS, son))
8956 build_omp_regions_1 (son, parent, single_tree);
8957}
8958
8959/* Builds the tree of OMP regions rooted at ROOT, storing it to
8960 root_omp_region. */
8961
8962static void
8963build_omp_regions_root (basic_block root)
8964{
8965 gcc_assert (root_omp_region == NULL);
8966 build_omp_regions_1 (root, NULL, true);
8967 gcc_assert (root_omp_region != NULL);
8968}
8969
8970 /* Expands the omp construct (and its subconstructs) starting in HEAD. */
8971
8972void
8973omp_expand_local (basic_block head)
8974{
8975 build_omp_regions_root (head);
8976 if (dump_file && (dump_flags & TDF_DETAILS))
8977 {
8978 fprintf (dump_file, "\nOMP region tree\n\n");
8979 dump_omp_region (dump_file, root_omp_region, 0);
8980 fprintf (dump_file, "\n");
8981 }
8982
8983 remove_exit_barriers (root_omp_region);
8984 expand_omp (root_omp_region);
8985
8986 omp_free_regions ();
8987}
8988
8989 /* Scan the CFG and build a tree of OMP regions, storing it in
8990 root_omp_region. */
8991
8992static void
8993build_omp_regions (void)
8994{
8995 gcc_assert (root_omp_region == NULL);
8996 calculate_dominance_info (CDI_DOMINATORS);
8997 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8998}
8999
9000/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
9001
9002static unsigned int
9003execute_expand_omp (void)
9004{
9005 build_omp_regions ();
9006
9007 if (!root_omp_region)
9008 return 0;
9009
9010 if (dump_file)
9011 {
9012 fprintf (dump_file, "\nOMP region tree\n\n");
9013 dump_omp_region (dump_file, root_omp_region, 0);
9014 fprintf (dump_file, "\n");
9015 }
9016
9017 remove_exit_barriers (root_omp_region);
9018
9019 expand_omp (root_omp_region);
9020
9021 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9022 verify_loop_structure ();
9023 cleanup_tree_cfg ();
9024
9025 omp_free_regions ();
9026
9027 return 0;
9028}
9029
9030/* OMP expansion -- the default pass, run before creation of SSA form. */
9031
9032namespace {
9033
9034const pass_data pass_data_expand_omp =
9035{
9036 GIMPLE_PASS, /* type */
9037 "ompexp", /* name */
9038 OPTGROUP_OMP, /* optinfo_flags */
9039 TV_NONE, /* tv_id */
9040 PROP_gimple_any, /* properties_required */
9041 PROP_gimple_eomp, /* properties_provided */
9042 0, /* properties_destroyed */
9043 0, /* todo_flags_start */
9044 0, /* todo_flags_finish */
9045};
9046
9047class pass_expand_omp : public gimple_opt_pass
9048{
9049public:
9050 pass_expand_omp (gcc::context *ctxt)
9051 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9052 {}
9053
9054 /* opt_pass methods: */
9055 virtual unsigned int execute (function *)
9056 {
9057 bool gate = ((flag_openacc != 0 || flag_openmp != 0
9058 || flag_openmp_simd != 0)
9059 && !seen_error ());
9060
9061 /* This pass always runs, to provide PROP_gimple_eomp.
9062 But often, there is nothing to do. */
9063 if (!gate)
9064 return 0;
9065
9066 return execute_expand_omp ();
9067 }
9068
9069}; // class pass_expand_omp
9070
9071} // anon namespace
9072
9073gimple_opt_pass *
9074make_pass_expand_omp (gcc::context *ctxt)
9075{
9076 return new pass_expand_omp (ctxt);
9077}
9078
9079namespace {
9080
9081const pass_data pass_data_expand_omp_ssa =
9082{
9083 GIMPLE_PASS, /* type */
9084 "ompexpssa", /* name */
9085 OPTGROUP_OMP, /* optinfo_flags */
9086 TV_NONE, /* tv_id */
9087 PROP_cfg | PROP_ssa, /* properties_required */
9088 PROP_gimple_eomp, /* properties_provided */
9089 0, /* properties_destroyed */
9090 0, /* todo_flags_start */
9091 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
9092};
9093
9094class pass_expand_omp_ssa : public gimple_opt_pass
9095{
9096public:
9097 pass_expand_omp_ssa (gcc::context *ctxt)
9098 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
9099 {}
9100
9101 /* opt_pass methods: */
9102 virtual bool gate (function *fun)
9103 {
9104 return !(fun->curr_properties & PROP_gimple_eomp);
9105 }
9106 virtual unsigned int execute (function *) { return execute_expand_omp (); }
9107 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
9108
9109}; // class pass_expand_omp_ssa
9110
9111} // anon namespace
9112
9113gimple_opt_pass *
9114make_pass_expand_omp_ssa (gcc::context *ctxt)
9115{
9116 return new pass_expand_omp_ssa (ctxt);
9117}
9118
9119/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
9120 GIMPLE_* codes. */
9121
9122bool
9123omp_make_gimple_edges (basic_block bb, struct omp_region **region,
9124 int *region_idx)
9125{
9126 gimple *last = last_stmt (bb);
9127 enum gimple_code code = gimple_code (last);
9128 struct omp_region *cur_region = *region;
9129 bool fallthru = false;
9130
9131 switch (code)
9132 {
9133 case GIMPLE_OMP_PARALLEL:
9134 case GIMPLE_OMP_FOR:
9135 case GIMPLE_OMP_SINGLE:
9136 case GIMPLE_OMP_TEAMS:
9137 case GIMPLE_OMP_MASTER:
9138 case GIMPLE_OMP_TASKGROUP:
9139 case GIMPLE_OMP_CRITICAL:
9140 case GIMPLE_OMP_SECTION:
9141 case GIMPLE_OMP_GRID_BODY:
9142 cur_region = new_omp_region (bb, code, cur_region);
9143 fallthru = true;
9144 break;
9145
9146 case GIMPLE_OMP_TASK:
9147 cur_region = new_omp_region (bb, code, cur_region);
9148 fallthru = true;
9149 if (gimple_omp_task_taskwait_p (last))
9150 cur_region = cur_region->outer;
9151 break;
9152
9153 case GIMPLE_OMP_ORDERED:
9154 cur_region = new_omp_region (bb, code, cur_region);
9155 fallthru = true;
9156 if (omp_find_clause (gimple_omp_ordered_clauses
9157 (as_a <gomp_ordered *> (last)),
9158 OMP_CLAUSE_DEPEND))
9159 cur_region = cur_region->outer;
9160 break;
9161
9162 case GIMPLE_OMP_TARGET:
9163 cur_region = new_omp_region (bb, code, cur_region);
9164 fallthru = true;
9165 switch (gimple_omp_target_kind (last))
9166 {
9167 case GF_OMP_TARGET_KIND_REGION:
9168 case GF_OMP_TARGET_KIND_DATA:
9169 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9170 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9171 case GF_OMP_TARGET_KIND_OACC_DATA:
9172 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9173 break;
9174 case GF_OMP_TARGET_KIND_UPDATE:
9175 case GF_OMP_TARGET_KIND_ENTER_DATA:
9176 case GF_OMP_TARGET_KIND_EXIT_DATA:
9177 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9178 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9179 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9180 cur_region = cur_region->outer;
9181 break;
9182 default:
9183 gcc_unreachable ();
9184 }
9185 break;
9186
9187 case GIMPLE_OMP_SECTIONS:
9188 cur_region = new_omp_region (bb, code, cur_region);
9189 fallthru = true;
9190 break;
9191
9192 case GIMPLE_OMP_SECTIONS_SWITCH:
9193 fallthru = false;
9194 break;
9195
9196 case GIMPLE_OMP_ATOMIC_LOAD:
9197 case GIMPLE_OMP_ATOMIC_STORE:
9198 fallthru = true;
9199 break;
9200
9201 case GIMPLE_OMP_RETURN:
9202 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
9203 somewhere other than the next block. This will be
9204 created later. */
9205 cur_region->exit = bb;
9206 if (cur_region->type == GIMPLE_OMP_TASK)
9207 /* Add an edge corresponding to not scheduling the task
9208 immediately. */
9209 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
9210 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
9211 cur_region = cur_region->outer;
9212 break;
9213
9214 case GIMPLE_OMP_CONTINUE:
9215 cur_region->cont = bb;
9216 switch (cur_region->type)
9217 {
9218 case GIMPLE_OMP_FOR:
9219 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
9220 successor edges as abnormal to prevent splitting
9221 them. */
9222 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
9223 /* Make the loopback edge. */
9224 make_edge (bb, single_succ (cur_region->entry),
9225 EDGE_ABNORMAL);
9226
9227 /* Create an edge from GIMPLE_OMP_FOR to exit, which
9228 corresponds to the case that the body of the loop
9229 is not executed at all. */
9230 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
9231 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
9232 fallthru = false;
9233 break;
9234
9235 case GIMPLE_OMP_SECTIONS:
9236 /* Wire up the edges into and out of the nested sections. */
9237 {
9238 basic_block switch_bb = single_succ (cur_region->entry);
9239
9240 struct omp_region *i;
9241 for (i = cur_region->inner; i ; i = i->next)
9242 {
9243 gcc_assert (i->type == GIMPLE_OMP_SECTION);
9244 make_edge (switch_bb, i->entry, 0);
9245 make_edge (i->exit, bb, EDGE_FALLTHRU);
9246 }
9247
9248 /* Make the loopback edge to the block with
9249 GIMPLE_OMP_SECTIONS_SWITCH. */
9250 make_edge (bb, switch_bb, 0);
9251
9252 /* Make the edge from the switch to exit. */
9253 make_edge (switch_bb, bb->next_bb, 0);
9254 fallthru = false;
9255 }
9256 break;
9257
9258 case GIMPLE_OMP_TASK:
9259 fallthru = true;
9260 break;
9261
9262 default:
9263 gcc_unreachable ();
9264 }
9265 break;
9266
9267 default:
9268 gcc_unreachable ();
9269 }
9270
9271 if (*region != cur_region)
9272 {
9273 *region = cur_region;
9274 if (cur_region)
9275 *region_idx = cur_region->entry->index;
9276 else
9277 *region_idx = 0;
9278 }
9279
9280 return fallthru;
9281}
9282
9283#include "gt-omp-expand.h"