/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */
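
/* For illustration (a sketch, not from the original sources): given

     #pragma omp parallel        <- region->entry
     {
       #pragma omp for           <- region->inner->entry
       for (i = 0; i < n; i++)
         ...
     }

   the worksharing region hangs off the parallel region via ->inner,
   and further directives at the same nesting level would be chained
   through ->next.  */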

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
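
/* A worked example (illustrative only): with a simd schedule and a
   vectorization factor VF of 8, a chunk size of 10 becomes
   (10 + 7) & -8 == 16, i.e. the next multiple of VF, so chunks never
   split a SIMD vector across threads.  */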

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region of TYPE starting at BB inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */
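
/* As an illustrative sketch (not from the original sources), a plain

     #pragma omp parallel

   becomes a call of the form

     GOMP_parallel (foo.omp_fn.0, &.omp_data_o, num_threads, flags);

   whereas a combined parallel loop with a non-static schedule is
   routed to one of the GOMP_parallel_loop_* entry points chosen via
   START_IX below, with the bounds, step and chunk size from WS_ARGS
   spliced into the argument list.  */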

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */
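
/* Illustrative sketch (not from the original sources): in this GCC
   version a plain

     #pragma omp task if (c) priority (p)

   is expanded into roughly

     GOMP_task (foo.omp_fn.1, &.omp_data_o, cpyfn, arg_size, arg_align,
		c, flags, depend, p);

   while taskloops call GOMP_taskloop{,_ull} with the additional
   num_tasks, start, end and step arguments, as built below.  */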

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  BB is the block where to insert the
   code, ENTRY_STMT the GIMPLE_OMP_TEAMS statement being expanded.  */
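
/* For example (an illustrative sketch, not from the original sources),
   a host

     #pragma omp teams num_teams (4) thread_limit (8)

   is expanded into

     GOMP_teams_reg (foo.omp_fn.2, &.omp_data_o, 4, 8, 0);

   where the trailing zero is the flags argument pushed below for
   future extensibility.  */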

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any barrier that a workshare inside the
   GIMPLE_OMP_PARALLEL left at the end of the region can now be
   removed.  */
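
/* For instance (illustrative only), in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         ...
     }

   the barrier implied at the end of the worksharing loop is
   immediately followed by the implicit barrier of the parallel, so
   the former can be marked nowait, unless queued tasks might still
   reference shared locals as checked below.  */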

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for the task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */
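
/* Illustrative sketch (not from the original sources): inside an
   outlined parallel body

     tid = omp_get_thread_num ();

   is redirected to the const builtin

     tid = __builtin_omp_get_thread_num ();

   allowing later passes to CSE repeated queries within the body.  */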

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */
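
/* A worked example (illustrative only): for a collapse(2) nest whose
   outer loop runs 10 iterations and whose inner loop runs 20, IVAR
   ranges over [0, 200) and is decomposed from the innermost loop
   outwards as

     inner index = IVAR % 20;  outer index = IVAR / 20;

   each index is then multiplied by the loop's step and added to its
   base (or tile origin) to recover the original iteration values.  */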
1663
1664static void
02889d23 1665expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
629b3d75
MJ
1666 gimple_stmt_iterator *gsi,
1667 const oacc_collapse *counts, tree ivar)
1668{
1669 tree ivar_type = TREE_TYPE (ivar);
1670
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1674 {
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
02889d23
CLT
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
629b3d75
MJ
1679 tree diff_type = TREE_TYPE (collapse->step);
1680 tree plus_type = iter_type;
1681 enum tree_code plus_code = PLUS_EXPR;
1682 tree expr;
1683
1684 if (POINTER_TYPE_P (iter_type))
1685 {
1686 plus_code = POINTER_PLUS_EXPR;
1687 plus_type = sizetype;
1688 }
1689
02889d23
CLT
1690 expr = ivar;
1691 if (ix)
1692 {
1693 tree mod = fold_convert (ivar_type, collapse->iters);
1694 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1695 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1696 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 }
1699
629b3d75
MJ
1700 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1701 collapse->step);
1702 expr = fold_build2 (plus_code, iter_type,
1703 inner ? collapse->outer : collapse->base,
1704 fold_convert (plus_type, expr));
1705 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1706 true, GSI_SAME_STMT);
1707 gassign *ass = gimple_build_assign (v, expr);
1708 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1709 }
1710}
1711
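/* The div/mod walk above, restated as a minimal sketch for two
   collapsed loops where the inner trip count is C1 (the innermost
   index varies fastest); hypothetical helper, compiled out,
   exposition only.  Scale each index by the loop's step and add its
   base to obtain the actual iteration values, as the code above does.  */
#if 0
static void
example_collapse_vars (long long ivar, long long c1,
		       long long *v0, long long *v1)
{
  *v1 = ivar % c1;	/* innermost index */
  *v0 = ivar / c1;	/* outer index */
}
#endif
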
1712/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1713 of the combined collapse > 1 loop constructs, generate code like:
1714 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1715 if (cond3 is <)
1716 adj = STEP3 - 1;
1717 else
1718 adj = STEP3 + 1;
1719 count3 = (adj + N32 - N31) / STEP3;
1720 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1721 if (cond2 is <)
1722 adj = STEP2 - 1;
1723 else
1724 adj = STEP2 + 1;
1725 count2 = (adj + N22 - N21) / STEP2;
1726 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1727 if (cond1 is <)
1728 adj = STEP1 - 1;
1729 else
1730 adj = STEP1 + 1;
1731 count1 = (adj + N12 - N11) / STEP1;
1732 count = count1 * count2 * count3;
1733 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1734 count = 0;
1735 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1736 of the combined loop constructs, just initialize COUNTS array
1737 from the _looptemp_ clauses. For loop nests with non-rectangular
1738 loops, do this only for the rectangular loops. Then pick
1739 the loops which reference outer vars in their bound expressions
1740 and the loops which they refer to and for this sub-nest compute
1741 number of iterations. For triangular loops use Faulhaber's formula
1742 (TBD), otherwise, as a fallback, compute by iterating the loops.
1743 If e.g. the sub-nest is
1744 for (I = N11; I COND1 N12; I += STEP1)
1745 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1746 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1747 do:
1748 COUNT = 0;
1749 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1750 for (tmpj = M21 * tmpi + N21;
1751 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1752 {
1753 int tmpk1 = M31 * tmpj + N31;
1754 int tmpk2 = M32 * tmpj + N32;
1755 if (tmpk1 COND3 tmpk2)
1756 {
1757 if (COND3 is <)
1758 adj = STEP3 - 1;
1759 else
1760 adj = STEP3 + 1;
1761 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1762 }
1763 }
1764 and finally multiply the counts of the rectangular loops not
1765 in the sub-nest with COUNT. Also, in counts[fd->last_nonrect]
1766 store the number of iterations of the loops from fd->first_nonrect
1767 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1768 by the counts of rectangular loops not referenced in any non-rectangular
1769 loops sandwiched in between those. */
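
/* For instance, for the triangular sub-nest
     for (i = 0; i < n; i++)
       for (j = 0; j < i; j++)
   the fallback counting loop described above agrees with the first
   Faulhaber sum n * (n - 1) / 2.  A compiled-out, exposition-only
   check (hypothetical helper, not part of this pass):  */
#if 0
static long long
example_triangular_count (long long n)
{
  long long count = 0;
  for (long long i = 0; i < n; i++)
    for (long long j = 0; j < i; j++)
      count++;
  return count;		/* == n * (n - 1) / 2 */
}
#endif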
1770
1771/* NOTE: It *could* be better to moosh all of the BBs together,
1772 creating one larger BB with all the computation and the unexpected
1773 jump at the end. I.e.
1774
1775 bool zero3, zero2, zero1, zero;
1776
1777 zero3 = N32 c3 N31;
1778 count3 = (N32 - N31) /[cl] STEP3;
1779 zero2 = N22 c2 N21;
1780 count2 = (N22 - N21) /[cl] STEP2;
1781 zero1 = N12 c1 N11;
1782 count1 = (N12 - N11) /[cl] STEP1;
1783 zero = zero3 || zero2 || zero1;
1784 count = count1 * count2 * count3;
1785 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1786
1787 After all, we expect zero to be false, and thus we expect to have to
1788 evaluate all of the comparison expressions, so short-circuiting
1789 oughtn't be a win. Since the condition isn't protecting a
1790 denominator, we're not concerned about divide-by-zero, so we can
1791 fully evaluate count even if a numerator turned out to be wrong.
1792
1793 It seems like putting this all together would create much better
1794 scheduling opportunities, and less pressure on the chip's branch
1795 predictor. */
1796
1797static void
1798expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1799 basic_block &entry_bb, tree *counts,
1800 basic_block &zero_iter1_bb, int &first_zero_iter1,
1801 basic_block &zero_iter2_bb, int &first_zero_iter2,
1802 basic_block &l2_dom_bb)
1803{
1804 tree t, type = TREE_TYPE (fd->loop.v);
1805 edge e, ne;
1806 int i;
1807
1808 /* Collapsed loops need work for expansion into SSA form. */
1809 gcc_assert (!gimple_in_ssa_p (cfun));
1810
1811 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1812 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1813 {
1814 gcc_assert (fd->ordered == 0);
1815 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1816 isn't supposed to be handled, as the inner loop doesn't
1817 use it. */
1818 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1819 OMP_CLAUSE__LOOPTEMP_);
1820 gcc_assert (innerc);
1821 for (i = 0; i < fd->collapse; i++)
1822 {
1823 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1824 OMP_CLAUSE__LOOPTEMP_);
1825 gcc_assert (innerc);
1826 if (i)
1827 counts[i] = OMP_CLAUSE_DECL (innerc);
1828 else
1829 counts[0] = NULL_TREE;
1830 }
1831 return;
1832 }
1833
1834 for (i = fd->collapse; i < fd->ordered; i++)
1835 {
1836 tree itype = TREE_TYPE (fd->loops[i].v);
1837 counts[i] = NULL_TREE;
1838 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1839 fold_convert (itype, fd->loops[i].n1),
1840 fold_convert (itype, fd->loops[i].n2));
1841 if (t && integer_zerop (t))
1842 {
1843 for (i = fd->collapse; i < fd->ordered; i++)
1844 counts[i] = build_int_cst (type, 0);
1845 break;
1846 }
1847 }
1848 bool rect_count_seen = false;
1849 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1850 {
1851 tree itype = TREE_TYPE (fd->loops[i].v);
1852
1853 if (i >= fd->collapse && counts[i])
1854 continue;
1855 if (fd->non_rect)
1856 {
1857 /* Skip loops that use outer iterators in their expressions
1858 during this phase. */
1859 if (fd->loops[i].m1 || fd->loops[i].m2)
1860 {
1861 counts[i] = build_zero_cst (type);
1862 continue;
1863 }
1864 }
1865 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1866 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1867 fold_convert (itype, fd->loops[i].n1),
1868 fold_convert (itype, fd->loops[i].n2)))
1869 == NULL_TREE || !integer_onep (t)))
1870 {
1871 gcond *cond_stmt;
1872 tree n1, n2;
1873 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1874 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1875 true, GSI_SAME_STMT);
1876 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1877 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1878 true, GSI_SAME_STMT);
1879 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1880 NULL_TREE, NULL_TREE);
1881 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1882 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1883 expand_omp_regimplify_p, NULL, NULL)
1884 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1885 expand_omp_regimplify_p, NULL, NULL))
1886 {
1887 *gsi = gsi_for_stmt (cond_stmt);
1888 gimple_regimplify_operands (cond_stmt, gsi);
1889 }
1890 e = split_block (entry_bb, cond_stmt);
1891 basic_block &zero_iter_bb
1892 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1893 int &first_zero_iter
1894 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1895 if (zero_iter_bb == NULL)
1896 {
1897 gassign *assign_stmt;
1898 first_zero_iter = i;
1899 zero_iter_bb = create_empty_bb (entry_bb);
1900 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1901 *gsi = gsi_after_labels (zero_iter_bb);
1902 if (i < fd->collapse)
1903 assign_stmt = gimple_build_assign (fd->loop.n2,
1904 build_zero_cst (type));
1905 else
1906 {
1907 counts[i] = create_tmp_reg (type, ".count");
1908 assign_stmt
1909 = gimple_build_assign (counts[i], build_zero_cst (type));
1910 }
1911 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1912 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1913 entry_bb);
1914 }
1915 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1916 ne->probability = profile_probability::very_unlikely ();
1917 e->flags = EDGE_TRUE_VALUE;
1918 e->probability = ne->probability.invert ();
1919 if (l2_dom_bb == NULL)
1920 l2_dom_bb = entry_bb;
1921 entry_bb = e->dest;
1922 *gsi = gsi_last_nondebug_bb (entry_bb);
1923 }
1924
1925 if (POINTER_TYPE_P (itype))
1926 itype = signed_type_for (itype);
1927 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1928 ? -1 : 1));
1929 t = fold_build2 (PLUS_EXPR, itype,
1930 fold_convert (itype, fd->loops[i].step), t);
1931 t = fold_build2 (PLUS_EXPR, itype, t,
1932 fold_convert (itype, fd->loops[i].n2));
1933 t = fold_build2 (MINUS_EXPR, itype, t,
1934 fold_convert (itype, fd->loops[i].n1));
1935 /* ?? We could probably use CEIL_DIV_EXPR instead of
1936 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1937 generate the same code in the end because generically we
1938 don't know that the values involved must be negative for
1939 GT?? */
1940 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1941 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1942 fold_build1 (NEGATE_EXPR, itype, t),
1943 fold_build1 (NEGATE_EXPR, itype,
1944 fold_convert (itype,
1945 fd->loops[i].step)));
1946 else
1947 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1948 fold_convert (itype, fd->loops[i].step));
1949 t = fold_convert (type, t);
1950 if (TREE_CODE (t) == INTEGER_CST)
1951 counts[i] = t;
1952 else
1953 {
1954 if (i < fd->collapse || i != first_zero_iter2)
1955 counts[i] = create_tmp_reg (type, ".count");
1956 expand_omp_build_assign (gsi, counts[i], t);
1957 }
1958 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1959 {
1960 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1961 continue;
1962 if (!rect_count_seen)
1963 {
1964 t = counts[i];
1965 rect_count_seen = true;
1966 }
1967 else
1968 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1969 expand_omp_build_assign (gsi, fd->loop.n2, t);
1970 }
1971 }
1972 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1973 {
1974 gcc_assert (fd->last_nonrect != -1);
1975
1976 /* Fallback implementation. Evaluate the loops with m1/m2
1977 non-NULL as well as their outer loops at runtime using temporaries
1978 instead of the original iteration variables, and in the
1979 body just bump the counter. */
1980 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1981 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1982 build_zero_cst (type));
1983 gimple_stmt_iterator gsi2 = *gsi;
1984 gsi_prev (&gsi2);
1985 e = split_block (entry_bb, gsi_stmt (gsi2));
1986 e = split_block (e->dest, (gimple *) NULL);
1987 basic_block cur_bb = e->src;
1988 basic_block next_bb = e->dest;
1989 entry_bb = e->dest;
1990 *gsi = gsi_after_labels (entry_bb);
1991
1992 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
1993 memset (vs, 0, fd->last_nonrect * sizeof (tree));
1994
1995 for (i = 0; i <= fd->last_nonrect; i++)
1996 {
1997 if (fd->loops[i].m1 == NULL_TREE
1998 && fd->loops[i].m2 == NULL_TREE
1999 && !fd->loops[i].non_rect_referenced)
2000 continue;
2001
2002 tree itype = TREE_TYPE (fd->loops[i].v);
2003
2004 gsi2 = gsi_after_labels (cur_bb);
2005 tree n1, n2;
2006 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2007 if (fd->loops[i].m1)
2008 {
2009 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2010 n1 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
2011 n1);
2012 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2013 }
2014 else
2015 n1 = t;
2016 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 if (i < fd->last_nonrect)
2019 {
2020 vs[i] = create_tmp_reg (itype, ".it");
2021 expand_omp_build_assign (&gsi2, vs[i], n1);
2022 }
2023 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2024 if (fd->loops[i].m2)
2025 {
2026 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2027 n2 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
2028 n2);
2029 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2030 }
2031 else
2032 n2 = t;
2033 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2034 true, GSI_SAME_STMT);
2035 if (i == fd->last_nonrect)
2036 {
2037 gcond *cond_stmt
2038 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2039 NULL_TREE, NULL_TREE);
2040 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2041 e = split_block (cur_bb, cond_stmt);
2042 e->flags = EDGE_TRUE_VALUE;
2043 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2044 e->probability = profile_probability::likely ().guessed ();
2045 ne->probability = e->probability.invert ();
2046 gsi2 = gsi_after_labels (e->dest);
2047
2048 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2049 ? -1 : 1));
2050 t = fold_build2 (PLUS_EXPR, itype,
2051 fold_convert (itype, fd->loops[i].step), t);
2052 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2053 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2054 tree step = fold_convert (itype, fd->loops[i].step);
2055 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2056 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2057 fold_build1 (NEGATE_EXPR, itype, t),
2058 fold_build1 (NEGATE_EXPR, itype, step));
2059 else
2060 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2061 t = fold_convert (type, t);
2062 t = fold_build2 (PLUS_EXPR, type, counts[fd->last_nonrect], t);
2063 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2064 true, GSI_SAME_STMT);
2065 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2066 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2067 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2068 break;
2069 }
2070 e = split_block (cur_bb, last_stmt (cur_bb));
2071
2072 basic_block new_cur_bb = create_empty_bb (cur_bb);
2073 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2074
2075 gsi2 = gsi_after_labels (e->dest);
2076 tree step = fold_convert (itype, unshare_expr (fd->loops[i].step));
2077 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2078 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 expand_omp_build_assign (&gsi2, vs[i], t);
2081
2082 ne = split_block (e->dest, last_stmt (e->dest));
2083 gsi2 = gsi_after_labels (ne->dest);
2084
2085 gcond *cond_stmt
2086 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2087 NULL_TREE, NULL_TREE);
2088 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2089 edge e3, e4;
2090 if (next_bb == entry_bb)
2091 {
2092 e3 = find_edge (ne->dest, next_bb);
2093 e3->flags = EDGE_FALSE_VALUE;
2094 }
2095 else
2096 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2097 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2098 e4->probability = profile_probability::likely ().guessed ();
2099 e3->probability = e4->probability.invert ();
2100 basic_block esrc = e->src;
2101 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2102 cur_bb = new_cur_bb;
2103 basic_block latch_bb = next_bb;
2104 next_bb = e->dest;
2105 remove_edge (e);
2106 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2107 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2108 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2109 }
2110 t = NULL_TREE;
2111 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2112 if (!fd->loops[i].non_rect_referenced
2113 && fd->loops[i].m1 == NULL_TREE
2114 && fd->loops[i].m2 == NULL_TREE)
2115 {
2116 if (t == NULL_TREE)
2117 t = counts[i];
2118 else
2119 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2120 }
2121 if (t)
2122 {
2123 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2124 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2125 }
2126 if (!rect_count_seen)
2127 t = counts[fd->last_nonrect];
2128 else
2129 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2130 counts[fd->last_nonrect]);
2131 expand_omp_build_assign (gsi, fd->loop.n2, t);
2132 }
2133 else if (fd->non_rect)
2134 {
2135 tree t = fd->loop.n2;
2136 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2137 int non_rect_referenced = 0, non_rect = 0;
2138 for (i = 0; i < fd->collapse; i++)
2139 {
2140 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2141 && !integer_zerop (counts[i]))
2142 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2143 if (fd->loops[i].non_rect_referenced)
2144 non_rect_referenced++;
2145 if (fd->loops[i].m1 || fd->loops[i].m2)
2146 non_rect++;
2147 }
2148 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2149 counts[fd->last_nonrect] = t;
2150 }
2151}
2152
2153/* Helper function for expand_omp_{for_*,simd}. Generate code like:
2154 T = V;
2155 V3 = N31 + (T % count3) * STEP3;
2156 T = T / count3;
2157 V2 = N21 + (T % count2) * STEP2;
2158 T = T / count2;
2159 V1 = N11 + T * STEP1;
2160 if this loop doesn't have an inner loop construct combined with it.
2161 If it does have an inner loop construct combined with it and the
2162 iteration count isn't known constant, store values from counts array
2163 into its _looptemp_ temporaries instead.
2164 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2165 inclusive), use the count of all those loops together, and either
2166 find quadratic etc. equation roots (TBD), or as a fallback, do:
2167 COUNT = 0;
2168 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2169 for (tmpj = M21 * tmpi + N21;
2170 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2171 {
2172 int tmpk1 = M31 * tmpj + N31;
2173 int tmpk2 = M32 * tmpj + N32;
2174 if (tmpk1 COND3 tmpk2)
2175 {
2176 if (COND3 is <)
2177 adj = STEP3 - 1;
2178 else
2179 adj = STEP3 + 1;
2180 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2181 if (COUNT + temp > T)
2182 {
2183 V1 = tmpi;
2184 V2 = tmpj;
2185 V3 = tmpk1 + (T - COUNT) * STEP3;
2186 goto done;
2187 }
2188 else
2189 COUNT += temp;
2190 }
2191 }
2192 done:;
2193 but for optional innermost or outermost rectangular loops that aren't
2194 referenced by other loop expressions keep doing the division/modulo. */
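
/* The rectangular decomposition described above, as a compiled-out
   sketch for a 3-deep collapse (hypothetical helper, exposition
   only); T is the linearized iteration picked by the runtime.  */
#if 0
static void
example_init_vars (long long T, long long count2, long long count3,
		   long long n11, long long n21, long long n31,
		   long long s1, long long s2, long long s3,
		   long long *v1, long long *v2, long long *v3)
{
  *v3 = n31 + (T % count3) * s3;
  T /= count3;
  *v2 = n21 + (T % count2) * s2;
  T /= count2;
  *v1 = n11 + T * s1;
}
#endif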
2195
2196static void
2197expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2198 tree *counts, tree *nonrect_bounds,
2199 gimple *inner_stmt, tree startvar)
2200{
2201 int i;
2202 if (gimple_omp_for_combined_p (fd->for_stmt))
2203 {
2204 /* If fd->loop.n2 is constant, then no propagation of the counts
2205 is needed, they are constant. */
2206 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2207 return;
2208
2209 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2210 ? gimple_omp_taskreg_clauses (inner_stmt)
2211 : gimple_omp_for_clauses (inner_stmt);
2212 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2213 isn't supposed to be handled, as the inner loop doesn't
2214 use it. */
2215 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2216 gcc_assert (innerc);
2217 for (i = 0; i < fd->collapse; i++)
2218 {
2219 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2220 OMP_CLAUSE__LOOPTEMP_);
2221 gcc_assert (innerc);
2222 if (i)
2223 {
2224 tree tem = OMP_CLAUSE_DECL (innerc);
2225 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
2226 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2227 false, GSI_CONTINUE_LINKING);
2228 gassign *stmt = gimple_build_assign (tem, t);
2229 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2230 }
2231 }
2232 return;
2233 }
2234
2235 tree type = TREE_TYPE (fd->loop.v);
2236 tree tem = create_tmp_reg (type, ".tem");
2237 gassign *stmt = gimple_build_assign (tem, startvar);
2238 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2239
2240 for (i = fd->collapse - 1; i >= 0; i--)
2241 {
2242 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2243 itype = vtype;
2244 if (POINTER_TYPE_P (vtype))
2245 itype = signed_type_for (vtype);
2246 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2247 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2248 else
2249 t = tem;
2250 if (i == fd->last_nonrect)
2251 {
2252 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 tree stopval = t;
2255 tree idx = create_tmp_reg (type, ".count");
2256 expand_omp_build_assign (gsi, idx,
2257 build_zero_cst (type), true);
2258 basic_block bb_triang = NULL;
2259 if (fd->first_nonrect + 1 == fd->last_nonrect
2260 /* For now. */
2261 && TREE_CODE (fd->loop.n2) == INTEGER_CST
2262 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2263 != CODE_FOR_nothing))
2264 {
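/* For a 2-deep triangular sub-nest, the number of inner iterations
   executed before outer iteration C is
     f(C) = FACTOR * C * (C - 1) / 2 + C * MIN_INNER_ITERATIONS,
   so the largest C with f(C) <= STOPVAL follows from the quadratic
   formula; it is computed below in double precision and then
   verified and adjusted in integer arithmetic, routing to the
   fallback code when the checks fail.  */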
2265 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2266 tree itype = TREE_TYPE (fd->loops[i].v);
2267 tree min_inner_iterations = fd->min_inner_iterations;
2268 tree factor = fd->factor;
2269 gcond *cond_stmt
2270 = gimple_build_cond (NE_EXPR, factor,
2271 build_zero_cst (TREE_TYPE (factor)),
2272 NULL_TREE, NULL_TREE);
2273 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2274 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2275 basic_block bb0 = e->src;
2276 e->flags = EDGE_TRUE_VALUE;
2277 e->probability = profile_probability::likely ();
2278 *gsi = gsi_after_labels (e->dest);
2279 tree slltype = long_long_integer_type_node;
2280 tree ulltype = long_long_unsigned_type_node;
2281 tree stopvalull = fold_convert (ulltype, stopval);
2282 stopvalull
2283 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2284 false, GSI_CONTINUE_LINKING);
2285 min_inner_iterations
2286 = fold_convert (slltype, min_inner_iterations);
2287 min_inner_iterations
2288 = force_gimple_operand_gsi (gsi, min_inner_iterations, true,
2289 NULL_TREE, false,
2290 GSI_CONTINUE_LINKING);
2291 factor = fold_convert (slltype, factor);
2292 factor
2293 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2294 false, GSI_CONTINUE_LINKING);
2295 tree min_inner_iterationsd
2296 = fold_build1 (FLOAT_EXPR, double_type_node,
2297 min_inner_iterations);
2298 min_inner_iterationsd
2299 = force_gimple_operand_gsi (gsi, min_inner_iterationsd, true,
2300 NULL_TREE, false,
2301 GSI_CONTINUE_LINKING);
2302 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2303 factor);
2304 factord = force_gimple_operand_gsi (gsi, factord, true,
2305 NULL_TREE, false,
2306 GSI_CONTINUE_LINKING);
2307 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2308 stopvalull);
2309 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2310 NULL_TREE, false,
2311 GSI_CONTINUE_LINKING);
2312 /* Temporarily disable flag_rounding_math; the values will be
2313 decimal numbers divided by 2, and worst-case imprecision
2314 due to too large values ought to be caught later by the
2315 checks for the fallback. */
2316 int save_flag_rounding_math = flag_rounding_math;
2317 flag_rounding_math = 0;
2318 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2319 build_real (double_type_node, dconst2));
2320 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2321 min_inner_iterationsd, t);
2322 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2323 GSI_CONTINUE_LINKING);
2324 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2325 build_real (double_type_node, dconst2));
2326 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2327 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2328 fold_build2 (MULT_EXPR, double_type_node,
2329 t3, t3));
2330 flag_rounding_math = save_flag_rounding_math;
2331 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2332 GSI_CONTINUE_LINKING);
2333 cond_stmt
2334 = gimple_build_cond (LT_EXPR, t,
2335 build_zero_cst (double_type_node),
2336 NULL_TREE, NULL_TREE);
2337 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2338 e = split_block (gsi_bb (*gsi), cond_stmt);
2339 basic_block bb1 = e->src;
2340 e->flags = EDGE_FALSE_VALUE;
2341 e->probability = profile_probability::very_likely ();
2342 *gsi = gsi_after_labels (e->dest);
2343 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2344 tree sqrtr = create_tmp_var (double_type_node);
2345 gimple_call_set_lhs (call, sqrtr);
2346 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2347 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2348 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2349 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2350 tree c = create_tmp_var (ulltype);
2351 tree d = create_tmp_var (ulltype);
2352 expand_omp_build_assign (gsi, c, t, true);
2353 t = fold_build2 (MINUS_EXPR, ulltype, c,
2354 build_one_cst (ulltype));
2355 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2356 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2357 t = fold_build2 (MULT_EXPR, ulltype, fd->factor, t);
2358 tree t2 = fold_build2 (MULT_EXPR, ulltype, c,
2359 fd->min_inner_iterations);
2360 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2361 expand_omp_build_assign (gsi, d, t, true);
2362 t = fold_build2 (MULT_EXPR, ulltype, fd->factor, c);
2363 t = fold_build2 (PLUS_EXPR, ulltype,
2364 t, fd->min_inner_iterations);
2365 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2366 GSI_CONTINUE_LINKING);
2367 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2368 NULL_TREE, NULL_TREE);
2369 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2370 e = split_block (gsi_bb (*gsi), cond_stmt);
2371 basic_block bb2 = e->src;
2372 e->flags = EDGE_TRUE_VALUE;
2373 e->probability = profile_probability::very_likely ();
2374 *gsi = gsi_after_labels (e->dest);
2375 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2376 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2377 GSI_CONTINUE_LINKING);
2378 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2379 NULL_TREE, NULL_TREE);
2380 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2381 e = split_block (gsi_bb (*gsi), cond_stmt);
2382 basic_block bb3 = e->src;
2383 e->flags = EDGE_FALSE_VALUE;
2384 e->probability = profile_probability::very_likely ();
2385 *gsi = gsi_after_labels (e->dest);
2386 t = fold_convert (itype, c);
2387 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2388 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2389 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2390 GSI_CONTINUE_LINKING);
2391 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2392 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2393 t2 = fold_convert (itype, t2);
2394 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2395 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2396 if (fd->loops[i].m1)
2397 {
2398 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2399 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2400 }
2401 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2402 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2403 bb_triang = e->src;
2404 *gsi = gsi_after_labels (e->dest);
2405 remove_edge (e);
2406 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2407 e->probability = profile_probability::very_unlikely ();
2408 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2409 e->probability = profile_probability::very_unlikely ();
2410 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2411 e->probability = profile_probability::very_unlikely ();
2412
2413 basic_block bb4 = create_empty_bb (bb0);
2414 add_bb_to_loop (bb4, bb0->loop_father);
2415 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2416 e->probability = profile_probability::unlikely ();
2417 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2418 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2419 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2420 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2421 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2422 counts[i], counts[i - 1]);
2423 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2424 GSI_CONTINUE_LINKING);
2425 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2426 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2427 t = fold_convert (itype, t);
2428 t2 = fold_convert (itype, t2);
2429 t = fold_build2 (MULT_EXPR, itype, t,
2430 fold_convert (itype, fd->loops[i].step));
2431 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2432 t2 = fold_build2 (MULT_EXPR, itype, t2,
2433 fold_convert (itype, fd->loops[i - 1].step));
2434 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2435 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2436 false, GSI_CONTINUE_LINKING);
2437 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2438 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2439 if (fd->loops[i].m1)
2440 {
2441 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2442 fd->loops[i - 1].v);
2443 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2444 }
2445 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2446 false, GSI_CONTINUE_LINKING);
2447 stmt = gimple_build_assign (fd->loops[i].v, t);
2448 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2449 }
2450 /* Fallback implementation. Evaluate the loops between
2451 fd->first_nonrect and fd->last_nonrect (inclusive) at
2452 runtime using temporaries instead of the original iteration
2453 variables; in the body just bump the counter and compare
2454 with the desired value. */
2455 gimple_stmt_iterator gsi2 = *gsi;
2456 basic_block entry_bb = gsi_bb (gsi2);
2457 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2458 e = split_block (e->dest, (gimple *) NULL);
2459 basic_block dom_bb = NULL;
2460 basic_block cur_bb = e->src;
2461 basic_block next_bb = e->dest;
2462 entry_bb = e->dest;
2463 *gsi = gsi_after_labels (entry_bb);
2464
2465 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2466 tree n1 = NULL_TREE, n2 = NULL_TREE;
2467 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2468
2469 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2470 {
2471 tree itype = TREE_TYPE (fd->loops[j].v);
2472 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2473 && fd->loops[j].m2 == NULL_TREE
2474 && !fd->loops[j].non_rect_referenced);
2475 gsi2 = gsi_after_labels (cur_bb);
2476 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2477 if (fd->loops[j].m1)
2478 {
2479 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2480 n1 = fold_build2 (MULT_EXPR, itype,
2481 vs[j - fd->loops[j].outer], n1);
2482 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2483 }
2484 else if (rect_p)
2485 n1 = build_zero_cst (type);
2486 else
2487 n1 = t;
2488 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2489 true, GSI_SAME_STMT);
2490 if (j < fd->last_nonrect)
2491 {
2492 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2493 expand_omp_build_assign (&gsi2, vs[j], n1);
2494 }
2495 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2496 if (fd->loops[j].m2)
2497 {
2498 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2499 n2 = fold_build2 (MULT_EXPR, itype,
2500 vs[j - fd->loops[j].outer], n2);
2501 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2502 }
2503 else if (rect_p)
2504 n2 = counts[j];
2505 else
2506 n2 = t;
2507 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2508 true, GSI_SAME_STMT);
2509 if (j == fd->last_nonrect)
2510 {
2511 gcond *cond_stmt
2512 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2513 NULL_TREE, NULL_TREE);
2514 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2515 e = split_block (cur_bb, cond_stmt);
2516 e->flags = EDGE_TRUE_VALUE;
2517 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2518 e->probability = profile_probability::likely ().guessed ();
2519 ne->probability = e->probability.invert ();
2520 gsi2 = gsi_after_labels (e->dest);
2521
2522 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2523 ? -1 : 1));
2524 t = fold_build2 (PLUS_EXPR, itype,
2525 fold_convert (itype, fd->loops[j].step), t);
2526 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2527 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2528 tree step = fold_convert (itype, fd->loops[j].step);
2529 if (TYPE_UNSIGNED (itype)
2530 && fd->loops[j].cond_code == GT_EXPR)
2531 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2532 fold_build1 (NEGATE_EXPR, itype, t),
2533 fold_build1 (NEGATE_EXPR, itype, step));
2534 else
2535 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2536 t = fold_convert (type, t);
2537 t = fold_build2 (PLUS_EXPR, type, idx, t);
2538 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2539 true, GSI_SAME_STMT);
2540 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2541 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2542 cond_stmt
2543 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2544 NULL_TREE);
2545 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2546 e = split_block (gsi_bb (gsi2), cond_stmt);
2547 e->flags = EDGE_TRUE_VALUE;
2548 e->probability = profile_probability::likely ().guessed ();
2549 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2550 ne->probability = e->probability.invert ();
2551 gsi2 = gsi_after_labels (e->dest);
2552 expand_omp_build_assign (&gsi2, idx, t);
2553 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2554 break;
2555 }
2556 e = split_block (cur_bb, last_stmt (cur_bb));
2557
2558 basic_block new_cur_bb = create_empty_bb (cur_bb);
2559 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2560
2561 gsi2 = gsi_after_labels (e->dest);
2562 if (rect_p)
2563 t = fold_build2 (PLUS_EXPR, type, vs[j],
2564 build_one_cst (type));
2565 else
2566 {
2567 tree step
2568 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2569 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2570 }
2571 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2572 true, GSI_SAME_STMT);
2573 expand_omp_build_assign (&gsi2, vs[j], t);
2574
2575 edge ne = split_block (e->dest, last_stmt (e->dest));
2576 gsi2 = gsi_after_labels (ne->dest);
2577
2578 gcond *cond_stmt;
2579 if (next_bb == entry_bb)
2580 /* No need to actually check the outermost condition. */
2581 cond_stmt
2582 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2583 boolean_true_node,
2584 NULL_TREE, NULL_TREE);
2585 else
2586 cond_stmt
2587 = gimple_build_cond (rect_p ? LT_EXPR
2588 : fd->loops[j].cond_code,
2589 vs[j], n2, NULL_TREE, NULL_TREE);
2590 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2591 edge e3, e4;
2592 if (next_bb == entry_bb)
2593 {
2594 e3 = find_edge (ne->dest, next_bb);
2595 e3->flags = EDGE_FALSE_VALUE;
2596 dom_bb = ne->dest;
2597 }
2598 else
2599 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2600 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2601 e4->probability = profile_probability::likely ().guessed ();
2602 e3->probability = e4->probability.invert ();
2603 basic_block esrc = e->src;
2604 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2605 cur_bb = new_cur_bb;
2606 basic_block latch_bb = next_bb;
2607 next_bb = e->dest;
2608 remove_edge (e);
2609 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2610 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2611 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2612 }
2613 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2614 {
2615 tree itype = TREE_TYPE (fd->loops[j].v);
2616 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2617 && fd->loops[j].m2 == NULL_TREE
2618 && !fd->loops[j].non_rect_referenced);
2619 if (j == fd->last_nonrect)
2620 {
2621 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2622 t = fold_convert (itype, t);
2623 tree t2
2624 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2625 t = fold_build2 (MULT_EXPR, itype, t, t2);
2626 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2627 }
2628 else if (rect_p)
2629 {
2630 t = fold_convert (itype, vs[j]);
2631 t = fold_build2 (MULT_EXPR, itype, t,
2632 fold_convert (itype, fd->loops[j].step));
2633 if (POINTER_TYPE_P (vtype))
2634 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2635 else
2636 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2637 }
2638 else
2639 t = vs[j];
2640 t = force_gimple_operand_gsi (gsi, t, false,
2641 NULL_TREE, true,
2642 GSI_SAME_STMT);
2643 stmt = gimple_build_assign (fd->loops[j].v, t);
2644 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2645 }
2646 if (gsi_end_p (*gsi))
2647 *gsi = gsi_last_bb (gsi_bb (*gsi));
2648 else
2649 gsi_prev (gsi);
2650 if (bb_triang)
2651 {
2652 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2653 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2654 *gsi = gsi_after_labels (e->dest);
2655 if (!gsi_end_p (*gsi))
2656 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2657 }
2658 }
2659 else
2660 {
2661 t = fold_convert (itype, t);
2662 t = fold_build2 (MULT_EXPR, itype, t,
2663 fold_convert (itype, fd->loops[i].step));
2664 if (POINTER_TYPE_P (vtype))
2665 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2666 else
2667 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2668 t = force_gimple_operand_gsi (gsi, t,
2669 DECL_P (fd->loops[i].v)
2670 && TREE_ADDRESSABLE (fd->loops[i].v),
2671 NULL_TREE, false,
2672 GSI_CONTINUE_LINKING);
2673 stmt = gimple_build_assign (fd->loops[i].v, t);
2674 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2675 }
2676 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2677 {
2678 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2679 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2680 false, GSI_CONTINUE_LINKING);
2681 stmt = gimple_build_assign (tem, t);
2682 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2683 }
2684 if (i == fd->last_nonrect)
2685 i = fd->first_nonrect;
2686 }
2687 if (fd->non_rect)
2688 for (i = 0; i <= fd->last_nonrect; i++)
2689 if (fd->loops[i].m2)
2690 {
2691 tree itype = TREE_TYPE (fd->loops[i].v);
2692
2693 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2694 t = fold_build2 (MULT_EXPR, itype,
2695 fd->loops[i - fd->loops[i].outer].v, t);
2696 t = fold_build2 (PLUS_EXPR, itype, t,
2697 fold_convert (itype,
2698 unshare_expr (fd->loops[i].n2)));
2699 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2700 t = force_gimple_operand_gsi (gsi, t, false,
2701 NULL_TREE, false,
2702 GSI_CONTINUE_LINKING);
2703 stmt = gimple_build_assign (nonrect_bounds[i], t);
2704 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2705 }
2706}
2707
2708/* Helper function for expand_omp_for_*. Generate code like:
2709 L10:
2710 V3 += STEP3;
2711 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2712 L11:
2713 V3 = N31;
2714 V2 += STEP2;
2715 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2716 L12:
2717 V2 = N21;
2718 V1 += STEP1;
2719 goto BODY_BB;
2720 For non-rectangular loops, use temporaries stored in nonrect_bounds
2721 for the upper bounds if M?2 multiplier is present. Given e.g.
2722 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2723 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2724 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2725 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2726 do:
2727 L10:
2728 V4 += STEP4;
2729 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2730 L11:
2731 V4 = N41 + M41 * V2; // This can be left out if the loop
2732 // refers to the immediate parent loop
2733 V3 += STEP3;
2734 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2735 L12:
2736 V3 = N31;
2737 V2 += STEP2;
2738 if (V2 cond2 N22) goto L120; else goto L13;
2739 L120:
2740 V4 = N41 + M41 * V2;
2741 NONRECT_BOUND4 = N42 + M42 * V2;
2742 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2743 L13:
2744 V2 = N21;
2745 V1 += STEP1;
2746 goto L120; */
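
/* A compiled-out C rendition of the basic (rectangular) two-level
   ladder above, exposition only; returning corresponds to branching
   to BODY_BB, and cond2 is assumed to be <.  */
#if 0
static void
example_update_vars (long long *v1, long long *v2,
		     long long n21, long long n22,
		     long long s1, long long s2)
{
  *v2 += s2;		/* L10 */
  if (*v2 < n22)
    return;		/* goto BODY_BB */
  *v2 = n21;		/* L11 */
  *v1 += s1;
  return;		/* goto BODY_BB; the outer bound is checked
			   by the loop exit test elsewhere.  */
}
#endif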
2747
2748static basic_block
2749extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
2750 basic_block cont_bb, basic_block body_bb)
2751{
2752 basic_block last_bb, bb, collapse_bb = NULL;
2753 int i;
2754 gimple_stmt_iterator gsi;
2755 edge e;
2756 tree t;
2757 gimple *stmt;
2758
2759 last_bb = cont_bb;
2760 for (i = fd->collapse - 1; i >= 0; i--)
2761 {
2762 tree vtype = TREE_TYPE (fd->loops[i].v);
2763
2764 bb = create_empty_bb (last_bb);
2765 add_bb_to_loop (bb, last_bb->loop_father);
2766 gsi = gsi_start_bb (bb);
2767
2768 if (i < fd->collapse - 1)
2769 {
2770 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2771 e->probability
2772 = profile_probability::guessed_always ().apply_scale (1, 8);
2773
2774 struct omp_for_data_loop *l = &fd->loops[i + 1];
2775 if (l->m1 == NULL_TREE || l->outer != 1)
2776 {
2777 t = l->n1;
2778 if (l->m1)
2779 {
2780 tree t2
2781 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
2782 fd->loops[i + 1 - l->outer].v, l->m1);
2783 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
2784 }
2785 t = force_gimple_operand_gsi (&gsi, t,
2786 DECL_P (l->v)
2787 && TREE_ADDRESSABLE (l->v),
2788 NULL_TREE, false,
2789 GSI_CONTINUE_LINKING);
2790 stmt = gimple_build_assign (l->v, t);
2791 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2792 }
2793 }
2794 else
2795 collapse_bb = bb;
2796
2797 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2798
2799 if (POINTER_TYPE_P (vtype))
2800 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2801 else
2802 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2803 t = force_gimple_operand_gsi (&gsi, t,
2804 DECL_P (fd->loops[i].v)
2805 && TREE_ADDRESSABLE (fd->loops[i].v),
2806 NULL_TREE, false, GSI_CONTINUE_LINKING);
2807 stmt = gimple_build_assign (fd->loops[i].v, t);
2808 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2809
2810 if (fd->loops[i].non_rect_referenced)
2811 {
2812 basic_block update_bb = NULL, prev_bb = NULL;
2813 for (int j = i + 1; j <= fd->last_nonrect; j++)
2814 if (j - fd->loops[j].outer == i)
2815 {
2816 tree n1, n2;
2817 struct omp_for_data_loop *l = &fd->loops[j];
2818 basic_block this_bb = create_empty_bb (last_bb);
2819 add_bb_to_loop (this_bb, last_bb->loop_father);
2820 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
2821 if (prev_bb)
2822 {
2823 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
2824 e->probability
2825 = profile_probability::guessed_always ().apply_scale (7,
2826 8);
2827 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
2828
2829 }
2830 if (l->m1)
2831 {
2832 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
2833 fd->loops[i].v);
2834 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
2835 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2836 false,
2837 GSI_CONTINUE_LINKING);
2838 stmt = gimple_build_assign (l->v, n1);
2839 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2840 n1 = l->v;
2841 }
2842 else
2843 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
2844 NULL_TREE, false,
2845 GSI_CONTINUE_LINKING);
2846 if (l->m2)
2847 {
2848 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
2849 fd->loops[i].v);
2850 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
2851 t, unshare_expr (l->n2));
2852 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2853 false,
2854 GSI_CONTINUE_LINKING);
2855 stmt = gimple_build_assign (nonrect_bounds[j], n2);
2856 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2857 n2 = nonrect_bounds[j];
2858 }
2859 else
2860 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
2861 true, NULL_TREE, false,
2862 GSI_CONTINUE_LINKING);
2863 gcond *cond_stmt
2864 = gimple_build_cond (l->cond_code, n1, n2,
2865 NULL_TREE, NULL_TREE);
2866 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
2867 if (update_bb == NULL)
2868 update_bb = this_bb;
2869 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
2870 e->probability
2871 = profile_probability::guessed_always ().apply_scale (1, 8);
2872 if (prev_bb == NULL)
2873 set_immediate_dominator (CDI_DOMINATORS, this_bb, last_bb);
2874 prev_bb = this_bb;
2875 }
2876 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
2877 e->probability
2878 = profile_probability::guessed_always ().apply_scale (7, 8);
2879 body_bb = update_bb;
2880 }
2881
2882 if (i > 0)
2883 {
2884 if (fd->loops[i].m2)
2885 t = nonrect_bounds[i];
2886 else
2887 t = unshare_expr (fd->loops[i].n2);
2888 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2889 false, GSI_CONTINUE_LINKING);
2890 tree v = fd->loops[i].v;
2891 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2892 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2893 false, GSI_CONTINUE_LINKING);
2894 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2895 stmt = gimple_build_cond_empty (t);
2896 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2897 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2898 expand_omp_regimplify_p, NULL, NULL)
2899 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2900 expand_omp_regimplify_p, NULL, NULL))
2901 gimple_regimplify_operands (stmt, &gsi);
2902 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2903 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2904 }
2905 else
2906 make_edge (bb, body_bb, EDGE_FALLTHRU);
2907 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2908 last_bb = bb;
2909 }
2910
2911 return collapse_bb;
2912}
2913
2914/* Expand #pragma omp ordered depend(source). */
2915
2916static void
2917expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2918 tree *counts, location_t loc)
2919{
2920 enum built_in_function source_ix
2921 = fd->iter_type == long_integer_type_node
2922 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2923 gimple *g
2924 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2925 build_fold_addr_expr (counts[fd->ordered]));
2926 gimple_set_location (g, loc);
2927 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2928}
2929
2930/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2931
2932static void
2933expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2934 tree *counts, tree c, location_t loc)
2935{
2936 auto_vec<tree, 10> args;
2937 enum built_in_function sink_ix
2938 = fd->iter_type == long_integer_type_node
2939 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2940 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2941 int i;
2942 gimple_stmt_iterator gsi2 = *gsi;
2943 bool warned_step = false;
2944
2945 for (i = 0; i < fd->ordered; i++)
2946 {
2947 tree step = NULL_TREE;
2948 off = TREE_PURPOSE (deps);
2949 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2950 {
2951 step = TREE_OPERAND (off, 1);
2952 off = TREE_OPERAND (off, 0);
2953 }
2954 if (!integer_zerop (off))
2955 {
2956 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2957 || fd->loops[i].cond_code == GT_EXPR);
2958 bool forward = fd->loops[i].cond_code == LT_EXPR;
2959 if (step)
2960 {
2961 /* Non-simple Fortran DO loops. If step is variable,
2962 we don't know even the direction at compile time,
2963 so we can't warn. */
2964 if (TREE_CODE (step) != INTEGER_CST)
2965 break;
2966 forward = tree_int_cst_sgn (step) != -1;
2967 }
2968 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2969 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2970 "waiting for lexically later iteration");
2971 break;
2972 }
2973 deps = TREE_CHAIN (deps);
2974 }
2975 /* If all offsets corresponding to the collapsed loops are zero,
2976 this depend clause can be ignored. FIXME: but there is still a
2977 flush needed. We need to emit one __sync_synchronize () for it
2978 though (perhaps conditionally)? Solve this together with the
2979 conservative dependence folding optimization.
2980 if (i >= fd->collapse)
2981 return; */
2982
2983 deps = OMP_CLAUSE_DECL (c);
2984 gsi_prev (&gsi2);
2985 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2986 edge e2 = split_block_after_labels (e1->dest);
2987
2988 gsi2 = gsi_after_labels (e1->dest);
2989 *gsi = gsi_last_bb (e1->src);
2990 for (i = 0; i < fd->ordered; i++)
2991 {
2992 tree itype = TREE_TYPE (fd->loops[i].v);
2993 tree step = NULL_TREE;
2994 tree orig_off = NULL_TREE;
2995 if (POINTER_TYPE_P (itype))
2996 itype = sizetype;
2997 if (i)
2998 deps = TREE_CHAIN (deps);
2999 off = TREE_PURPOSE (deps);
3000 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3001 {
3002 step = TREE_OPERAND (off, 1);
3003 off = TREE_OPERAND (off, 0);
3004 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3005 && integer_onep (fd->loops[i].step)
3006 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3007 }
3008 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3009 if (step)
3010 {
3011 off = fold_convert_loc (loc, itype, off);
3012 orig_off = off;
3013 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3014 }
3015
3016 if (integer_zerop (off))
3017 t = boolean_true_node;
3018 else
3019 {
3020 tree a;
3021 tree co = fold_convert_loc (loc, itype, off);
3022 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3023 {
3024 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3025 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3026 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3027 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3028 co);
3029 }
3030 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3031 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3032 fd->loops[i].v, co);
3033 else
3034 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3035 fd->loops[i].v, co);
3036 if (step)
3037 {
3038 tree t1, t2;
3039 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3040 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3041 fd->loops[i].n1);
3042 else
3043 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3044 fd->loops[i].n2);
3045 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3046 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3047 fd->loops[i].n2);
3048 else
3049 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3050 fd->loops[i].n1);
3051 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3052 step, build_int_cst (TREE_TYPE (step), 0));
3053 if (TREE_CODE (step) != INTEGER_CST)
3054 {
3055 t1 = unshare_expr (t1);
3056 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3057 false, GSI_CONTINUE_LINKING);
3058 t2 = unshare_expr (t2);
3059 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3060 false, GSI_CONTINUE_LINKING);
3061 }
3062 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3063 t, t2, t1);
3064 }
3065 else if (fd->loops[i].cond_code == LT_EXPR)
3066 {
3067 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3068 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3069 fd->loops[i].n1);
3070 else
3071 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3072 fd->loops[i].n2);
3073 }
3074 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3075 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3076 fd->loops[i].n2);
3077 else
3078 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3079 fd->loops[i].n1);
3080 }
3081 if (cond)
3082 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3083 else
3084 cond = t;
3085
3086 off = fold_convert_loc (loc, itype, off);
3087
3088 if (step
3089 || (fd->loops[i].cond_code == LT_EXPR
3090 ? !integer_onep (fd->loops[i].step)
3091 : !integer_minus_onep (fd->loops[i].step)))
3092 {
3093 if (step == NULL_TREE
3094 && TYPE_UNSIGNED (itype)
3095 && fd->loops[i].cond_code == GT_EXPR)
3096 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3097 fold_build1_loc (loc, NEGATE_EXPR, itype,
3098 s));
3099 else
3100 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3101 orig_off ? orig_off : off, s);
3102 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3103 build_int_cst (itype, 0));
3104 if (integer_zerop (t) && !warned_step)
3105 {
3106 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3107 "refers to iteration never in the iteration "
3108 "space");
3109 warned_step = true;
3110 }
3111 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3112 cond, t);
3113 }
3114
3115 if (i <= fd->collapse - 1 && fd->collapse > 1)
3116 t = fd->loop.v;
3117 else if (counts[i])
3118 t = counts[i];
3119 else
3120 {
3121 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3122 fd->loops[i].v, fd->loops[i].n1);
3123 t = fold_convert_loc (loc, fd->iter_type, t);
3124 }
3125 if (step)
3126 /* We have already divided off by step earlier. */;
3127 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3128 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3129 fold_build1_loc (loc, NEGATE_EXPR, itype,
3130 s));
3131 else
3132 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3133 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3134 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3135 off = fold_convert_loc (loc, fd->iter_type, off);
3136 if (i <= fd->collapse - 1 && fd->collapse > 1)
3137 {
3138 if (i)
3139 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3140 off);
3141 if (i < fd->collapse - 1)
3142 {
3143 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3144 counts[i]);
3145 continue;
3146 }
3147 }
3148 off = unshare_expr (off);
3149 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3150 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3151 true, GSI_SAME_STMT);
3152 args.safe_push (t);
3153 }
3154 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3155 gimple_set_location (g, loc);
3156 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3157
3158 cond = unshare_expr (cond);
3159 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3160 GSI_CONTINUE_LINKING);
3161 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3162 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3163 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3164 e1->probability = e3->probability.invert ();
3165 e1->flags = EDGE_TRUE_VALUE;
3166 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3167
3168 *gsi = gsi_after_labels (e2->dest);
3169}
3170
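/* An example of the user-level construct the two expansions above
   implement (compiled out, exposition only): each cross-iteration
   dependence is waited on with depend(sink:...) and posted with
   depend(source).  */
#if 0
void
example_doacross (int n, int *a)
{
  int i;
#pragma omp parallel for ordered(1)
  for (i = 1; i < n; i++)
    {
#pragma omp ordered depend(sink: i - 1)
      a[i] += a[i - 1];
#pragma omp ordered depend(source)
    }
}
#endif
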
3171/* Expand all #pragma omp ordered depend(source) and
3172 #pragma omp ordered depend(sink:...) constructs in the current
3173 #pragma omp for ordered(n) region. */
3174
3175static void
3176expand_omp_ordered_source_sink (struct omp_region *region,
3177 struct omp_for_data *fd, tree *counts,
3178 basic_block cont_bb)
3179{
3180 struct omp_region *inner;
3181 int i;
3182 for (i = fd->collapse - 1; i < fd->ordered; i++)
3183 if (i == fd->collapse - 1 && fd->collapse > 1)
3184 counts[i] = NULL_TREE;
3185 else if (i >= fd->collapse && !cont_bb)
3186 counts[i] = build_zero_cst (fd->iter_type);
3187 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3188 && integer_onep (fd->loops[i].step))
3189 counts[i] = NULL_TREE;
3190 else
3191 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3192 tree atype
3193 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3194 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3195 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3196
3197 for (inner = region->inner; inner; inner = inner->next)
3198 if (inner->type == GIMPLE_OMP_ORDERED)
3199 {
3200 gomp_ordered *ord_stmt = inner->ord_stmt;
3201 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3202 location_t loc = gimple_location (ord_stmt);
3203 tree c;
3204 for (c = gimple_omp_ordered_clauses (ord_stmt);
3205 c; c = OMP_CLAUSE_CHAIN (c))
3206 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3207 break;
3208 if (c)
3209 expand_omp_ordered_source (&gsi, fd, counts, loc);
3210 for (c = gimple_omp_ordered_clauses (ord_stmt);
3211 c; c = OMP_CLAUSE_CHAIN (c))
3212 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3213 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3214 gsi_remove (&gsi, true);
3215 }
3216}
3217
3218/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3219 collapsed. */
3220
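/* E.g. for ordered(2) with a single collapsed dimension V1, the shape
   of the wrapping is sketched here (compiled out, exposition only;
   body and all parameter names are hypothetical).  */
#if 0
static void
example_ordered_wrap (long long v1_lb, long long v1_ub, long long s1,
		      long long n21, long long n22, long long s2)
{
  /* V1 is the workshared, collapsed dimension; V2 is the extra
     ordered dimension wrapped around the body by this function.  */
  for (long long v1 = v1_lb; v1 < v1_ub; v1 += s1)
    for (long long v2 = n21; v2 < n22; v2 += s2)
      body (v1, v2);
}
#endif
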
3221static basic_block
3222expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3223 basic_block cont_bb, basic_block body_bb,
3224 bool ordered_lastprivate)
3225{
3226 if (fd->ordered == fd->collapse)
3227 return cont_bb;
3228
3229 if (!cont_bb)
3230 {
3231 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3232 for (int i = fd->collapse; i < fd->ordered; i++)
3233 {
3234 tree type = TREE_TYPE (fd->loops[i].v);
3235 tree n1 = fold_convert (type, fd->loops[i].n1);
3236 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_int (i - fd->collapse + 1),
3239 NULL_TREE, NULL_TREE);
3240 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3241 }
3242 return NULL;
3243 }
3244
3245 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3246 {
3247 tree t, type = TREE_TYPE (fd->loops[i].v);
3248 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3249 expand_omp_build_assign (&gsi, fd->loops[i].v,
3250 fold_convert (type, fd->loops[i].n1));
3251 if (counts[i])
3252 expand_omp_build_assign (&gsi, counts[i],
3253 build_zero_cst (fd->iter_type));
3254 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3255 size_int (i - fd->collapse + 1),
3256 NULL_TREE, NULL_TREE);
3257 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3258 if (!gsi_end_p (gsi))
3259 gsi_prev (&gsi);
3260 else
3261 gsi = gsi_last_bb (body_bb);
3262 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3263 basic_block new_body = e1->dest;
3264 if (body_bb == cont_bb)
3265 cont_bb = new_body;
3266 edge e2 = NULL;
3267 basic_block new_header;
3268 if (EDGE_COUNT (cont_bb->preds) > 0)
3269 {
3270 gsi = gsi_last_bb (cont_bb);
3271 if (POINTER_TYPE_P (type))
3272 t = fold_build_pointer_plus (fd->loops[i].v,
3273 fold_convert (sizetype,
3274 fd->loops[i].step));
3275 else
3276 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3277 fold_convert (type, fd->loops[i].step));
3278 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3279 if (counts[i])
3280 {
3281 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3282 build_int_cst (fd->iter_type, 1));
3283 expand_omp_build_assign (&gsi, counts[i], t);
3284 t = counts[i];
3285 }
3286 else
3287 {
3288 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3289 fd->loops[i].v, fd->loops[i].n1);
3290 t = fold_convert (fd->iter_type, t);
3291 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3292 true, GSI_SAME_STMT);
3293 }
3294 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3295 size_int (i - fd->collapse + 1),
3296 NULL_TREE, NULL_TREE);
3297 expand_omp_build_assign (&gsi, aref, t);
3298 gsi_prev (&gsi);
3299 e2 = split_block (cont_bb, gsi_stmt (gsi));
3300 new_header = e2->dest;
3301 }
3302 else
3303 new_header = cont_bb;
3304 gsi = gsi_after_labels (new_header);
3305 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3306 true, GSI_SAME_STMT);
3307 tree n2
3308 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3309 true, NULL_TREE, true, GSI_SAME_STMT);
3310 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3311 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3312 edge e3 = split_block (new_header, gsi_stmt (gsi));
3313 cont_bb = e3->dest;
3314 remove_edge (e1);
3315 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3316 e3->flags = EDGE_FALSE_VALUE;
 3317 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
 3318 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
 3319 e1->probability = e3->probability.invert ();
3320
3321 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3322 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3323
3324 if (e2)
3325 {
 3326 class loop *loop = alloc_loop ();
3327 loop->header = new_header;
3328 loop->latch = e2->src;
3329 add_loop (loop, body_bb->loop_father);
3330 }
3331 }
3332
3333 /* If there are any lastprivate clauses and it is possible some loops
3334 might have zero iterations, ensure all the decls are initialized,
3335 otherwise we could crash evaluating C++ class iterators with lastprivate
3336 clauses. */
3337 bool need_inits = false;
3338 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3339 if (need_inits)
3340 {
3341 tree type = TREE_TYPE (fd->loops[i].v);
3342 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3343 expand_omp_build_assign (&gsi, fd->loops[i].v,
3344 fold_convert (type, fd->loops[i].n1));
3345 }
3346 else
3347 {
3348 tree type = TREE_TYPE (fd->loops[i].v);
3349 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3350 boolean_type_node,
3351 fold_convert (type, fd->loops[i].n1),
3352 fold_convert (type, fd->loops[i].n2));
3353 if (!integer_onep (this_cond))
3354 need_inits = true;
3355 }
3356
3357 return cont_bb;
3358}
3359
3360 /* A subroutine of expand_omp_for. Generate code for a parallel
3361 loop with any schedule. Given parameters:
3362
3363 for (V = N1; V cond N2; V += STEP) BODY;
3364
3365 where COND is "<" or ">", we generate pseudocode
3366
3367 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3368 if (more) goto L0; else goto L3;
3369 L0:
3370 V = istart0;
3371 iend = iend0;
3372 L1:
3373 BODY;
3374 V += STEP;
3375 if (V cond iend) goto L1; else goto L2;
3376 L2:
3377 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3378 L3:
3379
3380 If this is a combined omp parallel loop, instead of the call to
3381 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
 3382 If this is a gimple_omp_for_combined_p loop, then instead of assigning
 3383 V and iend in L0 we assign the first two _looptemp_ clause decls of the
 3384 inner GIMPLE_OMP_FOR, and both V += STEP; and
 3385 if (V cond iend) goto L1; else goto L2; are removed.
3386
3387 For collapsed loops, given parameters:
3388 collapse(3)
3389 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3390 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3391 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3392 BODY;
3393
3394 we generate pseudocode
3395
3396 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3397 if (cond3 is <)
3398 adj = STEP3 - 1;
3399 else
3400 adj = STEP3 + 1;
3401 count3 = (adj + N32 - N31) / STEP3;
3402 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3403 if (cond2 is <)
3404 adj = STEP2 - 1;
3405 else
3406 adj = STEP2 + 1;
3407 count2 = (adj + N22 - N21) / STEP2;
3408 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3409 if (cond1 is <)
3410 adj = STEP1 - 1;
3411 else
3412 adj = STEP1 + 1;
3413 count1 = (adj + N12 - N11) / STEP1;
3414 count = count1 * count2 * count3;
3415 goto Z1;
3416 Z0:
3417 count = 0;
3418 Z1:
3419 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3420 if (more) goto L0; else goto L3;
3421 L0:
3422 V = istart0;
3423 T = V;
3424 V3 = N31 + (T % count3) * STEP3;
3425 T = T / count3;
3426 V2 = N21 + (T % count2) * STEP2;
3427 T = T / count2;
3428 V1 = N11 + T * STEP1;
3429 iend = iend0;
3430 L1:
3431 BODY;
3432 V += 1;
3433 if (V < iend) goto L10; else goto L2;
3434 L10:
3435 V3 += STEP3;
3436 if (V3 cond3 N32) goto L1; else goto L11;
3437 L11:
3438 V3 = N31;
3439 V2 += STEP2;
3440 if (V2 cond2 N22) goto L1; else goto L12;
3441 L12:
3442 V2 = N21;
3443 V1 += STEP1;
3444 goto L1;
3445 L2:
3446 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3447 L3:
3448
3449 */
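/* A source-level sketch of the above, assuming schedule(dynamic, 4)
   so that GOMP_loop_dynamic_start/GOMP_loop_dynamic_next are the
   libgomp instances of GOMP_loop_foo_start/GOMP_loop_foo_next:

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   behaves roughly like

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 for (i = istart0; i < iend0; i++)
	   body (i);
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   with chunks of four iterations handed out to threads on demand.  */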
3450
3451static void
3452expand_omp_for_generic (struct omp_region *region,
3453 struct omp_for_data *fd,
3454 enum built_in_function start_fn,
3455 enum built_in_function next_fn,
 3456 tree sched_arg,
3457 gimple *inner_stmt)
3458{
3459 tree type, istart0, iend0, iend;
3460 tree t, vmain, vback, bias = NULL_TREE;
3461 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3462 basic_block l2_bb = NULL, l3_bb = NULL;
3463 gimple_stmt_iterator gsi;
3464 gassign *assign_stmt;
3465 bool in_combined_parallel = is_combined_parallel (region);
3466 bool broken_loop = region->cont == NULL;
3467 edge e, ne;
3468 tree *counts = NULL;
3469 int i;
3470 bool ordered_lastprivate = false;
3471
3472 gcc_assert (!broken_loop || !in_combined_parallel);
3473 gcc_assert (fd->iter_type == long_integer_type_node
3474 || !in_combined_parallel);
3475
3476 entry_bb = region->entry;
3477 cont_bb = region->cont;
3478 collapse_bb = NULL;
3479 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3480 gcc_assert (broken_loop
3481 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3482 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3483 l1_bb = single_succ (l0_bb);
3484 if (!broken_loop)
3485 {
3486 l2_bb = create_empty_bb (cont_bb);
3487 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3488 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3489 == l1_bb));
3490 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3491 }
3492 else
3493 l2_bb = NULL;
3494 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3495 exit_bb = region->exit;
3496
 3497 gsi = gsi_last_nondebug_bb (entry_bb);
3498
3499 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3500 if (fd->ordered
 3501 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3502 OMP_CLAUSE_LASTPRIVATE))
 3503 ordered_lastprivate = true;
 3504 tree reductions = NULL_TREE;
3505 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3506 tree memv = NULL_TREE;
3507 if (fd->lastprivate_conditional)
3508 {
3509 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3510 OMP_CLAUSE__CONDTEMP_);
3511 if (fd->have_pointer_condtemp)
3512 condtemp = OMP_CLAUSE_DECL (c);
3513 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3514 cond_var = OMP_CLAUSE_DECL (c);
3515 }
3516 if (sched_arg)
3517 {
3518 if (fd->have_reductemp)
3519 {
 3520 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3521 OMP_CLAUSE__REDUCTEMP_);
3522 reductions = OMP_CLAUSE_DECL (c);
3523 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3524 gimple *g = SSA_NAME_DEF_STMT (reductions);
3525 reductions = gimple_assign_rhs1 (g);
3526 OMP_CLAUSE_DECL (c) = reductions;
3527 entry_bb = gimple_bb (g);
3528 edge e = split_block (entry_bb, g);
3529 if (region->entry == entry_bb)
3530 region->entry = e->dest;
3531 gsi = gsi_last_bb (entry_bb);
3532 }
3533 else
3534 reductions = null_pointer_node;
 3535 if (fd->have_pointer_condtemp)
 3536 {
3537 tree type = TREE_TYPE (condtemp);
3538 memv = create_tmp_var (type);
3539 TREE_ADDRESSABLE (memv) = 1;
3540 unsigned HOST_WIDE_INT sz
3541 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3542 sz *= fd->lastprivate_conditional;
3543 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3544 false);
3545 mem = build_fold_addr_expr (memv);
3546 }
3547 else
3548 mem = null_pointer_node;
 3549 }
3550 if (fd->collapse > 1 || fd->ordered)
3551 {
3552 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3553 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3554
3555 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3556 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3557 zero_iter1_bb, first_zero_iter1,
3558 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3559
3560 if (zero_iter1_bb)
3561 {
3562 /* Some counts[i] vars might be uninitialized if
3563 some loop has zero iterations. But the body shouldn't
3564 be executed in that case, so just avoid uninit warnings. */
3565 for (i = first_zero_iter1;
3566 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3567 if (SSA_VAR_P (counts[i]))
3568 TREE_NO_WARNING (counts[i]) = 1;
3569 gsi_prev (&gsi);
3570 e = split_block (entry_bb, gsi_stmt (gsi));
3571 entry_bb = e->dest;
3572 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
 3573 gsi = gsi_last_nondebug_bb (entry_bb);
3574 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3575 get_immediate_dominator (CDI_DOMINATORS,
3576 zero_iter1_bb));
3577 }
3578 if (zero_iter2_bb)
3579 {
3580 /* Some counts[i] vars might be uninitialized if
3581 some loop has zero iterations. But the body shouldn't
3582 be executed in that case, so just avoid uninit warnings. */
3583 for (i = first_zero_iter2; i < fd->ordered; i++)
3584 if (SSA_VAR_P (counts[i]))
3585 TREE_NO_WARNING (counts[i]) = 1;
3586 if (zero_iter1_bb)
3587 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3588 else
3589 {
3590 gsi_prev (&gsi);
3591 e = split_block (entry_bb, gsi_stmt (gsi));
3592 entry_bb = e->dest;
3593 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
 3594 gsi = gsi_last_nondebug_bb (entry_bb);
3595 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3596 get_immediate_dominator
3597 (CDI_DOMINATORS, zero_iter2_bb));
3598 }
3599 }
3600 if (fd->collapse == 1)
3601 {
3602 counts[0] = fd->loop.n2;
3603 fd->loop = fd->loops[0];
3604 }
3605 }
3606
3607 type = TREE_TYPE (fd->loop.v);
3608 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3609 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3610 TREE_ADDRESSABLE (istart0) = 1;
3611 TREE_ADDRESSABLE (iend0) = 1;
3612
3613 /* See if we need to bias by LLONG_MIN. */
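/* Informally: the GOMP_loop_ull_* entry points work on unsigned long
   long, so a signed iteration space whose bounds differ in sign would
   wrap in the middle of the range if passed through unchanged.  Adding
   TYPE_MIN_VALUE (i.e. LLONG_MIN, modulo 2^64) maps
   [LLONG_MIN, LLONG_MAX] onto [0, ULLONG_MAX] while preserving order;
   the same bias is subtracted again from the istart0/iend0 values the
   runtime hands back.  */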
3614 if (fd->iter_type == long_long_unsigned_type_node
3615 && TREE_CODE (type) == INTEGER_TYPE
3616 && !TYPE_UNSIGNED (type)
3617 && fd->ordered == 0)
3618 {
3619 tree n1, n2;
3620
3621 if (fd->loop.cond_code == LT_EXPR)
3622 {
3623 n1 = fd->loop.n1;
3624 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3625 }
3626 else
3627 {
3628 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3629 n2 = fd->loop.n1;
3630 }
3631 if (TREE_CODE (n1) != INTEGER_CST
3632 || TREE_CODE (n2) != INTEGER_CST
3633 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3634 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3635 }
3636
3637 gimple_stmt_iterator gsif = gsi;
3638 gsi_prev (&gsif);
3639
3640 tree arr = NULL_TREE;
3641 if (in_combined_parallel)
3642 {
3643 gcc_assert (fd->ordered == 0);
3644 /* In a combined parallel loop, emit a call to
3645 GOMP_loop_foo_next. */
3646 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3647 build_fold_addr_expr (istart0),
3648 build_fold_addr_expr (iend0));
3649 }
3650 else
3651 {
3652 tree t0, t1, t2, t3, t4;
3653 /* If this is not a combined parallel loop, emit a call to
3654 GOMP_loop_foo_start in ENTRY_BB. */
3655 t4 = build_fold_addr_expr (iend0);
3656 t3 = build_fold_addr_expr (istart0);
3657 if (fd->ordered)
3658 {
3659 t0 = build_int_cst (unsigned_type_node,
3660 fd->ordered - fd->collapse + 1);
3661 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3662 fd->ordered
3663 - fd->collapse + 1),
3664 ".omp_counts");
3665 DECL_NAMELESS (arr) = 1;
3666 TREE_ADDRESSABLE (arr) = 1;
3667 TREE_STATIC (arr) = 1;
3668 vec<constructor_elt, va_gc> *v;
3669 vec_alloc (v, fd->ordered - fd->collapse + 1);
3670 int idx;
3671
3672 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3673 {
3674 tree c;
3675 if (idx == 0 && fd->collapse > 1)
3676 c = fd->loop.n2;
3677 else
3678 c = counts[idx + fd->collapse - 1];
3679 tree purpose = size_int (idx);
3680 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3681 if (TREE_CODE (c) != INTEGER_CST)
3682 TREE_STATIC (arr) = 0;
3683 }
3684
3685 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3686 if (!TREE_STATIC (arr))
3687 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3688 void_type_node, arr),
3689 true, NULL_TREE, true, GSI_SAME_STMT);
3690 t1 = build_fold_addr_expr (arr);
3691 t2 = NULL_TREE;
3692 }
3693 else
3694 {
3695 t2 = fold_convert (fd->iter_type, fd->loop.step);
3696 t1 = fd->loop.n2;
3697 t0 = fd->loop.n1;
3698 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3699 {
3700 tree innerc
3701 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3702 OMP_CLAUSE__LOOPTEMP_);
3703 gcc_assert (innerc);
3704 t0 = OMP_CLAUSE_DECL (innerc);
3705 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3706 OMP_CLAUSE__LOOPTEMP_);
3707 gcc_assert (innerc);
3708 t1 = OMP_CLAUSE_DECL (innerc);
3709 }
3710 if (POINTER_TYPE_P (TREE_TYPE (t0))
3711 && TYPE_PRECISION (TREE_TYPE (t0))
3712 != TYPE_PRECISION (fd->iter_type))
3713 {
3714 /* Avoid casting pointers to integer of a different size. */
3715 tree itype = signed_type_for (type);
3716 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3717 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3718 }
3719 else
3720 {
3721 t1 = fold_convert (fd->iter_type, t1);
3722 t0 = fold_convert (fd->iter_type, t0);
3723 }
3724 if (bias)
3725 {
3726 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3727 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3728 }
3729 }
3730 if (fd->iter_type == long_integer_type_node || fd->ordered)
3731 {
3732 if (fd->chunk_size)
3733 {
3734 t = fold_convert (fd->iter_type, fd->chunk_size);
3735 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3736 if (sched_arg)
3737 {
3738 if (fd->ordered)
3739 t = build_call_expr (builtin_decl_explicit (start_fn),
3740 8, t0, t1, sched_arg, t, t3, t4,
3741 reductions, mem);
3742 else
3743 t = build_call_expr (builtin_decl_explicit (start_fn),
3744 9, t0, t1, t2, sched_arg, t, t3, t4,
3745 reductions, mem);
3746 }
3747 else if (fd->ordered)
3748 t = build_call_expr (builtin_decl_explicit (start_fn),
3749 5, t0, t1, t, t3, t4);
3750 else
3751 t = build_call_expr (builtin_decl_explicit (start_fn),
3752 6, t0, t1, t2, t, t3, t4);
3753 }
3754 else if (fd->ordered)
3755 t = build_call_expr (builtin_decl_explicit (start_fn),
3756 4, t0, t1, t3, t4);
3757 else
3758 t = build_call_expr (builtin_decl_explicit (start_fn),
3759 5, t0, t1, t2, t3, t4);
3760 }
3761 else
3762 {
3763 tree t5;
3764 tree c_bool_type;
3765 tree bfn_decl;
3766
 3767 /* The GOMP_loop_ull_*start functions have an additional boolean
 3768 argument, true for < loops and false for > loops.
3769 In Fortran, the C bool type can be different from
3770 boolean_type_node. */
3771 bfn_decl = builtin_decl_explicit (start_fn);
3772 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
3773 t5 = build_int_cst (c_bool_type,
3774 fd->loop.cond_code == LT_EXPR ? 1 : 0);
3775 if (fd->chunk_size)
3776 {
3777 tree bfn_decl = builtin_decl_explicit (start_fn);
3778 t = fold_convert (fd->iter_type, fd->chunk_size);
3779 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3780 if (sched_arg)
3781 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
3782 t, t3, t4, reductions, mem);
3783 else
3784 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
3785 }
3786 else
3787 t = build_call_expr (builtin_decl_explicit (start_fn),
3788 6, t5, t0, t1, t2, t3, t4);
3789 }
3790 }
3791 if (TREE_TYPE (t) != boolean_type_node)
3792 t = fold_build2 (NE_EXPR, boolean_type_node,
3793 t, build_int_cst (TREE_TYPE (t), 0));
3794 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
 3795 true, GSI_SAME_STMT);
3796 if (arr && !TREE_STATIC (arr))
3797 {
 3798 tree clobber = build_clobber (TREE_TYPE (arr));
3799 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
3800 GSI_SAME_STMT);
3801 }
 3802 if (fd->have_pointer_condtemp)
 3803 expand_omp_build_assign (&gsi, condtemp, memv, false);
3804 if (fd->have_reductemp)
3805 {
3806 gimple *g = gsi_stmt (gsi);
3807 gsi_remove (&gsi, true);
3808 release_ssa_name (gimple_assign_lhs (g));
3809
3810 entry_bb = region->entry;
3811 gsi = gsi_last_nondebug_bb (entry_bb);
3812
3813 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3814 }
3815 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3816
3817 /* Remove the GIMPLE_OMP_FOR statement. */
3818 gsi_remove (&gsi, true);
3819
3820 if (gsi_end_p (gsif))
3821 gsif = gsi_after_labels (gsi_bb (gsif));
3822 gsi_next (&gsif);
3823
3824 /* Iteration setup for sequential loop goes in L0_BB. */
3825 tree startvar = fd->loop.v;
3826 tree endvar = NULL_TREE;
3827
3828 if (gimple_omp_for_combined_p (fd->for_stmt))
3829 {
3830 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3831 && gimple_omp_for_kind (inner_stmt)
3832 == GF_OMP_FOR_KIND_SIMD);
3833 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3834 OMP_CLAUSE__LOOPTEMP_);
3835 gcc_assert (innerc);
3836 startvar = OMP_CLAUSE_DECL (innerc);
3837 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3838 OMP_CLAUSE__LOOPTEMP_);
3839 gcc_assert (innerc);
3840 endvar = OMP_CLAUSE_DECL (innerc);
3841 }
3842
3843 gsi = gsi_start_bb (l0_bb);
3844 t = istart0;
3845 if (fd->ordered && fd->collapse == 1)
3846 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3847 fold_convert (fd->iter_type, fd->loop.step));
3848 else if (bias)
3849 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3850 if (fd->ordered && fd->collapse == 1)
3851 {
3852 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3853 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3854 fd->loop.n1, fold_convert (sizetype, t));
3855 else
3856 {
3857 t = fold_convert (TREE_TYPE (startvar), t);
3858 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3859 fd->loop.n1, t);
3860 }
3861 }
3862 else
3863 {
3864 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3865 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3866 t = fold_convert (TREE_TYPE (startvar), t);
3867 }
3868 t = force_gimple_operand_gsi (&gsi, t,
3869 DECL_P (startvar)
3870 && TREE_ADDRESSABLE (startvar),
3871 NULL_TREE, false, GSI_CONTINUE_LINKING);
3872 assign_stmt = gimple_build_assign (startvar, t);
3873 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3874 if (cond_var)
3875 {
3876 tree itype = TREE_TYPE (cond_var);
3877 /* For lastprivate(conditional:) itervar, we need some iteration
3878 counter that starts at unsigned non-zero and increases.
3879 Prefer as few IVs as possible, so if we can use startvar
3880 itself, use that, or startvar + constant (those would be
 3881 incremented with step), and as a last resort use istart0 + 1
 3882 incremented by 1. */
3883 if ((fd->ordered && fd->collapse == 1)
3884 || bias
3885 || POINTER_TYPE_P (type)
3886 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3887 || fd->loop.cond_code != LT_EXPR)
3888 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3889 build_int_cst (itype, 1));
3890 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3891 t = fold_convert (itype, t);
3892 else
3893 {
3894 tree c = fold_convert (itype, fd->loop.n1);
3895 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3896 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3897 }
3898 t = force_gimple_operand_gsi (&gsi, t, false,
3899 NULL_TREE, false, GSI_CONTINUE_LINKING);
3900 assign_stmt = gimple_build_assign (cond_var, t);
3901 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3902 }
3903
3904 t = iend0;
3905 if (fd->ordered && fd->collapse == 1)
3906 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3907 fold_convert (fd->iter_type, fd->loop.step));
3908 else if (bias)
3909 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3910 if (fd->ordered && fd->collapse == 1)
3911 {
3912 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3913 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3914 fd->loop.n1, fold_convert (sizetype, t));
3915 else
3916 {
3917 t = fold_convert (TREE_TYPE (startvar), t);
3918 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3919 fd->loop.n1, t);
3920 }
3921 }
3922 else
3923 {
3924 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3925 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3926 t = fold_convert (TREE_TYPE (startvar), t);
3927 }
3928 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3929 false, GSI_CONTINUE_LINKING);
3930 if (endvar)
3931 {
3932 assign_stmt = gimple_build_assign (endvar, iend);
3933 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3934 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3935 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3936 else
3937 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3938 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3939 }
3940 /* Handle linear clause adjustments. */
3941 tree itercnt = NULL_TREE;
3942 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3943 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3944 c; c = OMP_CLAUSE_CHAIN (c))
3945 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3946 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3947 {
3948 tree d = OMP_CLAUSE_DECL (c);
3949 bool is_ref = omp_is_reference (d);
3950 tree t = d, a, dest;
3951 if (is_ref)
3952 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3953 tree type = TREE_TYPE (t);
3954 if (POINTER_TYPE_P (type))
3955 type = sizetype;
3956 dest = unshare_expr (t);
3957 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3958 expand_omp_build_assign (&gsif, v, t);
3959 if (itercnt == NULL_TREE)
3960 {
3961 itercnt = startvar;
3962 tree n1 = fd->loop.n1;
3963 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3964 {
3965 itercnt
3966 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3967 itercnt);
3968 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3969 }
3970 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3971 itercnt, n1);
3972 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3973 itercnt, fd->loop.step);
3974 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3975 NULL_TREE, false,
3976 GSI_CONTINUE_LINKING);
3977 }
3978 a = fold_build2 (MULT_EXPR, type,
3979 fold_convert (type, itercnt),
3980 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3981 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3982 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3983 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3984 false, GSI_CONTINUE_LINKING);
3985 assign_stmt = gimple_build_assign (dest, t);
3986 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3987 }
3988 if (fd->collapse > 1)
 3989 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
3990
3991 if (fd->ordered)
3992 {
3993 /* Until now, counts array contained number of iterations or
3994 variable containing it for ith loop. From now on, we need
3995 those counts only for collapsed loops, and only for the 2nd
3996 till the last collapsed one. Move those one element earlier,
3997 we'll use counts[fd->collapse - 1] for the first source/sink
3998 iteration counter and so on and counts[fd->ordered]
3999 as the array holding the current counter values for
4000 depend(source). */
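/* An illustrative case with assumed numbers: for collapse(2)
   ordered(4), counts[0..3] held the per-loop counts and counts[4] the
   counter array; after the memmove below, counts[0] is the old
   counts[1], so counts[fd->collapse - 1] == counts[1] is free to act
   as the first source/sink iteration counter while counts[fd->ordered]
   keeps holding the current counter values for depend(source).  */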
4001 if (fd->collapse > 1)
4002 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4003 if (broken_loop)
4004 {
4005 int i;
4006 for (i = fd->collapse; i < fd->ordered; i++)
4007 {
4008 tree type = TREE_TYPE (fd->loops[i].v);
4009 tree this_cond
4010 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4011 fold_convert (type, fd->loops[i].n1),
4012 fold_convert (type, fd->loops[i].n2));
4013 if (!integer_onep (this_cond))
4014 break;
4015 }
4016 if (i < fd->ordered)
4017 {
4018 cont_bb
4019 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4020 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4021 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4022 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4024 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4025 make_edge (cont_bb, l1_bb, 0);
4026 l2_bb = create_empty_bb (cont_bb);
4027 broken_loop = false;
4028 }
4029 }
4030 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4031 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4032 ordered_lastprivate);
4033 if (counts[fd->collapse - 1])
4034 {
4035 gcc_assert (fd->collapse == 1);
4036 gsi = gsi_last_bb (l0_bb);
4037 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4038 istart0, true);
4039 gsi = gsi_last_bb (cont_bb);
4040 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4041 build_int_cst (fd->iter_type, 1));
4042 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4043 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4044 size_zero_node, NULL_TREE, NULL_TREE);
4045 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4046 t = counts[fd->collapse - 1];
4047 }
4048 else if (fd->collapse > 1)
4049 t = fd->loop.v;
4050 else
4051 {
4052 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4053 fd->loops[0].v, fd->loops[0].n1);
4054 t = fold_convert (fd->iter_type, t);
4055 }
4056 gsi = gsi_last_bb (l0_bb);
4057 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4058 size_zero_node, NULL_TREE, NULL_TREE);
4059 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4060 false, GSI_CONTINUE_LINKING);
4061 expand_omp_build_assign (&gsi, aref, t, true);
4062 }
4063
4064 if (!broken_loop)
4065 {
4066 /* Code to control the increment and predicate for the sequential
4067 loop goes in the CONT_BB. */
 4068 gsi = gsi_last_nondebug_bb (cont_bb);
4069 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4070 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4071 vmain = gimple_omp_continue_control_use (cont_stmt);
4072 vback = gimple_omp_continue_control_def (cont_stmt);
4073
4074 if (cond_var)
4075 {
4076 tree itype = TREE_TYPE (cond_var);
4077 tree t2;
4078 if ((fd->ordered && fd->collapse == 1)
4079 || bias
4080 || POINTER_TYPE_P (type)
4081 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4082 || fd->loop.cond_code != LT_EXPR)
4083 t2 = build_int_cst (itype, 1);
4084 else
4085 t2 = fold_convert (itype, fd->loop.step);
4086 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4087 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4088 NULL_TREE, true, GSI_SAME_STMT);
4089 assign_stmt = gimple_build_assign (cond_var, t2);
4090 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4091 }
4092
4093 if (!gimple_omp_for_combined_p (fd->for_stmt))
4094 {
4095 if (POINTER_TYPE_P (type))
4096 t = fold_build_pointer_plus (vmain, fd->loop.step);
4097 else
4098 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4099 t = force_gimple_operand_gsi (&gsi, t,
4100 DECL_P (vback)
4101 && TREE_ADDRESSABLE (vback),
4102 NULL_TREE, true, GSI_SAME_STMT);
4103 assign_stmt = gimple_build_assign (vback, t);
4104 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4105
4106 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4107 {
 4108 tree tem;
 4109 if (fd->collapse > 1)
 4110 tem = fd->loop.v;
4111 else
4112 {
4113 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4114 fd->loops[0].v, fd->loops[0].n1);
4115 tem = fold_convert (fd->iter_type, tem);
4116 }
4117 tree aref = build4 (ARRAY_REF, fd->iter_type,
4118 counts[fd->ordered], size_zero_node,
4119 NULL_TREE, NULL_TREE);
4120 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4121 true, GSI_SAME_STMT);
4122 expand_omp_build_assign (&gsi, aref, tem);
4123 }
4124
4125 t = build2 (fd->loop.cond_code, boolean_type_node,
4126 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4127 iend);
4128 gcond *cond_stmt = gimple_build_cond_empty (t);
4129 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4130 }
4131
4132 /* Remove GIMPLE_OMP_CONTINUE. */
4133 gsi_remove (&gsi, true);
4134
4135 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
 4136 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4137
4138 /* Emit code to get the next parallel iteration in L2_BB. */
4139 gsi = gsi_start_bb (l2_bb);
4140
4141 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4142 build_fold_addr_expr (istart0),
4143 build_fold_addr_expr (iend0));
4144 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4145 false, GSI_CONTINUE_LINKING);
4146 if (TREE_TYPE (t) != boolean_type_node)
4147 t = fold_build2 (NE_EXPR, boolean_type_node,
4148 t, build_int_cst (TREE_TYPE (t), 0));
4149 gcond *cond_stmt = gimple_build_cond_empty (t);
4150 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4151 }
4152
4153 /* Add the loop cleanup function. */
 4154 gsi = gsi_last_nondebug_bb (exit_bb);
4155 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4156 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4157 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4158 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4159 else
4160 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4161 gcall *call_stmt = gimple_build_call (t, 0);
4162 if (fd->ordered)
4163 {
4164 tree arr = counts[fd->ordered];
 4165 tree clobber = build_clobber (TREE_TYPE (arr));
4166 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4167 GSI_SAME_STMT);
4168 }
4169 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4170 {
4171 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4172 if (fd->have_reductemp)
4173 {
4174 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4175 gimple_call_lhs (call_stmt));
4176 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4177 }
4178 }
4179 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4180 gsi_remove (&gsi, true);
4181
4182 /* Connect the new blocks. */
4183 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4184 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4185
4186 if (!broken_loop)
4187 {
4188 gimple_seq phis;
4189
4190 e = find_edge (cont_bb, l3_bb);
4191 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4192
4193 phis = phi_nodes (l3_bb);
4194 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4195 {
4196 gimple *phi = gsi_stmt (gsi);
4197 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4198 PHI_ARG_DEF_FROM_EDGE (phi, e));
4199 }
4200 remove_edge (e);
4201
4202 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4203 e = find_edge (cont_bb, l1_bb);
4204 if (e == NULL)
4205 {
4206 e = BRANCH_EDGE (cont_bb);
4207 gcc_assert (single_succ (e->dest) == l1_bb);
4208 }
4209 if (gimple_omp_for_combined_p (fd->for_stmt))
4210 {
4211 remove_edge (e);
4212 e = NULL;
4213 }
4214 else if (fd->collapse > 1)
4215 {
4216 remove_edge (e);
4217 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4218 }
4219 else
4220 e->flags = EDGE_TRUE_VALUE;
4221 if (e)
4222 {
4223 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4224 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4225 }
4226 else
4227 {
4228 e = find_edge (cont_bb, l2_bb);
4229 e->flags = EDGE_FALLTHRU;
4230 }
4231 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4232
4233 if (gimple_in_ssa_p (cfun))
4234 {
4235 /* Add phis to the outer loop that connect to the phis in the inner,
4236 original loop, and move the loop entry value of the inner phi to
4237 the loop entry value of the outer phi. */
4238 gphi_iterator psi;
4239 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4240 {
 4241 location_t locus;
4242 gphi *nphi;
4243 gphi *exit_phi = psi.phi ();
4244
4245 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4246 continue;
4247
4248 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4249 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4250
4251 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4252 edge latch_to_l1 = find_edge (latch, l1_bb);
4253 gphi *inner_phi
4254 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4255
4256 tree t = gimple_phi_result (exit_phi);
4257 tree new_res = copy_ssa_name (t, NULL);
4258 nphi = create_phi_node (new_res, l0_bb);
4259
4260 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4261 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4262 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4263 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4264 add_phi_arg (nphi, t, entry_to_l0, locus);
4265
4266 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4267 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4268
4269 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
 4270 }
4271 }
4272
4273 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4274 recompute_dominator (CDI_DOMINATORS, l2_bb));
4275 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4276 recompute_dominator (CDI_DOMINATORS, l3_bb));
4277 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4278 recompute_dominator (CDI_DOMINATORS, l0_bb));
4279 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4280 recompute_dominator (CDI_DOMINATORS, l1_bb));
4281
4282 /* We enter expand_omp_for_generic with a loop. This original loop may
4283 have its own loop struct, or it may be part of an outer loop struct
4284 (which may be the fake loop). */
 4285 class loop *outer_loop = entry_bb->loop_father;
4286 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4287
4288 add_bb_to_loop (l2_bb, outer_loop);
4289
4290 /* We've added a new loop around the original loop. Allocate the
4291 corresponding loop struct. */
 4292 class loop *new_loop = alloc_loop ();
4293 new_loop->header = l0_bb;
4294 new_loop->latch = l2_bb;
4295 add_loop (new_loop, outer_loop);
4296
4297 /* Allocate a loop structure for the original loop unless we already
4298 had one. */
4299 if (!orig_loop_has_loop_struct
4300 && !gimple_omp_for_combined_p (fd->for_stmt))
4301 {
 4302 class loop *orig_loop = alloc_loop ();
4303 orig_loop->header = l1_bb;
4304 /* The loop may have multiple latches. */
4305 add_loop (orig_loop, new_loop);
4306 }
4307 }
4308}
4309
4310 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
 4311 compute the needed allocation size; for !ALLOC this covers the team
 4312 allocations, for ALLOC the thread-local allocation.  SZ is the initial
 4313 size needed for other purposes, ALLOC_ALIGN the guaranteed alignment
 4314 of the allocation in bytes, and CNT the number of elements of each
 4315 array: omp_get_num_threads () for !ALLOC, the number of iterations
 4316 handled by the current thread for ALLOC.  If PTR is non-NULL, it is the
 4317 start of the allocation, and this routine assigns to OMP_CLAUSE_DECL (c)
 4318 of the _scantemp_ clauses pointers to the corresponding arrays. */
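/* A hypothetical worked example (the numbers are assumptions, not from
   the source): with two non-control _scantemp_ arrays of int (4-byte)
   and long long (8-byte) element type, CNT == 4, SZ == 0 and
   ALLOC_ALIGN == 8, the sizing pass returns (4 + 8) * 4 == 48 bytes
   plus al - 1 == 7 bytes of worst-case padding for realigning the long
   long array; the PTR pass then rounds the running pointer up to each
   element alignment and stores the resulting array starts into the
   clause decls.  */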
4319
4320static tree
4321expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4322 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4323 gimple_stmt_iterator *gsi, bool alloc)
4324{
4325 tree eltsz = NULL_TREE;
4326 unsigned HOST_WIDE_INT preval = 0;
4327 if (ptr && sz)
4328 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4329 ptr, size_int (sz));
4330 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4331 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4332 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4333 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4334 {
4335 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4336 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4337 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4338 {
4339 unsigned HOST_WIDE_INT szl
4340 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4341 szl = least_bit_hwi (szl);
4342 if (szl)
4343 al = MIN (al, szl);
4344 }
4345 if (ptr == NULL_TREE)
4346 {
4347 if (eltsz == NULL_TREE)
4348 eltsz = TYPE_SIZE_UNIT (pointee_type);
4349 else
4350 eltsz = size_binop (PLUS_EXPR, eltsz,
4351 TYPE_SIZE_UNIT (pointee_type));
4352 }
4353 if (preval == 0 && al <= alloc_align)
4354 {
4355 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4356 sz += diff;
4357 if (diff && ptr)
4358 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4359 ptr, size_int (diff));
4360 }
4361 else if (al > preval)
4362 {
4363 if (ptr)
4364 {
4365 ptr = fold_convert (pointer_sized_int_node, ptr);
4366 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4367 build_int_cst (pointer_sized_int_node,
4368 al - 1));
4369 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4370 build_int_cst (pointer_sized_int_node,
4371 -(HOST_WIDE_INT) al));
4372 ptr = fold_convert (ptr_type_node, ptr);
4373 }
4374 else
4375 sz += al - 1;
4376 }
4377 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4378 preval = al;
4379 else
4380 preval = 1;
4381 if (ptr)
4382 {
4383 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4384 ptr = OMP_CLAUSE_DECL (c);
4385 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4386 size_binop (MULT_EXPR, cnt,
4387 TYPE_SIZE_UNIT (pointee_type)));
4388 }
4389 }
4390
4391 if (ptr == NULL_TREE)
4392 {
4393 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4394 if (sz)
4395 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4396 return eltsz;
4397 }
4398 else
4399 return ptr;
4400}
4401
4402 /* A subroutine of expand_omp_for. Generate code for a parallel
4403 loop with static schedule and no specified chunk size. Given
4404 parameters:
4405
4406 for (V = N1; V cond N2; V += STEP) BODY;
4407
4408 where COND is "<" or ">", we generate pseudocode
4409
4410 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4411 if (cond is <)
4412 adj = STEP - 1;
4413 else
4414 adj = STEP + 1;
4415 if ((__typeof (V)) -1 > 0 && cond is >)
4416 n = -(adj + N2 - N1) / -STEP;
4417 else
4418 n = (adj + N2 - N1) / STEP;
4419 q = n / nthreads;
4420 tt = n % nthreads;
4421 if (threadid < tt) goto L3; else goto L4;
4422 L3:
4423 tt = 0;
4424 q = q + 1;
4425 L4:
4426 s0 = q * threadid + tt;
4427 e0 = s0 + q;
4428 V = s0 * STEP + N1;
4429 if (s0 >= e0) goto L2; else goto L0;
4430 L0:
4431 e = e0 * STEP + N1;
4432 L1:
4433 BODY;
4434 V += STEP;
4435 if (V cond e) goto L1;
4436 L2:
4437 */
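/* A worked instance of the partitioning above, with assumed numbers:
   for n == 10 iterations and nthreads == 4, q == 2 and tt == 2, so
   threads 0 and 1 (threadid < tt) take q + 1 == 3 iterations starting
   at s0 == 0 and s0 == 3, while threads 2 and 3 take 2 iterations
   starting at s0 == 6 and s0 == 8.  Every iteration is covered exactly
   once and no thread receives more than one extra iteration.  */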
4438
4439static void
4440expand_omp_for_static_nochunk (struct omp_region *region,
4441 struct omp_for_data *fd,
4442 gimple *inner_stmt)
4443{
 4444 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4445 tree type, itype, vmain, vback;
4446 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4447 basic_block body_bb, cont_bb, collapse_bb = NULL;
4448 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4449 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
 4450 gimple_stmt_iterator gsi, gsip;
4451 edge ep;
4452 bool broken_loop = region->cont == NULL;
4453 tree *counts = NULL;
4454 tree n1, n2, step;
 4455 tree reductions = NULL_TREE;
 4456 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4457
4458 itype = type = TREE_TYPE (fd->loop.v);
4459 if (POINTER_TYPE_P (type))
4460 itype = signed_type_for (type);
4461
4462 entry_bb = region->entry;
4463 cont_bb = region->cont;
4464 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4465 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4466 gcc_assert (broken_loop
4467 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4468 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4469 body_bb = single_succ (seq_start_bb);
4470 if (!broken_loop)
4471 {
4472 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4473 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4474 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4475 }
4476 exit_bb = region->exit;
4477
4478 /* Iteration space partitioning goes in ENTRY_BB. */
 4479 gsi = gsi_last_nondebug_bb (entry_bb);
 4480 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4481 gsip = gsi;
4482 gsi_prev (&gsip);
4483
4484 if (fd->collapse > 1)
4485 {
4486 int first_zero_iter = -1, dummy = -1;
4487 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4488
4489 counts = XALLOCAVEC (tree, fd->collapse);
4490 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4491 fin_bb, first_zero_iter,
4492 dummy_bb, dummy, l2_dom_bb);
4493 t = NULL_TREE;
4494 }
4495 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4496 t = integer_one_node;
4497 else
4498 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4499 fold_convert (type, fd->loop.n1),
4500 fold_convert (type, fd->loop.n2));
4501 if (fd->collapse == 1
4502 && TYPE_UNSIGNED (type)
4503 && (t == NULL_TREE || !integer_onep (t)))
4504 {
4505 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4506 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4507 true, GSI_SAME_STMT);
4508 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4509 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4510 true, GSI_SAME_STMT);
4511 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
 4512 NULL_TREE, NULL_TREE);
4513 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4514 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4515 expand_omp_regimplify_p, NULL, NULL)
4516 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4517 expand_omp_regimplify_p, NULL, NULL))
4518 {
4519 gsi = gsi_for_stmt (cond_stmt);
4520 gimple_regimplify_operands (cond_stmt, &gsi);
4521 }
4522 ep = split_block (entry_bb, cond_stmt);
4523 ep->flags = EDGE_TRUE_VALUE;
4524 entry_bb = ep->dest;
 4525 ep->probability = profile_probability::very_likely ();
 4526 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
 4527 ep->probability = profile_probability::very_unlikely ();
4528 if (gimple_in_ssa_p (cfun))
4529 {
4530 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4531 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4532 !gsi_end_p (gpi); gsi_next (&gpi))
4533 {
4534 gphi *phi = gpi.phi ();
4535 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4536 ep, UNKNOWN_LOCATION);
4537 }
4538 }
4539 gsi = gsi_last_bb (entry_bb);
4540 }
4541
4542 if (fd->lastprivate_conditional)
4543 {
4544 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4545 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4546 if (fd->have_pointer_condtemp)
4547 condtemp = OMP_CLAUSE_DECL (c);
4548 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4549 cond_var = OMP_CLAUSE_DECL (c);
4550 }
 4551 if (fd->have_reductemp
4552 /* For scan, we don't want to reinitialize condtemp before the
4553 second loop. */
4554 || (fd->have_pointer_condtemp && !fd->have_scantemp)
 4555 || fd->have_nonctrl_scantemp)
4556 {
4557 tree t1 = build_int_cst (long_integer_type_node, 0);
4558 tree t2 = build_int_cst (long_integer_type_node, 1);
4559 tree t3 = build_int_cstu (long_integer_type_node,
4560 (HOST_WIDE_INT_1U << 31) + 1);
4561 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4562 gimple_stmt_iterator gsi2 = gsi_none ();
4563 gimple *g = NULL;
4564 tree mem = null_pointer_node, memv = NULL_TREE;
4565 unsigned HOST_WIDE_INT condtemp_sz = 0;
4566 unsigned HOST_WIDE_INT alloc_align = 0;
4567 if (fd->have_reductemp)
4568 {
 4569 gcc_assert (!fd->have_nonctrl_scantemp);
4570 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4571 reductions = OMP_CLAUSE_DECL (c);
4572 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4573 g = SSA_NAME_DEF_STMT (reductions);
4574 reductions = gimple_assign_rhs1 (g);
4575 OMP_CLAUSE_DECL (c) = reductions;
4576 gsi2 = gsi_for_stmt (g);
4577 }
4578 else
4579 {
4580 if (gsi_end_p (gsip))
4581 gsi2 = gsi_after_labels (region->entry);
4582 else
4583 gsi2 = gsip;
4584 reductions = null_pointer_node;
4585 }
 4586 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
 4587 {
4588 tree type;
4589 if (fd->have_pointer_condtemp)
4590 type = TREE_TYPE (condtemp);
4591 else
4592 type = ptr_type_node;
4593 memv = create_tmp_var (type);
4594 TREE_ADDRESSABLE (memv) = 1;
4595 unsigned HOST_WIDE_INT sz = 0;
4596 tree size = NULL_TREE;
4597 if (fd->have_pointer_condtemp)
4598 {
4599 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4600 sz *= fd->lastprivate_conditional;
4601 condtemp_sz = sz;
4602 }
4603 if (fd->have_nonctrl_scantemp)
4604 {
4605 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4606 gimple *g = gimple_build_call (nthreads, 0);
4607 nthreads = create_tmp_var (integer_type_node);
4608 gimple_call_set_lhs (g, nthreads);
4609 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4610 nthreads = fold_convert (sizetype, nthreads);
4611 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4612 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4613 alloc_align, nthreads, NULL,
4614 false);
4615 size = fold_convert (type, size);
4616 }
4617 else
4618 size = build_int_cst (type, sz);
4619 expand_omp_build_assign (&gsi2, memv, size, false);
4620 mem = build_fold_addr_expr (memv);
4621 }
4622 tree t
4623 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4624 9, t1, t2, t2, t3, t1, null_pointer_node,
 4625 null_pointer_node, reductions, mem);
4626 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4627 true, GSI_SAME_STMT);
 4628 if (fd->have_pointer_condtemp)
 4629 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4630 if (fd->have_nonctrl_scantemp)
4631 {
4632 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4633 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4634 alloc_align, nthreads, &gsi2, false);
4635 }
4636 if (fd->have_reductemp)
4637 {
4638 gsi_remove (&gsi2, true);
4639 release_ssa_name (gimple_assign_lhs (g));
4640 }
 4641 }
4642 switch (gimple_omp_for_kind (fd->for_stmt))
4643 {
4644 case GF_OMP_FOR_KIND_FOR:
4645 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4646 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4647 break;
4648 case GF_OMP_FOR_KIND_DISTRIBUTE:
4649 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4650 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4651 break;
4652 default:
4653 gcc_unreachable ();
4654 }
4655 nthreads = build_call_expr (nthreads, 0);
4656 nthreads = fold_convert (itype, nthreads);
4657 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4658 true, GSI_SAME_STMT);
4659 threadid = build_call_expr (threadid, 0);
4660 threadid = fold_convert (itype, threadid);
4661 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4662 true, GSI_SAME_STMT);
4663
4664 n1 = fd->loop.n1;
4665 n2 = fd->loop.n2;
4666 step = fd->loop.step;
4667 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4668 {
4669 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4670 OMP_CLAUSE__LOOPTEMP_);
4671 gcc_assert (innerc);
4672 n1 = OMP_CLAUSE_DECL (innerc);
4673 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4674 OMP_CLAUSE__LOOPTEMP_);
4675 gcc_assert (innerc);
4676 n2 = OMP_CLAUSE_DECL (innerc);
4677 }
4678 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4679 true, NULL_TREE, true, GSI_SAME_STMT);
4680 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4681 true, NULL_TREE, true, GSI_SAME_STMT);
4682 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4683 true, NULL_TREE, true, GSI_SAME_STMT);
4684
4685 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4686 t = fold_build2 (PLUS_EXPR, itype, step, t);
4687 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4688 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4689 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4690 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4691 fold_build1 (NEGATE_EXPR, itype, t),
4692 fold_build1 (NEGATE_EXPR, itype, step));
4693 else
4694 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4695 t = fold_convert (itype, t);
4696 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4697
4698 q = create_tmp_reg (itype, "q");
4699 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4700 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4701 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4702
4703 tt = create_tmp_reg (itype, "tt");
4704 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
4705 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4706 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
4707
4708 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
4709 gcond *cond_stmt = gimple_build_cond_empty (t);
4710 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4711
4712 second_bb = split_block (entry_bb, cond_stmt)->dest;
 4713 gsi = gsi_last_nondebug_bb (second_bb);
4714 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4715
4716 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
4717 GSI_SAME_STMT);
4718 gassign *assign_stmt
4719 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
4720 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4721
4722 third_bb = split_block (second_bb, assign_stmt)->dest;
 4723 gsi = gsi_last_nondebug_bb (third_bb);
4724 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4725
4726 if (fd->have_nonctrl_scantemp)
4727 {
4728 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4729 tree controlp = NULL_TREE, controlb = NULL_TREE;
4730 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4731 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4732 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4733 {
4734 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4735 controlb = OMP_CLAUSE_DECL (c);
4736 else
4737 controlp = OMP_CLAUSE_DECL (c);
4738 if (controlb && controlp)
4739 break;
4740 }
4741 gcc_assert (controlp && controlb);
4742 tree cnt = create_tmp_var (sizetype);
4743 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
4744 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4745 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
4746 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
4747 alloc_align, cnt, NULL, true);
4748 tree size = create_tmp_var (sizetype);
4749 expand_omp_build_assign (&gsi, size, sz, false);
4750 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
4751 size, size_int (16384));
4752 expand_omp_build_assign (&gsi, controlb, cmp);
4753 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4754 NULL_TREE, NULL_TREE);
4755 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4756 fourth_bb = split_block (third_bb, g)->dest;
4757 gsi = gsi_last_nondebug_bb (fourth_bb);
4758 /* FIXME: Once we have allocators, this should use allocator. */
4759 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
4760 gimple_call_set_lhs (g, controlp);
4761 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4762 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
4763 &gsi, true);
4764 gsi_prev (&gsi);
4765 g = gsi_stmt (gsi);
4766 fifth_bb = split_block (fourth_bb, g)->dest;
4767 gsi = gsi_last_nondebug_bb (fifth_bb);
4768
4769 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
4770 gimple_call_set_lhs (g, controlp);
4771 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4772 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4773 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4774 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4775 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
4776 {
4777 tree tmp = create_tmp_var (sizetype);
4778 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4779 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
4780 TYPE_SIZE_UNIT (pointee_type));
4781 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4782 g = gimple_build_call (alloca_decl, 2, tmp,
4783 size_int (TYPE_ALIGN (pointee_type)));
4784 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
4785 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4786 }
4787
4788 sixth_bb = split_block (fifth_bb, g)->dest;
4789 gsi = gsi_last_nondebug_bb (sixth_bb);
4790 }
4791
629b3d75
MJ
4792 t = build2 (MULT_EXPR, itype, q, threadid);
4793 t = build2 (PLUS_EXPR, itype, t, tt);
4794 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4795
4796 t = fold_build2 (PLUS_EXPR, itype, s0, q);
4797 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4798
4799 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
4800 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4801
4802 /* Remove the GIMPLE_OMP_FOR statement. */
4803 gsi_remove (&gsi, true);
4804
4805 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4806 gsi = gsi_start_bb (seq_start_bb);
4807
4808 tree startvar = fd->loop.v;
4809 tree endvar = NULL_TREE;
4810
4811 if (gimple_omp_for_combined_p (fd->for_stmt))
4812 {
4813 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4814 ? gimple_omp_parallel_clauses (inner_stmt)
4815 : gimple_omp_for_clauses (inner_stmt);
4816 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4817 gcc_assert (innerc);
4818 startvar = OMP_CLAUSE_DECL (innerc);
4819 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4820 OMP_CLAUSE__LOOPTEMP_);
4821 gcc_assert (innerc);
4822 endvar = OMP_CLAUSE_DECL (innerc);
4823 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4824 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4825 {
4826 int i;
4827 for (i = 1; i < fd->collapse; i++)
4828 {
4829 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4830 OMP_CLAUSE__LOOPTEMP_);
4831 gcc_assert (innerc);
4832 }
4833 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4834 OMP_CLAUSE__LOOPTEMP_);
4835 if (innerc)
4836 {
4837 /* If needed (distribute parallel for with lastprivate),
4838 propagate down the total number of iterations. */
4839 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4840 fd->loop.n2);
4841 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4842 GSI_CONTINUE_LINKING);
4843 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4844 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4845 }
4846 }
4847 }
4848 t = fold_convert (itype, s0);
4849 t = fold_build2 (MULT_EXPR, itype, t, step);
4850 if (POINTER_TYPE_P (type))
4851 {
4852 t = fold_build_pointer_plus (n1, t);
4853 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4854 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4855 t = fold_convert (signed_type_for (type), t);
4856 }
4857 else
4858 t = fold_build2 (PLUS_EXPR, type, t, n1);
4859 t = fold_convert (TREE_TYPE (startvar), t);
4860 t = force_gimple_operand_gsi (&gsi, t,
4861 DECL_P (startvar)
4862 && TREE_ADDRESSABLE (startvar),
4863 NULL_TREE, false, GSI_CONTINUE_LINKING);
4864 assign_stmt = gimple_build_assign (startvar, t);
4865 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4866 if (cond_var)
4867 {
4868 tree itype = TREE_TYPE (cond_var);
4869 /* For the lastprivate(conditional:) itervar, we need an iteration
4870 counter that starts at a non-zero unsigned value and increases.
4871 Prefer as few IVs as possible, so if we can use startvar
4872 itself, use that, or startvar + constant (those would be
4873 incremented with step), and as a last resort use s0 + 1,
4874 incremented by 1 on each iteration. */
4875 if (POINTER_TYPE_P (type)
4876 || TREE_CODE (n1) != INTEGER_CST
4877 || fd->loop.cond_code != LT_EXPR)
4878 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4879 build_int_cst (itype, 1));
4880 else if (tree_int_cst_sgn (n1) == 1)
4881 t = fold_convert (itype, t);
4882 else
4883 {
4884 tree c = fold_convert (itype, n1);
4885 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4886 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4887 }
4888 t = force_gimple_operand_gsi (&gsi, t, false,
4889 NULL_TREE, false, GSI_CONTINUE_LINKING);
4890 assign_stmt = gimple_build_assign (cond_var, t);
4891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4892 }
4893
4894 t = fold_convert (itype, e0);
4895 t = fold_build2 (MULT_EXPR, itype, t, step);
4896 if (POINTER_TYPE_P (type))
4897 {
4898 t = fold_build_pointer_plus (n1, t);
4899 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4900 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4901 t = fold_convert (signed_type_for (type), t);
4902 }
4903 else
4904 t = fold_build2 (PLUS_EXPR, type, t, n1);
4905 t = fold_convert (TREE_TYPE (startvar), t);
4906 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4907 false, GSI_CONTINUE_LINKING);
4908 if (endvar)
4909 {
4910 assign_stmt = gimple_build_assign (endvar, e);
4911 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4912 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4913 assign_stmt = gimple_build_assign (fd->loop.v, e);
4914 else
4915 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4916 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4917 }
4918 /* Handle linear clause adjustments. */
4919 tree itercnt = NULL_TREE;
4920 tree *nonrect_bounds = NULL;
4921 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4922 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4923 c; c = OMP_CLAUSE_CHAIN (c))
4924 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4925 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4926 {
4927 tree d = OMP_CLAUSE_DECL (c);
4928 bool is_ref = omp_is_reference (d);
4929 tree t = d, a, dest;
4930 if (is_ref)
4931 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4932 if (itercnt == NULL_TREE)
4933 {
4934 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4935 {
4936 itercnt = fold_build2 (MINUS_EXPR, itype,
4937 fold_convert (itype, n1),
4938 fold_convert (itype, fd->loop.n1));
4939 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4940 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4941 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4942 NULL_TREE, false,
4943 GSI_CONTINUE_LINKING);
4944 }
4945 else
4946 itercnt = s0;
4947 }
4948 tree type = TREE_TYPE (t);
4949 if (POINTER_TYPE_P (type))
4950 type = sizetype;
4951 a = fold_build2 (MULT_EXPR, type,
4952 fold_convert (type, itercnt),
4953 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4954 dest = unshare_expr (t);
4955 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4956 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4957 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4958 false, GSI_CONTINUE_LINKING);
4959 assign_stmt = gimple_build_assign (dest, t);
4960 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4961 }
4962 if (fd->collapse > 1)
4963 {
4964 if (fd->non_rect)
4965 {
4966 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
4967 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
4968 }
4969 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
4970 startvar);
4971 }
4972
4973 if (!broken_loop)
4974 {
4975 /* The code controlling the sequential loop replaces the
4976 GIMPLE_OMP_CONTINUE. */
4977 gsi = gsi_last_nondebug_bb (cont_bb);
4978 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4979 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4980 vmain = gimple_omp_continue_control_use (cont_stmt);
4981 vback = gimple_omp_continue_control_def (cont_stmt);
4982
4983 if (cond_var)
4984 {
4985 tree itype = TREE_TYPE (cond_var);
4986 tree t2;
4987 if (POINTER_TYPE_P (type)
4988 || TREE_CODE (n1) != INTEGER_CST
4989 || fd->loop.cond_code != LT_EXPR)
4990 t2 = build_int_cst (itype, 1);
4991 else
4992 t2 = fold_convert (itype, step);
4993 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4994 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4995 NULL_TREE, true, GSI_SAME_STMT);
4996 assign_stmt = gimple_build_assign (cond_var, t2);
4997 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4998 }
4999
5000 if (!gimple_omp_for_combined_p (fd->for_stmt))
5001 {
5002 if (POINTER_TYPE_P (type))
5003 t = fold_build_pointer_plus (vmain, step);
5004 else
5005 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5006 t = force_gimple_operand_gsi (&gsi, t,
5007 DECL_P (vback)
5008 && TREE_ADDRESSABLE (vback),
5009 NULL_TREE, true, GSI_SAME_STMT);
5010 assign_stmt = gimple_build_assign (vback, t);
5011 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5012
5013 t = build2 (fd->loop.cond_code, boolean_type_node,
5014 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5015 ? t : vback, e);
5016 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5017 }
5018
5019 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5020 gsi_remove (&gsi, true);
5021
5022 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5023 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5024 cont_bb, body_bb);
5025 }
5026
5027 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5028 gsi = gsi_last_nondebug_bb (exit_bb);
5029 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5030 {
5031 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5032 if (fd->have_reductemp
5033 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5034 && !fd->have_nonctrl_scantemp))
5035 {
5036 tree fn;
5037 if (t)
5038 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5039 else
5040 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5041 gcall *g = gimple_build_call (fn, 0);
5042 if (t)
5043 {
5044 gimple_call_set_lhs (g, t);
5045 if (fd->have_reductemp)
5046 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5047 NOP_EXPR, t),
5048 GSI_SAME_STMT);
5049 }
5050 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5051 }
5052 else
5053 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5054 }
5055 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5056 && !fd->have_nonctrl_scantemp)
5057 {
5058 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5059 gcall *g = gimple_build_call (fn, 0);
5060 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5061 }
5062 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5063 {
5064 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5065 tree controlp = NULL_TREE, controlb = NULL_TREE;
5066 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5067 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5068 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5069 {
5070 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5071 controlb = OMP_CLAUSE_DECL (c);
5072 else
5073 controlp = OMP_CLAUSE_DECL (c);
5074 if (controlb && controlp)
5075 break;
5076 }
5077 gcc_assert (controlp && controlb);
5078 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5079 NULL_TREE, NULL_TREE);
5080 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5081 exit1_bb = split_block (exit_bb, g)->dest;
5082 gsi = gsi_after_labels (exit1_bb);
5083 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5084 controlp);
5085 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5086 exit2_bb = split_block (exit1_bb, g)->dest;
5087 gsi = gsi_after_labels (exit2_bb);
5088 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5089 controlp);
5090 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5091 exit3_bb = split_block (exit2_bb, g)->dest;
5092 gsi = gsi_after_labels (exit3_bb);
5093 }
5094 gsi_remove (&gsi, true);
5095
5096 /* Connect all the blocks. */
5097 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5098 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5099 ep = find_edge (entry_bb, second_bb);
5100 ep->flags = EDGE_TRUE_VALUE;
5101 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5102 if (fourth_bb)
5103 {
5104 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5105 ep->probability
5106 = profile_probability::guessed_always ().apply_scale (1, 2);
5107 ep = find_edge (third_bb, fourth_bb);
5108 ep->flags = EDGE_TRUE_VALUE;
5109 ep->probability
5110 = profile_probability::guessed_always ().apply_scale (1, 2);
5111 ep = find_edge (fourth_bb, fifth_bb);
5112 redirect_edge_and_branch (ep, sixth_bb);
5113 }
5114 else
5115 sixth_bb = third_bb;
5116 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5117 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5118 if (exit1_bb)
5119 {
5120 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5121 ep->probability
5122 = profile_probability::guessed_always ().apply_scale (1, 2);
5123 ep = find_edge (exit_bb, exit1_bb);
5124 ep->flags = EDGE_TRUE_VALUE;
5125 ep->probability
5126 = profile_probability::guessed_always ().apply_scale (1, 2);
5127 ep = find_edge (exit1_bb, exit2_bb);
5128 redirect_edge_and_branch (ep, exit3_bb);
5129 }
5130
5131 if (!broken_loop)
5132 {
5133 ep = find_edge (cont_bb, body_bb);
5134 if (ep == NULL)
5135 {
5136 ep = BRANCH_EDGE (cont_bb);
5137 gcc_assert (single_succ (ep->dest) == body_bb);
5138 }
5139 if (gimple_omp_for_combined_p (fd->for_stmt))
5140 {
5141 remove_edge (ep);
5142 ep = NULL;
5143 }
5144 else if (fd->collapse > 1)
5145 {
5146 remove_edge (ep);
5147 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5148 }
5149 else
5150 ep->flags = EDGE_TRUE_VALUE;
5151 find_edge (cont_bb, fin_bb)->flags
5152 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5153 }
5154
5155 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5156 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5157 if (fourth_bb)
5158 {
5159 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5160 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5161 }
5162 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5163
5164 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5165 recompute_dominator (CDI_DOMINATORS, body_bb));
5166 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5167 recompute_dominator (CDI_DOMINATORS, fin_bb));
5168 if (exit1_bb)
5169 {
5170 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5171 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5172 }
5173
5174 class loop *loop = body_bb->loop_father;
5175 if (loop != entry_bb->loop_father)
5176 {
5177 gcc_assert (broken_loop || loop->header == body_bb);
5178 gcc_assert (broken_loop
5179 || loop->latch == region->cont
5180 || single_pred (loop->latch) == region->cont);
5181 return;
5182 }
5183
5184 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5185 {
5186 loop = alloc_loop ();
5187 loop->header = body_bb;
5188 if (collapse_bb == NULL)
5189 loop->latch = cont_bb;
5190 add_loop (loop, body_bb->loop_father);
5191 }
5192}
5193
5194/* Return phi in E->DEST with ARG on edge E. */
5195
5196static gphi *
5197find_phi_with_arg_on_edge (tree arg, edge e)
5198{
5199 basic_block bb = e->dest;
5200
5201 for (gphi_iterator gpi = gsi_start_phis (bb);
5202 !gsi_end_p (gpi);
5203 gsi_next (&gpi))
5204 {
5205 gphi *phi = gpi.phi ();
5206 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5207 return phi;
5208 }
5209
5210 return NULL;
5211}
5212
5213/* A subroutine of expand_omp_for. Generate code for a parallel
5214 loop with static schedule and a specified chunk size. Given
5215 parameters:
5216
5217 for (V = N1; V cond N2; V += STEP) BODY;
5218
5219 where COND is "<" or ">", we generate pseudocode
5220
5221 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5222 if (cond is <)
5223 adj = STEP - 1;
5224 else
5225 adj = STEP + 1;
5226 if ((__typeof (V)) -1 > 0 && cond is >)
5227 n = -(adj + N2 - N1) / -STEP;
5228 else
5229 n = (adj + N2 - N1) / STEP;
5230 trip = 0;
5231 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5232 here so that V is defined
5233 if the loop is not entered
5234 L0:
5235 s0 = (trip * nthreads + threadid) * CHUNK;
5236 e0 = min (s0 + CHUNK, n);
5237 if (s0 < n) goto L1; else goto L4;
5238 L1:
5239 V = s0 * STEP + N1;
5240 e = e0 * STEP + N1;
5241 L2:
5242 BODY;
5243 V += STEP;
5244 if (V cond e) goto L2; else goto L3;
5245 L3:
5246 trip += 1;
5247 goto L0;
5248 L4:
5249*/
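
/* As an illustrative sketch (not generated code), for

     #pragma omp for schedule(static, 4)
     for (i = 0; i < 100; i++) BODY;

   CHUNK is 4 and n is 100, so thread TID executes chunks number
   TID, TID + nthreads, TID + 2*nthreads, ..., which corresponds
   roughly to the plain C loop

     for (trip = 0; ; trip++)
       {
	 long s0 = (trip * nthreads + TID) * 4;
	 if (s0 >= 100) break;
	 long e0 = s0 + 4 < 100 ? s0 + 4 : 100;
	 for (i = s0; i < e0; i++) BODY;
       }

   since STEP is 1 and N1 is 0 here, V == s0.  */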
5250
5251static void
5252expand_omp_for_static_chunk (struct omp_region *region,
5253 struct omp_for_data *fd, gimple *inner_stmt)
5254{
5255 tree n, s0, e0, e, t;
5256 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5257 tree type, itype, vmain, vback, vextra;
5258 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5259 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5260 gimple_stmt_iterator gsi, gsip;
5261 edge se;
5262 bool broken_loop = region->cont == NULL;
5263 tree *counts = NULL;
5264 tree n1, n2, step;
5265 tree reductions = NULL_TREE;
5266 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5267
5268 itype = type = TREE_TYPE (fd->loop.v);
5269 if (POINTER_TYPE_P (type))
5270 itype = signed_type_for (type);
5271
5272 entry_bb = region->entry;
5273 se = split_block (entry_bb, last_stmt (entry_bb));
5274 entry_bb = se->src;
5275 iter_part_bb = se->dest;
5276 cont_bb = region->cont;
5277 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5278 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5279 gcc_assert (broken_loop
5280 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5281 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5282 body_bb = single_succ (seq_start_bb);
5283 if (!broken_loop)
5284 {
5285 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5286 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5287 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5288 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5289 }
5290 exit_bb = region->exit;
5291
5292 /* Trip and adjustment setup goes in ENTRY_BB. */
5293 gsi = gsi_last_nondebug_bb (entry_bb);
5294 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5295 gsip = gsi;
5296 gsi_prev (&gsip);
5297
5298 if (fd->collapse > 1)
5299 {
5300 int first_zero_iter = -1, dummy = -1;
5301 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5302
5303 counts = XALLOCAVEC (tree, fd->collapse);
5304 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5305 fin_bb, first_zero_iter,
5306 dummy_bb, dummy, l2_dom_bb);
5307 t = NULL_TREE;
5308 }
5309 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5310 t = integer_one_node;
5311 else
5312 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5313 fold_convert (type, fd->loop.n1),
5314 fold_convert (type, fd->loop.n2));
5315 if (fd->collapse == 1
5316 && TYPE_UNSIGNED (type)
5317 && (t == NULL_TREE || !integer_onep (t)))
5318 {
5319 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5320 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5321 true, GSI_SAME_STMT);
5322 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5323 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5324 true, GSI_SAME_STMT);
5325 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5326 NULL_TREE, NULL_TREE);
5327 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5328 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5329 expand_omp_regimplify_p, NULL, NULL)
5330 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5331 expand_omp_regimplify_p, NULL, NULL))
5332 {
5333 gsi = gsi_for_stmt (cond_stmt);
5334 gimple_regimplify_operands (cond_stmt, &gsi);
5335 }
5336 se = split_block (entry_bb, cond_stmt);
5337 se->flags = EDGE_TRUE_VALUE;
5338 entry_bb = se->dest;
5339 se->probability = profile_probability::very_likely ();
5340 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5341 se->probability = profile_probability::very_unlikely ();
5342 if (gimple_in_ssa_p (cfun))
5343 {
5344 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5345 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5346 !gsi_end_p (gpi); gsi_next (&gpi))
5347 {
5348 gphi *phi = gpi.phi ();
5349 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5350 se, UNKNOWN_LOCATION);
5351 }
5352 }
5353 gsi = gsi_last_bb (entry_bb);
5354 }
5355
5356 if (fd->lastprivate_conditional)
5357 {
5358 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5359 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5360 if (fd->have_pointer_condtemp)
5361 condtemp = OMP_CLAUSE_DECL (c);
5362 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5363 cond_var = OMP_CLAUSE_DECL (c);
5364 }
5365 if (fd->have_reductemp || fd->have_pointer_condtemp)
5366 {
5367 tree t1 = build_int_cst (long_integer_type_node, 0);
5368 tree t2 = build_int_cst (long_integer_type_node, 1);
5369 tree t3 = build_int_cstu (long_integer_type_node,
5370 (HOST_WIDE_INT_1U << 31) + 1);
5371 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5372 gimple_stmt_iterator gsi2 = gsi_none ();
5373 gimple *g = NULL;
5374 tree mem = null_pointer_node, memv = NULL_TREE;
5375 if (fd->have_reductemp)
5376 {
5377 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5378 reductions = OMP_CLAUSE_DECL (c);
5379 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5380 g = SSA_NAME_DEF_STMT (reductions);
5381 reductions = gimple_assign_rhs1 (g);
5382 OMP_CLAUSE_DECL (c) = reductions;
5383 gsi2 = gsi_for_stmt (g);
5384 }
5385 else
5386 {
5387 if (gsi_end_p (gsip))
5388 gsi2 = gsi_after_labels (region->entry);
5389 else
5390 gsi2 = gsip;
5391 reductions = null_pointer_node;
5392 }
5393 if (fd->have_pointer_condtemp)
5394 {
5395 tree type = TREE_TYPE (condtemp);
5396 memv = create_tmp_var (type);
5397 TREE_ADDRESSABLE (memv) = 1;
5398 unsigned HOST_WIDE_INT sz
5399 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5400 sz *= fd->lastprivate_conditional;
5401 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5402 false);
5403 mem = build_fold_addr_expr (memv);
5404 }
5405 tree t
5406 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5407 9, t1, t2, t2, t3, t1, null_pointer_node,
5408 null_pointer_node, reductions, mem);
5409 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5410 true, GSI_SAME_STMT);
5411 if (fd->have_pointer_condtemp)
5412 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5413 if (fd->have_reductemp)
5414 {
5415 gsi_remove (&gsi2, true);
5416 release_ssa_name (gimple_assign_lhs (g));
5417 }
5418 }
5419 switch (gimple_omp_for_kind (fd->for_stmt))
5420 {
5421 case GF_OMP_FOR_KIND_FOR:
5422 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5423 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5424 break;
5425 case GF_OMP_FOR_KIND_DISTRIBUTE:
5426 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5427 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5428 break;
5429 default:
5430 gcc_unreachable ();
5431 }
5432 nthreads = build_call_expr (nthreads, 0);
5433 nthreads = fold_convert (itype, nthreads);
5434 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5435 true, GSI_SAME_STMT);
5436 threadid = build_call_expr (threadid, 0);
5437 threadid = fold_convert (itype, threadid);
5438 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5439 true, GSI_SAME_STMT);
5440
5441 n1 = fd->loop.n1;
5442 n2 = fd->loop.n2;
5443 step = fd->loop.step;
5444 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5445 {
5446 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5447 OMP_CLAUSE__LOOPTEMP_);
5448 gcc_assert (innerc);
5449 n1 = OMP_CLAUSE_DECL (innerc);
5450 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5451 OMP_CLAUSE__LOOPTEMP_);
5452 gcc_assert (innerc);
5453 n2 = OMP_CLAUSE_DECL (innerc);
5454 }
5455 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5456 true, NULL_TREE, true, GSI_SAME_STMT);
5457 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5458 true, NULL_TREE, true, GSI_SAME_STMT);
5459 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5460 true, NULL_TREE, true, GSI_SAME_STMT);
5461 tree chunk_size = fold_convert (itype, fd->chunk_size);
5462 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5463 chunk_size
5464 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5465 GSI_SAME_STMT);
5466
5467 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5468 t = fold_build2 (PLUS_EXPR, itype, step, t);
5469 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5470 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5471 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5472 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5473 fold_build1 (NEGATE_EXPR, itype, t),
5474 fold_build1 (NEGATE_EXPR, itype, step));
5475 else
5476 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5477 t = fold_convert (itype, t);
5478 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5479 true, GSI_SAME_STMT);
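/* Purely illustrative: for (V = 0; V < 10; V += 3) this computes
   adj = 3 - 1 = 2 and n = (2 + 10 - 0) / 3 = 4, the total number of
   iterations (V = 0, 3, 6, 9).  */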
5480
5481 trip_var = create_tmp_reg (itype, ".trip");
5482 if (gimple_in_ssa_p (cfun))
5483 {
5484 trip_init = make_ssa_name (trip_var);
5485 trip_main = make_ssa_name (trip_var);
5486 trip_back = make_ssa_name (trip_var);
5487 }
5488 else
5489 {
5490 trip_init = trip_var;
5491 trip_main = trip_var;
5492 trip_back = trip_var;
5493 }
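/* In SSA form, trip_init, trip_main and trip_back are the three views
   of the trip counter: its initial zero, the value at the head of the
   trip loop (a PHI in ITER_PART_BB, created further below), and the
   incremented value flowing back from TRIP_UPDATE_BB.  */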
5494
5495 gassign *assign_stmt
5496 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5497 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5498
5499 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5500 t = fold_build2 (MULT_EXPR, itype, t, step);
5501 if (POINTER_TYPE_P (type))
5502 t = fold_build_pointer_plus (n1, t);
5503 else
5504 t = fold_build2 (PLUS_EXPR, type, t, n1);
5505 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5506 true, GSI_SAME_STMT);
5507
5508 /* Remove the GIMPLE_OMP_FOR. */
5509 gsi_remove (&gsi, true);
5510
5511 gimple_stmt_iterator gsif = gsi;
5512
5513 /* Iteration space partitioning goes in ITER_PART_BB. */
5514 gsi = gsi_last_bb (iter_part_bb);
5515
5516 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5517 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5518 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5519 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5520 false, GSI_CONTINUE_LINKING);
5521
5522 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5523 t = fold_build2 (MIN_EXPR, itype, t, n);
5524 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5525 false, GSI_CONTINUE_LINKING);
5526
5527 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5528 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5529
5530 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5531 gsi = gsi_start_bb (seq_start_bb);
5532
5533 tree startvar = fd->loop.v;
5534 tree endvar = NULL_TREE;
5535
5536 if (gimple_omp_for_combined_p (fd->for_stmt))
5537 {
5538 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5539 ? gimple_omp_parallel_clauses (inner_stmt)
5540 : gimple_omp_for_clauses (inner_stmt);
5541 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5542 gcc_assert (innerc);
5543 startvar = OMP_CLAUSE_DECL (innerc);
5544 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5545 OMP_CLAUSE__LOOPTEMP_);
5546 gcc_assert (innerc);
5547 endvar = OMP_CLAUSE_DECL (innerc);
5548 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5549 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5550 {
5551 int i;
5552 for (i = 1; i < fd->collapse; i++)
5553 {
5554 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5555 OMP_CLAUSE__LOOPTEMP_);
5556 gcc_assert (innerc);
5557 }
5558 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5559 OMP_CLAUSE__LOOPTEMP_);
5560 if (innerc)
5561 {
5562 /* If needed (distribute parallel for with lastprivate),
5563 propagate down the total number of iterations. */
5564 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5565 fd->loop.n2);
5566 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5567 GSI_CONTINUE_LINKING);
5568 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5569 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5570 }
5571 }
5572 }
5573
5574 t = fold_convert (itype, s0);
5575 t = fold_build2 (MULT_EXPR, itype, t, step);
5576 if (POINTER_TYPE_P (type))
5577 {
5578 t = fold_build_pointer_plus (n1, t);
5579 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5580 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5581 t = fold_convert (signed_type_for (type), t);
5582 }
5583 else
5584 t = fold_build2 (PLUS_EXPR, type, t, n1);
5585 t = fold_convert (TREE_TYPE (startvar), t);
5586 t = force_gimple_operand_gsi (&gsi, t,
5587 DECL_P (startvar)
5588 && TREE_ADDRESSABLE (startvar),
5589 NULL_TREE, false, GSI_CONTINUE_LINKING);
5590 assign_stmt = gimple_build_assign (startvar, t);
5591 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5592 if (cond_var)
5593 {
5594 tree itype = TREE_TYPE (cond_var);
5595 /* For the lastprivate(conditional:) itervar, we need an iteration
5596 counter that starts at a non-zero unsigned value and increases.
5597 Prefer as few IVs as possible, so if we can use startvar
5598 itself, use that, or startvar + constant (those would be
5599 incremented with step), and as a last resort use s0 + 1,
5600 incremented by 1 on each iteration. */
5601 if (POINTER_TYPE_P (type)
5602 || TREE_CODE (n1) != INTEGER_CST
5603 || fd->loop.cond_code != LT_EXPR)
5604 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5605 build_int_cst (itype, 1));
5606 else if (tree_int_cst_sgn (n1) == 1)
5607 t = fold_convert (itype, t);
5608 else
5609 {
5610 tree c = fold_convert (itype, n1);
5611 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5612 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5613 }
5614 t = force_gimple_operand_gsi (&gsi, t, false,
5615 NULL_TREE, false, GSI_CONTINUE_LINKING);
5616 assign_stmt = gimple_build_assign (cond_var, t);
5617 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5618 }
5619
5620 t = fold_convert (itype, e0);
5621 t = fold_build2 (MULT_EXPR, itype, t, step);
5622 if (POINTER_TYPE_P (type))
5623 {
5624 t = fold_build_pointer_plus (n1, t);
5625 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5626 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5627 t = fold_convert (signed_type_for (type), t);
5628 }
5629 else
5630 t = fold_build2 (PLUS_EXPR, type, t, n1);
5631 t = fold_convert (TREE_TYPE (startvar), t);
5632 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5633 false, GSI_CONTINUE_LINKING);
5634 if (endvar)
5635 {
5636 assign_stmt = gimple_build_assign (endvar, e);
5637 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5638 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5639 assign_stmt = gimple_build_assign (fd->loop.v, e);
5640 else
5641 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5642 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5643 }
5644 /* Handle linear clause adjustments. */
5645 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5646 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5647 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5648 c; c = OMP_CLAUSE_CHAIN (c))
5649 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5650 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5651 {
5652 tree d = OMP_CLAUSE_DECL (c);
5653 bool is_ref = omp_is_reference (d);
5654 tree t = d, a, dest;
5655 if (is_ref)
5656 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5657 tree type = TREE_TYPE (t);
5658 if (POINTER_TYPE_P (type))
5659 type = sizetype;
5660 dest = unshare_expr (t);
5661 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5662 expand_omp_build_assign (&gsif, v, t);
5663 if (itercnt == NULL_TREE)
5664 {
5665 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5666 {
5667 itercntbias
5668 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5669 fold_convert (itype, fd->loop.n1));
5670 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5671 itercntbias, step);
5672 itercntbias
5673 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5674 NULL_TREE, true,
5675 GSI_SAME_STMT);
5676 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5677 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5678 NULL_TREE, false,
5679 GSI_CONTINUE_LINKING);
5680 }
5681 else
5682 itercnt = s0;
5683 }
5684 a = fold_build2 (MULT_EXPR, type,
5685 fold_convert (type, itercnt),
5686 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5687 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5688 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5689 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5690 false, GSI_CONTINUE_LINKING);
5691 assign_stmt = gimple_build_assign (dest, t);
5692 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5693 }
5694 if (fd->collapse > 1)
5695 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5696
5697 if (!broken_loop)
5698 {
5699 /* The code controlling the sequential loop goes in CONT_BB,
5700 replacing the GIMPLE_OMP_CONTINUE. */
5701 gsi = gsi_last_nondebug_bb (cont_bb);
5702 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5703 vmain = gimple_omp_continue_control_use (cont_stmt);
5704 vback = gimple_omp_continue_control_def (cont_stmt);
5705
5706 if (cond_var)
5707 {
5708 tree itype = TREE_TYPE (cond_var);
5709 tree t2;
5710 if (POINTER_TYPE_P (type)
5711 || TREE_CODE (n1) != INTEGER_CST
5712 || fd->loop.cond_code != LT_EXPR)
5713 t2 = build_int_cst (itype, 1);
5714 else
5715 t2 = fold_convert (itype, step);
5716 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5717 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5718 NULL_TREE, true, GSI_SAME_STMT);
5719 assign_stmt = gimple_build_assign (cond_var, t2);
5720 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5721 }
5722
5723 if (!gimple_omp_for_combined_p (fd->for_stmt))
5724 {
5725 if (POINTER_TYPE_P (type))
5726 t = fold_build_pointer_plus (vmain, step);
5727 else
5728 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5729 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
5730 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5731 true, GSI_SAME_STMT);
5732 assign_stmt = gimple_build_assign (vback, t);
5733 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5734
5735 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
5736 t = build2 (EQ_EXPR, boolean_type_node,
5737 build_int_cst (itype, 0),
5738 build_int_cst (itype, 1));
5739 else
5740 t = build2 (fd->loop.cond_code, boolean_type_node,
5741 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5742 ? t : vback, e);
5743 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5744 }
5745
5746 /* Remove GIMPLE_OMP_CONTINUE. */
5747 gsi_remove (&gsi, true);
5748
5749 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5750 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
5751
5752 /* Trip update code goes into TRIP_UPDATE_BB. */
5753 gsi = gsi_start_bb (trip_update_bb);
5754
5755 t = build_int_cst (itype, 1);
5756 t = build2 (PLUS_EXPR, itype, trip_main, t);
5757 assign_stmt = gimple_build_assign (trip_back, t);
5758 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5759 }
5760
5761 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5762 gsi = gsi_last_nondebug_bb (exit_bb);
5763 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5764 {
5765 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5766 if (fd->have_reductemp || fd->have_pointer_condtemp)
5767 {
5768 tree fn;
5769 if (t)
5770 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5771 else
5772 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5773 gcall *g = gimple_build_call (fn, 0);
5774 if (t)
5775 {
5776 gimple_call_set_lhs (g, t);
5777 if (fd->have_reductemp)
5778 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5779 NOP_EXPR, t),
5780 GSI_SAME_STMT);
5781 }
5782 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5783 }
5784 else
5785 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5786 }
5787 else if (fd->have_pointer_condtemp)
5788 {
5789 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5790 gcall *g = gimple_build_call (fn, 0);
5791 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5792 }
5793 gsi_remove (&gsi, true);
5794
5795 /* Connect the new blocks. */
5796 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
5797 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
5798
5799 if (!broken_loop)
5800 {
5801 se = find_edge (cont_bb, body_bb);
5802 if (se == NULL)
5803 {
5804 se = BRANCH_EDGE (cont_bb);
5805 gcc_assert (single_succ (se->dest) == body_bb);
5806 }
5807 if (gimple_omp_for_combined_p (fd->for_stmt))
5808 {
5809 remove_edge (se);
5810 se = NULL;
5811 }
5812 else if (fd->collapse > 1)
5813 {
5814 remove_edge (se);
5815 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5816 }
5817 else
5818 se->flags = EDGE_TRUE_VALUE;
5819 find_edge (cont_bb, trip_update_bb)->flags
5820 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5821
5822 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5823 iter_part_bb);
5824 }
5825
5826 if (gimple_in_ssa_p (cfun))
5827 {
5828 gphi_iterator psi;
5829 gphi *phi;
5830 edge re, ene;
5831 edge_var_map *vm;
5832 size_t i;
5833
5834 gcc_assert (fd->collapse == 1 && !broken_loop);
5835
5836 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5837 remove arguments of the phi nodes in fin_bb. We need to create
5838 appropriate phi nodes in iter_part_bb instead. */
5839 se = find_edge (iter_part_bb, fin_bb);
5840 re = single_succ_edge (trip_update_bb);
5841 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5842 ene = single_succ_edge (entry_bb);
5843
5844 psi = gsi_start_phis (fin_bb);
5845 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5846 gsi_next (&psi), ++i)
5847 {
5848 gphi *nphi;
5849 location_t locus;
5850
5851 phi = psi.phi ();
5852 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5853 redirect_edge_var_map_def (vm), 0))
5854 continue;
5855
5856 t = gimple_phi_result (phi);
5857 gcc_assert (t == redirect_edge_var_map_result (vm));
5858
5859 if (!single_pred_p (fin_bb))
5860 t = copy_ssa_name (t, phi);
5861
5862 nphi = create_phi_node (t, iter_part_bb);
5863
5864 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5865 locus = gimple_phi_arg_location_from_edge (phi, se);
5866
5867 /* A special case -- fd->loop.v is not yet computed in
5868 iter_part_bb, we need to use vextra instead. */
5869 if (t == fd->loop.v)
5870 t = vextra;
5871 add_phi_arg (nphi, t, ene, locus);
5872 locus = redirect_edge_var_map_location (vm);
5873 tree back_arg = redirect_edge_var_map_def (vm);
5874 add_phi_arg (nphi, back_arg, re, locus);
5875 edge ce = find_edge (cont_bb, body_bb);
5876 if (ce == NULL)
5877 {
5878 ce = BRANCH_EDGE (cont_bb);
5879 gcc_assert (single_succ (ce->dest) == body_bb);
5880 ce = single_succ_edge (ce->dest);
5881 }
5882 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5883 gcc_assert (inner_loop_phi != NULL);
5884 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5885 find_edge (seq_start_bb, body_bb), locus);
5886
5887 if (!single_pred_p (fin_bb))
5888 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5889 }
5890 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5891 redirect_edge_var_map_clear (re);
5892 if (single_pred_p (fin_bb))
5893 while (1)
5894 {
5895 psi = gsi_start_phis (fin_bb);
5896 if (gsi_end_p (psi))
5897 break;
5898 remove_phi_node (&psi, false);
5899 }
5900
5901 /* Make phi node for trip. */
5902 phi = create_phi_node (trip_main, iter_part_bb);
5903 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5904 UNKNOWN_LOCATION);
5905 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5906 UNKNOWN_LOCATION);
5907 }
5908
5909 if (!broken_loop)
5910 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5911 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5912 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5913 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5914 recompute_dominator (CDI_DOMINATORS, fin_bb));
5915 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5916 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5917 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5918 recompute_dominator (CDI_DOMINATORS, body_bb));
5919
5920 if (!broken_loop)
5921 {
5922 class loop *loop = body_bb->loop_father;
5923 class loop *trip_loop = alloc_loop ();
5924 trip_loop->header = iter_part_bb;
5925 trip_loop->latch = trip_update_bb;
5926 add_loop (trip_loop, iter_part_bb->loop_father);
5927
5928 if (loop != entry_bb->loop_father)
5929 {
5930 gcc_assert (loop->header == body_bb);
5931 gcc_assert (loop->latch == region->cont
5932 || single_pred (loop->latch) == region->cont);
5933 trip_loop->inner = loop;
5934 return;
5935 }
5936
5937 if (!gimple_omp_for_combined_p (fd->for_stmt))
5938 {
5939 loop = alloc_loop ();
5940 loop->header = body_bb;
5941 if (collapse_bb == NULL)
5942 loop->latch = cont_bb;
5943 add_loop (loop, trip_loop);
5944 }
5945 }
5946}
5947
5948/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5949 loop. Given parameters:
5950
5951 for (V = N1; V cond N2; V += STEP) BODY;
5952
5953 where COND is "<" or ">", we generate pseudocode
5954
5955 V = N1;
5956 goto L1;
5957 L0:
5958 BODY;
5959 V += STEP;
5960 L1:
5961 if (V cond N2) goto L0; else goto L2;
5962 L2:
5963
5964 For collapsed loops, given parameters:
5965 collapse(3)
5966 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5967 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5968 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5969 BODY;
5970
5971 we generate pseudocode
5972
5973 if (cond3 is <)
5974 adj = STEP3 - 1;
5975 else
5976 adj = STEP3 + 1;
5977 count3 = (adj + N32 - N31) / STEP3;
5978 if (cond2 is <)
5979 adj = STEP2 - 1;
5980 else
5981 adj = STEP2 + 1;
5982 count2 = (adj + N22 - N21) / STEP2;
5983 if (cond1 is <)
5984 adj = STEP1 - 1;
5985 else
5986 adj = STEP1 + 1;
5987 count1 = (adj + N12 - N11) / STEP1;
5988 count = count1 * count2 * count3;
5989 V = 0;
5990 V1 = N11;
5991 V2 = N21;
5992 V3 = N31;
5993 goto L1;
5994 L0:
5995 BODY;
5996 V += 1;
5997 V3 += STEP3;
5998 V2 += (V3 cond3 N32) ? 0 : STEP2;
5999 V3 = (V3 cond3 N32) ? V3 : N31;
6000 V1 += (V2 cond2 N22) ? 0 : STEP1;
6001 V2 = (V2 cond2 N22) ? V2 : N21;
6002 L1:
6003 if (V < count) goto L0; else goto L2;
6004 L2:
6005
6006 */
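
/* For instance (illustrative only), with collapse(2) and

     for (i = 0; i < 4; i++)
       for (j = 0; j < 3; j++) BODY;

   count1 = 4 and count2 = 3, so count = 12, and the expansion runs one
   linear counter V from 0 to 11, stepping j every iteration and
   stepping i whenever j wraps back to its lower bound, exactly as in
   the pseudocode above.  */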
6007
6008static void
6009expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6010{
6011 tree type, t;
6012 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6013 gimple_stmt_iterator gsi;
6014 gimple *stmt;
6015 gcond *cond_stmt;
6016 bool broken_loop = region->cont == NULL;
6017 edge e, ne;
6018 tree *counts = NULL;
6019 int i;
6020 int safelen_int = INT_MAX;
6021 bool dont_vectorize = false;
6022 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6023 OMP_CLAUSE_SAFELEN);
6024 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6025 OMP_CLAUSE__SIMDUID_);
6026 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6027 OMP_CLAUSE_IF);
6028 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6029 OMP_CLAUSE_SIMDLEN);
6030 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6031 OMP_CLAUSE__CONDTEMP_);
6032 tree n1, n2;
6033 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6034
6035 if (safelen)
6036 {
6037 poly_uint64 val;
6038 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6039 if (!poly_int_tree_p (safelen, &val))
6040 safelen_int = 0;
6041 else
6042 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6043 if (safelen_int == 1)
6044 safelen_int = 0;
6045 }
6046 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6047 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6048 {
6049 safelen_int = 0;
6050 dont_vectorize = true;
6051 }
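/* E.g. (illustrative), an 'if (simd: 0)' clause or 'simdlen (1)' known
   at compile time forces the scalar path: safelen_int becomes 0 above
   and loop->dont_vectorize is set at the end of this function.  */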
6052 type = TREE_TYPE (fd->loop.v);
6053 entry_bb = region->entry;
6054 cont_bb = region->cont;
6055 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6056 gcc_assert (broken_loop
6057 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6058 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6059 if (!broken_loop)
6060 {
6061 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6062 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6063 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6064 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6065 }
6066 else
6067 {
6068 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6069 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6070 l2_bb = single_succ (l1_bb);
6071 }
6072 exit_bb = region->exit;
6073 l2_dom_bb = NULL;
6074
6075 gsi = gsi_last_nondebug_bb (entry_bb);
6076
6077 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6078 /* Not needed in SSA form right now. */
6079 gcc_assert (!gimple_in_ssa_p (cfun));
6080 if (fd->collapse > 1)
6081 {
6082 int first_zero_iter = -1, dummy = -1;
6083 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6084
6085 counts = XALLOCAVEC (tree, fd->collapse);
6086 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6087 zero_iter_bb, first_zero_iter,
6088 dummy_bb, dummy, l2_dom_bb);
6089 }
6090 if (l2_dom_bb == NULL)
6091 l2_dom_bb = l1_bb;
6092
6093 n1 = fd->loop.n1;
6094 n2 = fd->loop.n2;
6095 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6096 {
6097 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6098 OMP_CLAUSE__LOOPTEMP_);
6099 gcc_assert (innerc);
6100 n1 = OMP_CLAUSE_DECL (innerc);
6101 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6102 OMP_CLAUSE__LOOPTEMP_);
6103 gcc_assert (innerc);
6104 n2 = OMP_CLAUSE_DECL (innerc);
6105 }
6106 tree step = fd->loop.step;
6107
6108 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6109 OMP_CLAUSE__SIMT_);
6110 if (is_simt)
6111 {
6112 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6113 is_simt = safelen_int > 1;
6114 }
6115 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6116 if (is_simt)
6117 {
6118 simt_lane = create_tmp_var (unsigned_type_node);
6119 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6120 gimple_call_set_lhs (g, simt_lane);
6121 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6122 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6123 fold_convert (TREE_TYPE (step), simt_lane));
6124 n1 = fold_convert (type, n1);
6125 if (POINTER_TYPE_P (type))
6126 n1 = fold_build_pointer_plus (n1, offset);
6127 else
6128 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6129
6130 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6131 if (fd->collapse > 1)
6132 simt_maxlane = build_one_cst (unsigned_type_node);
6133 else if (safelen_int < omp_max_simt_vf ())
6134 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6135 tree vf
6136 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6137 unsigned_type_node, 0);
6138 if (simt_maxlane)
6139 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6140 vf = fold_convert (TREE_TYPE (step), vf);
6141 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6142 }
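/* Illustrative summary of the adjustment above: under SIMT, lane L
   starts at N1 + L * STEP and advances by VF * STEP each iteration,
   so the VF lanes interleave the iteration space.  */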
6143
6144 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6145 if (fd->collapse > 1)
6146 {
6147 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6148 {
6149 gsi_prev (&gsi);
6150 expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1);
6151 gsi_next (&gsi);
6152 }
6153 else
6154 for (i = 0; i < fd->collapse; i++)
6155 {
6156 tree itype = TREE_TYPE (fd->loops[i].v);
6157 if (POINTER_TYPE_P (itype))
6158 itype = signed_type_for (itype);
6159 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6160 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6161 }
6162 }
6163 if (cond_var)
6164 {
6165 if (POINTER_TYPE_P (type)
6166 || TREE_CODE (n1) != INTEGER_CST
6167 || fd->loop.cond_code != LT_EXPR
6168 || tree_int_cst_sgn (n1) != 1)
6169 expand_omp_build_assign (&gsi, cond_var,
6170 build_one_cst (TREE_TYPE (cond_var)));
6171 else
6172 expand_omp_build_assign (&gsi, cond_var,
6173 fold_convert (TREE_TYPE (cond_var), n1));
6174 }
6175
6176 /* Remove the GIMPLE_OMP_FOR statement. */
6177 gsi_remove (&gsi, true);
6178
6179 if (!broken_loop)
6180 {
6181 /* Code to control the increment goes in the CONT_BB. */
6182 gsi = gsi_last_nondebug_bb (cont_bb);
6183 stmt = gsi_stmt (gsi);
6184 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6185
6186 if (POINTER_TYPE_P (type))
6187 t = fold_build_pointer_plus (fd->loop.v, step);
6188 else
6189 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6190 expand_omp_build_assign (&gsi, fd->loop.v, t);
6191
6192 if (fd->collapse > 1)
6193 {
6194 i = fd->collapse - 1;
6195 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6196 {
6197 t = fold_convert (sizetype, fd->loops[i].step);
6198 t = fold_build_pointer_plus (fd->loops[i].v, t);
6199 }
6200 else
6201 {
6202 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6203 fd->loops[i].step);
6204 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6205 fd->loops[i].v, t);
6206 }
6207 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6208
6209 for (i = fd->collapse - 1; i > 0; i--)
6210 {
6211 tree itype = TREE_TYPE (fd->loops[i].v);
6212 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
6213 if (POINTER_TYPE_P (itype2))
6214 itype2 = signed_type_for (itype2);
6215 t = fold_convert (itype2, fd->loops[i - 1].step);
6216 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6217 GSI_SAME_STMT);
6218 t = build3 (COND_EXPR, itype2,
6219 build2 (fd->loops[i].cond_code, boolean_type_node,
6220 fd->loops[i].v,
6221 fold_convert (itype, fd->loops[i].n2)),
6222 build_int_cst (itype2, 0), t);
6223 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
6224 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
6225 else
6226 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
6227 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
6228
6229 t = fold_convert (itype, fd->loops[i].n1);
6230 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6231 GSI_SAME_STMT);
6232 t = build3 (COND_EXPR, itype,
6233 build2 (fd->loops[i].cond_code, boolean_type_node,
6234 fd->loops[i].v,
6235 fold_convert (itype, fd->loops[i].n2)),
6236 fd->loops[i].v, t);
6237 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6238 }
6239 }
6240 if (cond_var)
6241 {
6242 if (POINTER_TYPE_P (type)
6243 || TREE_CODE (n1) != INTEGER_CST
6244 || fd->loop.cond_code != LT_EXPR
6245 || tree_int_cst_sgn (n1) != 1)
6246 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6247 build_one_cst (TREE_TYPE (cond_var)));
6248 else
6249 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6250 fold_convert (TREE_TYPE (cond_var), step));
6251 expand_omp_build_assign (&gsi, cond_var, t);
6252 }
6253
6254 /* Remove GIMPLE_OMP_CONTINUE. */
6255 gsi_remove (&gsi, true);
6256 }
6257
6258 /* Emit the condition in L1_BB. */
6259 gsi = gsi_start_bb (l1_bb);
6260
6261 t = fold_convert (type, n2);
6262 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6263 false, GSI_CONTINUE_LINKING);
6264 tree v = fd->loop.v;
6265 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6266 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6267 false, GSI_CONTINUE_LINKING);
6268 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6269 cond_stmt = gimple_build_cond_empty (t);
6270 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6271 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6272 NULL, NULL)
6273 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6274 NULL, NULL))
6275 {
6276 gsi = gsi_for_stmt (cond_stmt);
6277 gimple_regimplify_operands (cond_stmt, &gsi);
6278 }
6279
6280 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6281 if (is_simt)
6282 {
6283 gsi = gsi_start_bb (l2_bb);
6284 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6285 if (POINTER_TYPE_P (type))
6286 t = fold_build_pointer_plus (fd->loop.v, step);
6287 else
6288 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6289 expand_omp_build_assign (&gsi, fd->loop.v, t);
6290 }
6291
6292 /* Remove GIMPLE_OMP_RETURN. */
6293 gsi = gsi_last_nondebug_bb (exit_bb);
6294 gsi_remove (&gsi, true);
6295
6296 /* Connect the new blocks. */
6297 remove_edge (FALLTHRU_EDGE (entry_bb));
6298
6299 if (!broken_loop)
6300 {
6301 remove_edge (BRANCH_EDGE (entry_bb));
6302 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6303
6304 e = BRANCH_EDGE (l1_bb);
6305 ne = FALLTHRU_EDGE (l1_bb);
6306 e->flags = EDGE_TRUE_VALUE;
6307 }
6308 else
6309 {
6310 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6311
6312 ne = single_succ_edge (l1_bb);
6313 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6314
6315 }
6316 ne->flags = EDGE_FALSE_VALUE;
6317 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6318 ne->probability = e->probability.invert ();
6319
6320 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6321 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6322
6323 if (simt_maxlane)
6324 {
6325 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6326 NULL_TREE, NULL_TREE);
6327 gsi = gsi_last_bb (entry_bb);
6328 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6329 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6330 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6331 FALLTHRU_EDGE (entry_bb)->probability
6332 = profile_probability::guessed_always ().apply_scale (7, 8);
6333 BRANCH_EDGE (entry_bb)->probability
6334 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6335 l2_dom_bb = entry_bb;
6336 }
6337 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6338
6339 if (!broken_loop)
6340 {
6341 class loop *loop = alloc_loop ();
6342 loop->header = l1_bb;
6343 loop->latch = cont_bb;
6344 add_loop (loop, l1_bb->loop_father);
6345 loop->safelen = safelen_int;
6346 if (simduid)
6347 {
6348 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6349 cfun->has_simduid_loops = true;
6350 }
6351 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6352 the loop. */
6353 if ((flag_tree_loop_vectorize
6354 || !global_options_set.x_flag_tree_loop_vectorize)
6355 && flag_tree_loop_optimize
6356 && loop->safelen > 1)
6357 {
6358 loop->force_vectorize = true;
6359 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6360 {
6361 unsigned HOST_WIDE_INT v
6362 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6363 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6364 loop->simdlen = v;
6365 }
6366 cfun->has_force_vectorize_loops = true;
6367 }
6368 else if (dont_vectorize)
6369 loop->dont_vectorize = true;
6370 }
6371 else if (simduid)
6372 cfun->has_simduid_loops = true;
6373}
6374
6375/* The taskloop construct is represented after gimplification as
6376 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6377 between them. This routine expands the outer GIMPLE_OMP_FOR,
6378 which should just compute all the needed loop temporaries
6379 for GIMPLE_OMP_TASK. */
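
/* Schematically (a sketch of the structure described above):

     GIMPLE_OMP_FOR          <-- outer; expanded here, only computes
       GIMPLE_OMP_TASK           the _looptemp_ start/end for the task
	 GIMPLE_OMP_FOR      <-- inner; becomes the loop in the task body
*/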
6380
6381static void
6382expand_omp_taskloop_for_outer (struct omp_region *region,
6383 struct omp_for_data *fd,
6384 gimple *inner_stmt)
6385{
6386 tree type, bias = NULL_TREE;
6387 basic_block entry_bb, cont_bb, exit_bb;
6388 gimple_stmt_iterator gsi;
6389 gassign *assign_stmt;
6390 tree *counts = NULL;
6391 int i;
6392
6393 gcc_assert (inner_stmt);
6394 gcc_assert (region->cont);
6395 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6396 && gimple_omp_task_taskloop_p (inner_stmt));
6397 type = TREE_TYPE (fd->loop.v);
6398
6399 /* See if we need to bias by LLONG_MIN. */
6400 if (fd->iter_type == long_long_unsigned_type_node
6401 && TREE_CODE (type) == INTEGER_TYPE
6402 && !TYPE_UNSIGNED (type))
6403 {
6404 tree n1, n2;
6405
6406 if (fd->loop.cond_code == LT_EXPR)
6407 {
6408 n1 = fd->loop.n1;
6409 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6410 }
6411 else
6412 {
6413 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6414 n2 = fd->loop.n1;
6415 }
6416 if (TREE_CODE (n1) != INTEGER_CST
6417 || TREE_CODE (n2) != INTEGER_CST
6418 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6419 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6420 }
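/* Illustrative: for a signed 64-bit IV, adding (unsigned long long)
   LLONG_MIN maps [LLONG_MIN, LLONG_MAX] monotonically onto
   [0, ULLONG_MAX], so the unsigned iter_type arithmetic stays
   order-preserving.  */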
6421
6422 entry_bb = region->entry;
6423 cont_bb = region->cont;
6424 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6425 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6426 exit_bb = region->exit;
6427
6428 gsi = gsi_last_nondebug_bb (entry_bb);
6429 gimple *for_stmt = gsi_stmt (gsi);
6430 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
6431 if (fd->collapse > 1)
6432 {
6433 int first_zero_iter = -1, dummy = -1;
6434 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
6435
6436 counts = XALLOCAVEC (tree, fd->collapse);
6437 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6438 zero_iter_bb, first_zero_iter,
6439 dummy_bb, dummy, l2_dom_bb);
6440
6441 if (zero_iter_bb)
6442 {
6443 /* Some counts[i] vars might be uninitialized if
6444 some loop has zero iterations. But the body shouldn't
6445 be executed in that case, so just avoid uninit warnings. */
6446 for (i = first_zero_iter; i < fd->collapse; i++)
6447 if (SSA_VAR_P (counts[i]))
6448 TREE_NO_WARNING (counts[i]) = 1;
6449 gsi_prev (&gsi);
6450 edge e = split_block (entry_bb, gsi_stmt (gsi));
6451 entry_bb = e->dest;
6452 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
6453 gsi = gsi_last_bb (entry_bb);
6454 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
6455 get_immediate_dominator (CDI_DOMINATORS,
6456 zero_iter_bb));
6457 }
6458 }
6459
6460 tree t0, t1;
6461 t1 = fd->loop.n2;
6462 t0 = fd->loop.n1;
6463 if (POINTER_TYPE_P (TREE_TYPE (t0))
6464 && TYPE_PRECISION (TREE_TYPE (t0))
6465 != TYPE_PRECISION (fd->iter_type))
6466 {
6467 /* Avoid casting pointers to integer of a different size. */
6468 tree itype = signed_type_for (type);
6469 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
6470 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
6471 }
6472 else
6473 {
6474 t1 = fold_convert (fd->iter_type, t1);
6475 t0 = fold_convert (fd->iter_type, t0);
6476 }
6477 if (bias)
6478 {
6479 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
6480 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
6481 }
6482
6483 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
6484 OMP_CLAUSE__LOOPTEMP_);
6485 gcc_assert (innerc);
6486 tree startvar = OMP_CLAUSE_DECL (innerc);
6487 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6488 gcc_assert (innerc);
6489 tree endvar = OMP_CLAUSE_DECL (innerc);
6490 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
6491 {
6492 gcc_assert (innerc);
6493 for (i = 1; i < fd->collapse; i++)
6494 {
6495 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6496 OMP_CLAUSE__LOOPTEMP_);
6497 gcc_assert (innerc);
6498 }
6499 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6500 OMP_CLAUSE__LOOPTEMP_);
6501 if (innerc)
6502 {
6503 /* If needed (inner taskloop has lastprivate clause), propagate
6504 down the total number of iterations. */
6505 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
6506 NULL_TREE, false,
6507 GSI_CONTINUE_LINKING);
6508 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6509 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6510 }
6511 }
6512
6513 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
6514 GSI_CONTINUE_LINKING);
6515 assign_stmt = gimple_build_assign (startvar, t0);
6516 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6517
6518 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
6519 GSI_CONTINUE_LINKING);
6520 assign_stmt = gimple_build_assign (endvar, t1);
6521 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6522 if (fd->collapse > 1)
6523 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6524
6525 /* Remove the GIMPLE_OMP_FOR statement. */
6526 gsi = gsi_for_stmt (for_stmt);
6527 gsi_remove (&gsi, true);
6528
6529 gsi = gsi_last_nondebug_bb (cont_bb);
6530 gsi_remove (&gsi, true);
6531
6532 gsi = gsi_last_nondebug_bb (exit_bb);
6533 gsi_remove (&gsi, true);
6534
6535 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6536 remove_edge (BRANCH_EDGE (entry_bb));
6537 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
6538 remove_edge (BRANCH_EDGE (cont_bb));
6539 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
6540 set_immediate_dominator (CDI_DOMINATORS, region->entry,
6541 recompute_dominator (CDI_DOMINATORS, region->entry));
6542}
6543
6544/* Taskloop construct is represented after gimplification with
6545 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6546 in between them. This routine expands the inner GIMPLE_OMP_FOR.
6547 GOMP_taskloop{,_ull} function arranges for each task to be given just
6548 a single range of iterations. */
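/* A sketch of the per-task loop this expands to, in the simple
   non-collapsed case (START and END stand for the _LOOPTEMP_ values the
   runtime filled in for this particular task):

     for (V = START; V cond END; V += STEP)
       body;  */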
6549
6550static void
6551expand_omp_taskloop_for_inner (struct omp_region *region,
6552 struct omp_for_data *fd,
6553 gimple *inner_stmt)
6554{
6555 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
6556 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
6557 basic_block fin_bb;
6558 gimple_stmt_iterator gsi;
6559 edge ep;
6560 bool broken_loop = region->cont == NULL;
6561 tree *counts = NULL;
6562 tree n1, n2, step;
6563
6564 itype = type = TREE_TYPE (fd->loop.v);
6565 if (POINTER_TYPE_P (type))
6566 itype = signed_type_for (type);
6567
6568 /* See if we need to bias by LLONG_MIN. */
6569 if (fd->iter_type == long_long_unsigned_type_node
6570 && TREE_CODE (type) == INTEGER_TYPE
6571 && !TYPE_UNSIGNED (type))
6572 {
6573 tree n1, n2;
6574
6575 if (fd->loop.cond_code == LT_EXPR)
6576 {
6577 n1 = fd->loop.n1;
6578 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6579 }
6580 else
6581 {
6582 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6583 n2 = fd->loop.n1;
6584 }
6585 if (TREE_CODE (n1) != INTEGER_CST
6586 || TREE_CODE (n2) != INTEGER_CST
6587 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6588 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6589 }
6590
6591 entry_bb = region->entry;
6592 cont_bb = region->cont;
6593 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6594 fin_bb = BRANCH_EDGE (entry_bb)->dest;
6595 gcc_assert (broken_loop
6596 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
6597 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6598 if (!broken_loop)
6599 {
6600 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
6601 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6602 }
6603 exit_bb = region->exit;
6604
6605 /* Iteration space partitioning goes in ENTRY_BB. */
6606 gsi = gsi_last_nondebug_bb (entry_bb);
6607 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6608
6609 if (fd->collapse > 1)
6610 {
6611 int first_zero_iter = -1, dummy = -1;
6612 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
6613
6614 counts = XALLOCAVEC (tree, fd->collapse);
6615 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6616 fin_bb, first_zero_iter,
6617 dummy_bb, dummy, l2_dom_bb);
6618 t = NULL_TREE;
6619 }
6620 else
6621 t = integer_one_node;
6622
6623 step = fd->loop.step;
6624 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6625 OMP_CLAUSE__LOOPTEMP_);
6626 gcc_assert (innerc);
6627 n1 = OMP_CLAUSE_DECL (innerc);
6628 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6629 gcc_assert (innerc);
6630 n2 = OMP_CLAUSE_DECL (innerc);
6631 if (bias)
6632 {
6633 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
6634 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
6635 }
6636 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
6637 true, NULL_TREE, true, GSI_SAME_STMT);
6638 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
6639 true, NULL_TREE, true, GSI_SAME_STMT);
6640 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
6641 true, NULL_TREE, true, GSI_SAME_STMT);
6642
6643 tree startvar = fd->loop.v;
6644 tree endvar = NULL_TREE;
6645
6646 if (gimple_omp_for_combined_p (fd->for_stmt))
6647 {
6648 tree clauses = gimple_omp_for_clauses (inner_stmt);
6649 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6650 gcc_assert (innerc);
6651 startvar = OMP_CLAUSE_DECL (innerc);
6652 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6653 OMP_CLAUSE__LOOPTEMP_);
6654 gcc_assert (innerc);
6655 endvar = OMP_CLAUSE_DECL (innerc);
6656 }
6657 t = fold_convert (TREE_TYPE (startvar), n1);
6658 t = force_gimple_operand_gsi (&gsi, t,
6659 DECL_P (startvar)
6660 && TREE_ADDRESSABLE (startvar),
6661 NULL_TREE, false, GSI_CONTINUE_LINKING);
6662 gimple *assign_stmt = gimple_build_assign (startvar, t);
6663 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6664
6665 t = fold_convert (TREE_TYPE (startvar), n2);
6666 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6667 false, GSI_CONTINUE_LINKING);
6668 if (endvar)
6669 {
6670 assign_stmt = gimple_build_assign (endvar, e);
6671 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6672 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6673 assign_stmt = gimple_build_assign (fd->loop.v, e);
6674 else
6675 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6676 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6677 }
6678 if (fd->collapse > 1)
6679 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6680
6681 if (!broken_loop)
6682 {
6683 /* The code controlling the sequential loop replaces the
6684 GIMPLE_OMP_CONTINUE. */
6685 gsi = gsi_last_nondebug_bb (cont_bb);
6686 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6687 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
6688 vmain = gimple_omp_continue_control_use (cont_stmt);
6689 vback = gimple_omp_continue_control_def (cont_stmt);
6690
6691 if (!gimple_omp_for_combined_p (fd->for_stmt))
6692 {
6693 if (POINTER_TYPE_P (type))
6694 t = fold_build_pointer_plus (vmain, step);
6695 else
6696 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6697 t = force_gimple_operand_gsi (&gsi, t,
6698 DECL_P (vback)
6699 && TREE_ADDRESSABLE (vback),
6700 NULL_TREE, true, GSI_SAME_STMT);
6701 assign_stmt = gimple_build_assign (vback, t);
6702 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6703
6704 t = build2 (fd->loop.cond_code, boolean_type_node,
6705 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6706 ? t : vback, e);
6707 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6708 }
6709
6710 /* Remove the GIMPLE_OMP_CONTINUE statement. */
6711 gsi_remove (&gsi, true);
6712
6713 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6714 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6715 }
6716
6717 /* Remove the GIMPLE_OMP_FOR statement. */
6718 gsi = gsi_for_stmt (fd->for_stmt);
6719 gsi_remove (&gsi, true);
6720
6721 /* Remove the GIMPLE_OMP_RETURN statement. */
6722 gsi = gsi_last_nondebug_bb (exit_bb);
6723 gsi_remove (&gsi, true);
6724
6725 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6726 if (!broken_loop)
6727 remove_edge (BRANCH_EDGE (entry_bb));
6728 else
6729 {
6730 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
6731 region->outer->cont = NULL;
6732 }
6733
6734 /* Connect all the blocks. */
6735 if (!broken_loop)
6736 {
6737 ep = find_edge (cont_bb, body_bb);
6738 if (gimple_omp_for_combined_p (fd->for_stmt))
6739 {
6740 remove_edge (ep);
6741 ep = NULL;
6742 }
6743 else if (fd->collapse > 1)
6744 {
6745 remove_edge (ep);
6746 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6747 }
6748 else
6749 ep->flags = EDGE_TRUE_VALUE;
6750 find_edge (cont_bb, fin_bb)->flags
6751 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6752 }
6753
6754 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6755 recompute_dominator (CDI_DOMINATORS, body_bb));
6756 if (!broken_loop)
6757 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6758 recompute_dominator (CDI_DOMINATORS, fin_bb));
6759
6760 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
6761 {
6762 class loop *loop = alloc_loop ();
6763 loop->header = body_bb;
6764 if (collapse_bb == NULL)
6765 loop->latch = cont_bb;
6766 add_loop (loop, body_bb->loop_father);
6767 }
6768}
6769
6770/* A subroutine of expand_omp_for. Generate code for an OpenACC
6771 partitioned loop. The lowering here is abstracted, in that the
6772 loop parameters are passed through internal functions, which are
6773 further lowered by oacc_device_lower, once we get to the target
6774 compiler. The loop is of the form:
6775
6776 for (V = B; V LTGT E; V += S) {BODY}
6777
6778 where LTGT is < or >. We may have a specified chunking size, CHUNKING
6779 (constant 0 for no chunking) and we will have a GWV partitioning
6780 mask, specifying dimensions over which the loop is to be
6781 partitioned (see note below). We generate code that looks like
6782 (this ignores tiling):
6783
6784 <entry_bb> [incoming FALL->body, BRANCH->exit]
6785 typedef signedintify (typeof (V)) T; // underlying signed integral type
6786 T range = E - B;
6787 T chunk_no = 0;
6788 T DIR = LTGT == '<' ? +1 : -1;
6789 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
6790 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
6791
6792 <head_bb> [created by splitting end of entry_bb]
6793 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
6794 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
6795 if (!(offset LTGT bound)) goto bottom_bb;
6796
6797 <body_bb> [incoming]
6798 V = B + offset;
6799 {BODY}
6800
6801 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
6802 offset += step;
6803 if (offset LTGT bound) goto body_bb; [*]
6804
6805 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
6806 chunk_no++;
6807 if (chunk_no < chunk_max) goto head_bb;
6808
6809 <exit_bb> [incoming]
6810 V = B + ((range -/+ 1) / S +/- 1) * S [*]
6811
6812 [*] Needed if V live at end of loop. */
6813
6814static void
6815expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6816{
6817 tree v = fd->loop.v;
6818 enum tree_code cond_code = fd->loop.cond_code;
6819 enum tree_code plus_code = PLUS_EXPR;
6820
6821 tree chunk_size = integer_minus_one_node;
6822 tree gwv = integer_zero_node;
6823 tree iter_type = TREE_TYPE (v);
6824 tree diff_type = iter_type;
6825 tree plus_type = iter_type;
6826 struct oacc_collapse *counts = NULL;
6827
6828 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6829 == GF_OMP_FOR_KIND_OACC_LOOP);
6830 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6831 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6832
6833 if (POINTER_TYPE_P (iter_type))
6834 {
6835 plus_code = POINTER_PLUS_EXPR;
6836 plus_type = sizetype;
6837 }
6838 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6839 diff_type = signed_type_for (diff_type);
6840 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6841 diff_type = integer_type_node;
6842
6843 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6844 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6845 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6846 basic_block bottom_bb = NULL;
6847
6848 /* entry_bb has two successors; the branch edge is to the exit
6849 block, fallthrough edge to body. */
6850 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6851 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6852
6853 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6854 body_bb, or to a block whose only successor is the body_bb. Its
6855 fallthrough successor is the final block (same as the branch
6856 successor of the entry_bb). */
6857 if (cont_bb)
6858 {
6859 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6860 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6861
6862 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6863 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6864 }
6865 else
6866 gcc_assert (!gimple_in_ssa_p (cfun));
6867
6868 /* The exit block only has entry_bb and cont_bb as predecessors. */
6869 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6870
6871 tree chunk_no;
6872 tree chunk_max = NULL_TREE;
6873 tree bound, offset;
6874 tree step = create_tmp_var (diff_type, ".step");
6875 bool up = cond_code == LT_EXPR;
6876 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6877 bool chunking = !gimple_in_ssa_p (cfun);
6878 bool negating;
6879
6880 /* Tiling vars. */
6881 tree tile_size = NULL_TREE;
6882 tree element_s = NULL_TREE;
6883 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6884 basic_block elem_body_bb = NULL;
6885 basic_block elem_cont_bb = NULL;
6886
6887 /* SSA instances. */
6888 tree offset_incr = NULL_TREE;
6889 tree offset_init = NULL_TREE;
6890
6891 gimple_stmt_iterator gsi;
6892 gassign *ass;
6893 gcall *call;
6894 gimple *stmt;
6895 tree expr;
6896 location_t loc;
6897 edge split, be, fte;
6898
6899 /* Split the end of entry_bb to create head_bb. */
6900 split = split_block (entry_bb, last_stmt (entry_bb));
6901 basic_block head_bb = split->dest;
6902 entry_bb = split->src;
6903
6904 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6905 gsi = gsi_last_nondebug_bb (entry_bb);
6906 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6907 loc = gimple_location (for_stmt);
6908
6909 if (gimple_in_ssa_p (cfun))
6910 {
6911 offset_init = gimple_omp_for_index (for_stmt, 0);
6912 gcc_assert (integer_zerop (fd->loop.n1));
6913 /* The SSA parallelizer does gang parallelism. */
6914 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6915 }
6916
6917 if (fd->collapse > 1 || fd->tiling)
6918 {
6919 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6920 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6921 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6922 TREE_TYPE (fd->loop.n2), loc);
6923
6924 if (SSA_VAR_P (fd->loop.n2))
6925 {
6926 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6927 true, GSI_SAME_STMT);
6928 ass = gimple_build_assign (fd->loop.n2, total);
6929 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6930 }
6931 }
6932
6933 tree b = fd->loop.n1;
6934 tree e = fd->loop.n2;
6935 tree s = fd->loop.step;
6936
6937 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6938 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6939
6940 /* Convert the step, avoiding possible unsigned->signed overflow. */
6941 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6942 if (negating)
6943 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6944 s = fold_convert (diff_type, s);
6945 if (negating)
6946 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6947 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6948
6949 if (!chunking)
6950 chunk_size = integer_zero_node;
6951 expr = fold_convert (diff_type, chunk_size);
6952 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6953 NULL_TREE, true, GSI_SAME_STMT);
6954
6955 if (fd->tiling)
6956 {
6957 /* Determine the tile size and element step,
6958 modify the outer loop step size. */
6959 tile_size = create_tmp_var (diff_type, ".tile_size");
6960 expr = build_int_cst (diff_type, 1);
6961 for (int ix = 0; ix < fd->collapse; ix++)
6962 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6963 expr = force_gimple_operand_gsi (&gsi, expr, true,
6964 NULL_TREE, true, GSI_SAME_STMT);
6965 ass = gimple_build_assign (tile_size, expr);
6966 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6967
6968 element_s = create_tmp_var (diff_type, ".element_s");
6969 ass = gimple_build_assign (element_s, s);
6970 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6971
6972 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6973 s = force_gimple_operand_gsi (&gsi, expr, true,
6974 NULL_TREE, true, GSI_SAME_STMT);
6975 }
6976
6977 /* Determine the range, avoiding possible unsigned->signed overflow. */
6978 negating = !up && TYPE_UNSIGNED (iter_type);
6979 expr = fold_build2 (MINUS_EXPR, plus_type,
6980 fold_convert (plus_type, negating ? b : e),
6981 fold_convert (plus_type, negating ? e : b));
6982 expr = fold_convert (diff_type, expr);
6983 if (negating)
6984 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6985 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6986 NULL_TREE, true, GSI_SAME_STMT);
6987
6988 chunk_no = build_int_cst (diff_type, 0);
6989 if (chunking)
6990 {
6991 gcc_assert (!gimple_in_ssa_p (cfun));
6992
6993 expr = chunk_no;
6994 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6995 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6996
6997 ass = gimple_build_assign (chunk_no, expr);
6998 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6999
7000 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7001 build_int_cst (integer_type_node,
7002 IFN_GOACC_LOOP_CHUNKS),
7003 dir, range, s, chunk_size, gwv);
7004 gimple_call_set_lhs (call, chunk_max);
7005 gimple_set_location (call, loc);
7006 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7007 }
7008 else
7009 chunk_size = chunk_no;
7010
7011 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7012 build_int_cst (integer_type_node,
7013 IFN_GOACC_LOOP_STEP),
7014 dir, range, s, chunk_size, gwv);
7015 gimple_call_set_lhs (call, step);
7016 gimple_set_location (call, loc);
7017 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7018
7019 /* Remove the GIMPLE_OMP_FOR. */
7020 gsi_remove (&gsi, true);
7021
7022 /* Fixup edges from head_bb. */
7023 be = BRANCH_EDGE (head_bb);
7024 fte = FALLTHRU_EDGE (head_bb);
7025 be->flags |= EDGE_FALSE_VALUE;
7026 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7027
7028 basic_block body_bb = fte->dest;
7029
7030 if (gimple_in_ssa_p (cfun))
7031 {
7032 gsi = gsi_last_nondebug_bb (cont_bb);
7033 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7034
7035 offset = gimple_omp_continue_control_use (cont_stmt);
7036 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7037 }
7038 else
7039 {
7040 offset = create_tmp_var (diff_type, ".offset");
7041 offset_init = offset_incr = offset;
7042 }
7043 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7044
7045 /* Loop offset & bound go into head_bb. */
7046 gsi = gsi_start_bb (head_bb);
7047
7048 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7049 build_int_cst (integer_type_node,
7050 IFN_GOACC_LOOP_OFFSET),
7051 dir, range, s,
7052 chunk_size, gwv, chunk_no);
7053 gimple_call_set_lhs (call, offset_init);
7054 gimple_set_location (call, loc);
7055 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7056
7057 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7058 build_int_cst (integer_type_node,
7059 IFN_GOACC_LOOP_BOUND),
7060 dir, range, s,
7061 chunk_size, gwv, offset_init);
7062 gimple_call_set_lhs (call, bound);
7063 gimple_set_location (call, loc);
7064 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7065
7066 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7067 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7068 GSI_CONTINUE_LINKING);
7069
7070 /* V assignment goes into body_bb. */
7071 if (!gimple_in_ssa_p (cfun))
7072 {
7073 gsi = gsi_start_bb (body_bb);
7074
7075 expr = build2 (plus_code, iter_type, b,
7076 fold_convert (plus_type, offset));
7077 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7078 true, GSI_SAME_STMT);
7079 ass = gimple_build_assign (v, expr);
7080 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7081
7082 if (fd->collapse > 1 || fd->tiling)
7083 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7084
7085 if (fd->tiling)
7086 {
7087 /* Determine the range of the element loop -- usually simply
7088 the tile_size, but could be smaller if the final
7089 iteration of the outer loop is a partial tile. */
7090 tree e_range = create_tmp_var (diff_type, ".e_range");
7091
7092 expr = build2 (MIN_EXPR, diff_type,
7093 build2 (MINUS_EXPR, diff_type, bound, offset),
7094 build2 (MULT_EXPR, diff_type, tile_size,
7095 element_s));
7096 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7097 true, GSI_SAME_STMT);
7098 ass = gimple_build_assign (e_range, expr);
7099 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7100
7101 /* Determine bound, offset & step of inner loop. */
7102 e_bound = create_tmp_var (diff_type, ".e_bound");
7103 e_offset = create_tmp_var (diff_type, ".e_offset");
7104 e_step = create_tmp_var (diff_type, ".e_step");
7105
7106 /* Mark these as element loops. */
7107 tree t, e_gwv = integer_minus_one_node;
7108 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7109
7110 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7111 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7112 element_s, chunk, e_gwv, chunk);
7113 gimple_call_set_lhs (call, e_offset);
7114 gimple_set_location (call, loc);
7115 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7116
7117 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7118 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7119 element_s, chunk, e_gwv, e_offset);
7120 gimple_call_set_lhs (call, e_bound);
7121 gimple_set_location (call, loc);
7122 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7123
7124 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7125 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7126 element_s, chunk, e_gwv);
7127 gimple_call_set_lhs (call, e_step);
7128 gimple_set_location (call, loc);
7129 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7130
7131 /* Add test and split block. */
7132 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7133 stmt = gimple_build_cond_empty (expr);
7134 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7135 split = split_block (body_bb, stmt);
7136 elem_body_bb = split->dest;
7137 if (cont_bb == body_bb)
7138 cont_bb = elem_body_bb;
7139 body_bb = split->src;
7140
7141 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7142
7143 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7144 if (cont_bb == NULL)
7145 {
7146 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7147 e->probability = profile_probability::even ();
7148 split->probability = profile_probability::even ();
7149 }
7150
7151 /* Initialize the user's loop vars. */
7152 gsi = gsi_start_bb (elem_body_bb);
7153 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7154 }
7155 }
7156
7157 /* Loop increment goes into cont_bb. If this is not a loop, we
7158 will have spawned threads as if it was, and each one will
7159 execute one iteration. The specification is not explicit about
7160 whether such constructs are ill-formed or not, and they can
7161 occur, especially when noreturn routines are involved. */
7162 if (cont_bb)
7163 {
7164 gsi = gsi_last_nondebug_bb (cont_bb);
7165 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7166 loc = gimple_location (cont_stmt);
7167
7168 if (fd->tiling)
7169 {
7170 /* Insert element loop increment and test. */
7171 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7172 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7173 true, GSI_SAME_STMT);
7174 ass = gimple_build_assign (e_offset, expr);
7175 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7176 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7177
7178 stmt = gimple_build_cond_empty (expr);
7179 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7180 split = split_block (cont_bb, stmt);
7181 elem_cont_bb = split->src;
7182 cont_bb = split->dest;
7183
7184 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7185 split->probability = profile_probability::unlikely ().guessed ();
7186 edge latch_edge
7187 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7188 latch_edge->probability = profile_probability::likely ().guessed ();
7189
7190 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7191 skip_edge->probability = profile_probability::unlikely ().guessed ();
7192 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7193 loop_entry_edge->probability
7194 = profile_probability::likely ().guessed ();
7195
7196 gsi = gsi_for_stmt (cont_stmt);
7197 }
7198
7199 /* Increment offset. */
7200 if (gimple_in_ssa_p (cfun))
7201 expr = build2 (plus_code, iter_type, offset,
7202 fold_convert (plus_type, step));
7203 else
7204 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7205 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7206 true, GSI_SAME_STMT);
7207 ass = gimple_build_assign (offset_incr, expr);
7208 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7209 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7210 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7211
7212 /* Remove the GIMPLE_OMP_CONTINUE. */
7213 gsi_remove (&gsi, true);
7214
7215 /* Fixup edges from cont_bb. */
7216 be = BRANCH_EDGE (cont_bb);
7217 fte = FALLTHRU_EDGE (cont_bb);
7218 be->flags |= EDGE_TRUE_VALUE;
7219 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7220
7221 if (chunking)
7222 {
7223 /* Split the beginning of exit_bb to make bottom_bb. We
7224 need to insert a nop at the start, because splitting is
7225 after a stmt, not before. */
7226 gsi = gsi_start_bb (exit_bb);
7227 stmt = gimple_build_nop ();
7228 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7229 split = split_block (exit_bb, stmt);
7230 bottom_bb = split->src;
7231 exit_bb = split->dest;
7232 gsi = gsi_last_bb (bottom_bb);
7233
7234 /* Chunk increment and test goes into bottom_bb. */
7235 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7236 build_int_cst (diff_type, 1));
7237 ass = gimple_build_assign (chunk_no, expr);
7238 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7239
7240 /* Chunk test at end of bottom_bb. */
7241 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7242 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7243 GSI_CONTINUE_LINKING);
7244
7245 /* Fixup edges from bottom_bb. */
7246 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7247 split->probability = profile_probability::unlikely ().guessed ();
7248 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7249 latch_edge->probability = profile_probability::likely ().guessed ();
7250 }
7251 }
7252
7253 gsi = gsi_last_nondebug_bb (exit_bb);
7254 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7255 loc = gimple_location (gsi_stmt (gsi));
7256
7257 if (!gimple_in_ssa_p (cfun))
7258 {
7259 /* Insert the final value of V, in case it is live. This is the
7260 value for the only thread that survives past the join. */
7261 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7262 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7263 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7264 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7265 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7266 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7267 true, GSI_SAME_STMT);
7268 ass = gimple_build_assign (v, expr);
7269 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7270 }
7271
7272 /* Remove the OMP_RETURN. */
7273 gsi_remove (&gsi, true);
7274
7275 if (cont_bb)
7276 {
7277 /* We now have one, two or three nested loops. Update the loop
7278 structures. */
7279 class loop *parent = entry_bb->loop_father;
7280 class loop *body = body_bb->loop_father;
7281
7282 if (chunking)
7283 {
7284 class loop *chunk_loop = alloc_loop ();
7285 chunk_loop->header = head_bb;
7286 chunk_loop->latch = bottom_bb;
7287 add_loop (chunk_loop, parent);
7288 parent = chunk_loop;
7289 }
7290 else if (parent != body)
7291 {
7292 gcc_assert (body->header == body_bb);
7293 gcc_assert (body->latch == cont_bb
7294 || single_pred (body->latch) == cont_bb);
7295 parent = NULL;
7296 }
7297
7298 if (parent)
7299 {
7300 class loop *body_loop = alloc_loop ();
7301 body_loop->header = body_bb;
7302 body_loop->latch = cont_bb;
7303 add_loop (body_loop, parent);
7304
7305 if (fd->tiling)
7306 {
7307 /* Insert tiling's element loop. */
7308 class loop *inner_loop = alloc_loop ();
7309 inner_loop->header = elem_body_bb;
7310 inner_loop->latch = elem_cont_bb;
7311 add_loop (inner_loop, body_loop);
7312 }
7313 }
7314 }
7315}
7316
7317/* Expand the OMP loop defined by REGION. */
7318
7319static void
7320expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7321{
7322 struct omp_for_data fd;
7323 struct omp_for_data_loop *loops;
7324
7325 loops = XALLOCAVEC (struct omp_for_data_loop,
7326 gimple_omp_for_collapse (last_stmt (region->entry)));
7327 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7328 &fd, loops);
7329 region->sched_kind = fd.sched_kind;
7330 region->sched_modifiers = fd.sched_modifiers;
7331 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7332 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7333 {
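/* Descriptive note (an interpretation of the check below): when the
   non-rectangular multipliers m1/m2 and the steps are all compile-time
   constants, diagnose early if the dependent bound cannot stay in
   lockstep with the iteration, i.e. if (m2 - m1) times the outer
   loop's step is not a multiple of this loop's step. */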
7334 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7335 if ((loops[i].m1 || loops[i].m2)
7336 && (loops[i].m1 == NULL_TREE
7337 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7338 && (loops[i].m2 == NULL_TREE
7339 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7340 && TREE_CODE (loops[i].step) == INTEGER_CST
7341 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7342 {
7343 tree t;
7344 tree itype = TREE_TYPE (loops[i].v);
7345 if (loops[i].m1 && loops[i].m2)
7346 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7347 else if (loops[i].m1)
7348 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7349 else
7350 t = loops[i].m2;
7351 t = fold_build2 (MULT_EXPR, itype, t,
7352 fold_convert (itype,
7353 loops[i - loops[i].outer].step));
7354 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7355 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7356 fold_build1 (NEGATE_EXPR, itype, t),
7357 fold_build1 (NEGATE_EXPR, itype,
7358 fold_convert (itype,
7359 loops[i].step)));
7360 else
7361 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7362 fold_convert (itype, loops[i].step));
7363 if (integer_nonzerop (t))
7364 error_at (gimple_location (fd.for_stmt),
7365 "invalid OpenMP non-rectangular loop step; "
7366 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7367 "step %qE",
7368 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7369 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7370 loops[i - loops[i].outer].step, i + 1,
7371 loops[i].step);
7372 }
7373 }
7374
7375 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7376 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7377 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7378 if (region->cont)
7379 {
7380 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7381 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7382 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7383 }
7384 else
7385 /* If there isn't a continue then this is a degenerate case where
7386 the introduction of abnormal edges during lowering will prevent
7387 original loops from being detected. Fix that up. */
7388 loops_state_set (LOOPS_NEED_FIXUP);
7389
7390 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7391 {
7392 if (fd.non_rect)
7393 sorry_at (gimple_location (fd.for_stmt),
7394 "non-rectangular %<simd%> not supported yet");
7395 expand_omp_simd (region, &fd);
7396 }
7397 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7398 {
7399 gcc_assert (!inner_stmt && !fd.non_rect);
7400 expand_oacc_for (region, &fd);
7401 }
7402 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7403 {
7404 if (fd.non_rect)
7405 sorry_at (gimple_location (fd.for_stmt),
7406 "non-rectangular %<taskloop%> not supported yet");
7407 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7408 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7409 else
7410 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
7411 }
7412 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
7413 && !fd.have_ordered)
7414 {
7415 if (fd.non_rect
7416 && (gimple_omp_for_combined_into_p (fd.for_stmt)
7417 || gimple_omp_for_combined_p (fd.for_stmt)))
7418 sorry_at (gimple_location (fd.for_stmt),
7419 "non-rectangular OpenMP loops not supported yet");
7420 if (fd.chunk_size == NULL)
7421 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
7422 else
7423 expand_omp_for_static_chunk (region, &fd, inner_stmt);
7424 }
7425 else
7426 {
7427 int fn_index, start_ix, next_ix;
7428 unsigned HOST_WIDE_INT sched = 0;
7429 tree sched_arg = NULL_TREE;
7430
7431 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
7432 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
7433 if (fd.chunk_size == NULL
7434 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
7435 fd.chunk_size = integer_zero_node;
7436 switch (fd.sched_kind)
7437 {
7438 case OMP_CLAUSE_SCHEDULE_RUNTIME:
7439 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
7440 && fd.lastprivate_conditional == 0)
7441 {
7442 gcc_assert (!fd.have_ordered);
7443 fn_index = 6;
7444 sched = 4;
7445 }
7446 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7447 && !fd.have_ordered
7448 && fd.lastprivate_conditional == 0)
7449 fn_index = 7;
7450 else
7451 {
7452 fn_index = 3;
7453 sched = (HOST_WIDE_INT_1U << 31);
7454 }
7455 break;
7456 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
7457 case OMP_CLAUSE_SCHEDULE_GUIDED:
7458 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7459 && !fd.have_ordered
7460 && fd.lastprivate_conditional == 0)
7461 {
7462 fn_index = 3 + fd.sched_kind;
7463 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7464 break;
7465 }
7466 fn_index = fd.sched_kind;
7467 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7468 sched += (HOST_WIDE_INT_1U << 31);
7469 break;
7470 case OMP_CLAUSE_SCHEDULE_STATIC:
7471 gcc_assert (fd.have_ordered);
7472 fn_index = 0;
7473 sched = (HOST_WIDE_INT_1U << 31) + 1;
7474 break;
7475 default:
7476 gcc_unreachable ();
7477 }
7478 if (!fd.ordered)
7479 fn_index += fd.have_ordered * 8;
7480 if (fd.ordered)
7481 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
7482 else
7483 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
7484 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
7485 if (fd.have_reductemp || fd.have_pointer_condtemp)
7486 {
7487 if (fd.ordered)
7488 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
7489 else if (fd.have_ordered)
7490 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
7491 else
7492 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
7493 sched_arg = build_int_cstu (long_integer_type_node, sched);
7494 if (!fd.chunk_size)
7495 fd.chunk_size = integer_zero_node;
7496 }
7497 if (fd.iter_type == long_long_unsigned_type_node)
7498 {
7499 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
7500 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
7501 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
7502 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
7503 }
7504 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
7505 (enum built_in_function) next_ix, sched_arg,
7506 inner_stmt);
7507 }
7508
7509 if (gimple_in_ssa_p (cfun))
7510 update_ssa (TODO_update_ssa_only_virtuals);
7511}
7512
7513/* Expand code for an OpenMP sections directive. In pseudo code, we generate
7514
7515 v = GOMP_sections_start (n);
7516 L0:
7517 switch (v)
7518 {
7519 case 0:
7520 goto L2;
7521 case 1:
7522 section 1;
7523 goto L1;
7524 case 2:
7525 ...
7526 case n:
7527 ...
7528 default:
7529 abort ();
7530 }
7531 L1:
7532 v = GOMP_sections_next ();
7533 goto L0;
7534 L2:
7535 reduction;
7536
7537 If this is a combined parallel sections, replace the call to
7538 GOMP_sections_start with call to GOMP_sections_next. */
7539
7540static void
7541expand_omp_sections (struct omp_region *region)
7542{
7543 tree t, u, vin = NULL, vmain, vnext, l2;
7544 unsigned len;
7545 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
7546 gimple_stmt_iterator si, switch_si;
7547 gomp_sections *sections_stmt;
7548 gimple *stmt;
7549 gomp_continue *cont;
7550 edge_iterator ei;
7551 edge e;
7552 struct omp_region *inner;
7553 unsigned i, casei;
7554 bool exit_reachable = region->cont != NULL;
7555
7556 gcc_assert (region->exit != NULL);
7557 entry_bb = region->entry;
7558 l0_bb = single_succ (entry_bb);
7559 l1_bb = region->cont;
7560 l2_bb = region->exit;
7561 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
7562 l2 = gimple_block_label (l2_bb);
7563 else
7564 {
7565 /* This can happen if there are reductions. */
7566 len = EDGE_COUNT (l0_bb->succs);
7567 gcc_assert (len > 0);
7568 e = EDGE_SUCC (l0_bb, len - 1);
7569 si = gsi_last_nondebug_bb (e->dest);
7570 l2 = NULL_TREE;
7571 if (gsi_end_p (si)
7572 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7573 l2 = gimple_block_label (e->dest);
7574 else
7575 FOR_EACH_EDGE (e, ei, l0_bb->succs)
7576 {
7577 si = gsi_last_nondebug_bb (e->dest);
7578 if (gsi_end_p (si)
7579 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7580 {
7581 l2 = gimple_block_label (e->dest);
7582 break;
7583 }
7584 }
7585 }
7586 if (exit_reachable)
7587 default_bb = create_empty_bb (l1_bb->prev_bb);
7588 else
7589 default_bb = create_empty_bb (l0_bb);
7590
7591 /* We will build a switch() with enough cases for all the
7592 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
7593 and a default case to abort if something goes wrong. */
7594 len = EDGE_COUNT (l0_bb->succs);
7595
7596 /* Use vec::quick_push on label_vec throughout, since we know the size
7597 in advance. */
7598 auto_vec<tree> label_vec (len);
7599
7600 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
7601 GIMPLE_OMP_SECTIONS statement. */
7602 si = gsi_last_nondebug_bb (entry_bb);
7603 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
7604 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
7605 vin = gimple_omp_sections_control (sections_stmt);
7606 tree clauses = gimple_omp_sections_clauses (sections_stmt);
7607 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
7608 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
7609 tree cond_var = NULL_TREE;
7610 if (reductmp || condtmp)
7611 {
7612 tree reductions = null_pointer_node, mem = null_pointer_node;
7613 tree memv = NULL_TREE, condtemp = NULL_TREE;
7614 gimple_stmt_iterator gsi = gsi_none ();
7615 gimple *g = NULL;
7616 if (reductmp)
7617 {
7618 reductions = OMP_CLAUSE_DECL (reductmp);
7619 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
7620 g = SSA_NAME_DEF_STMT (reductions);
7621 reductions = gimple_assign_rhs1 (g);
7622 OMP_CLAUSE_DECL (reductmp) = reductions;
7623 gsi = gsi_for_stmt (g);
7624 }
7625 else
7626 gsi = si;
7627 if (condtmp)
7628 {
7629 condtemp = OMP_CLAUSE_DECL (condtmp);
7630 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
7631 OMP_CLAUSE__CONDTEMP_);
7632 cond_var = OMP_CLAUSE_DECL (c);
7633 tree type = TREE_TYPE (condtemp);
7634 memv = create_tmp_var (type);
7635 TREE_ADDRESSABLE (memv) = 1;
7636 unsigned cnt = 0;
7637 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
7638 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
7639 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
7640 ++cnt;
7641 unsigned HOST_WIDE_INT sz
7642 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
7643 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
7644 false);
7645 mem = build_fold_addr_expr (memv);
7646 }
7647 t = build_int_cst (unsigned_type_node, len - 1);
7648 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
7649 stmt = gimple_build_call (u, 3, t, reductions, mem);
7650 gimple_call_set_lhs (stmt, vin);
7651 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7652 if (condtmp)
7653 {
7654 expand_omp_build_assign (&gsi, condtemp, memv, false);
7655 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
7656 vin, build_one_cst (TREE_TYPE (cond_var)));
7657 expand_omp_build_assign (&gsi, cond_var, t, false);
7658 }
7659 if (reductmp)
7660 {
7661 gsi_remove (&gsi, true);
7662 release_ssa_name (gimple_assign_lhs (g));
7663 }
7664 }
7665 else if (!is_combined_parallel (region))
7666 {
7667 /* If we are not inside a combined parallel+sections region,
7668 call GOMP_sections_start. */
7669 t = build_int_cst (unsigned_type_node, len - 1);
7670 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
7671 stmt = gimple_build_call (u, 1, t);
7672 }
7673 else
7674 {
7675 /* Otherwise, call GOMP_sections_next. */
7676 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
7677 stmt = gimple_build_call (u, 0);
7678 }
7679 if (!reductmp && !condtmp)
7680 {
7681 gimple_call_set_lhs (stmt, vin);
7682 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
7683 }
7684 gsi_remove (&si, true);
7685
7686 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
7687 L0_BB. */
7688 switch_si = gsi_last_nondebug_bb (l0_bb);
7689 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
7690 if (exit_reachable)
7691 {
7692 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
7693 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
7694 vmain = gimple_omp_continue_control_use (cont);
7695 vnext = gimple_omp_continue_control_def (cont);
7696 }
7697 else
7698 {
7699 vmain = vin;
7700 vnext = NULL_TREE;
7701 }
7702
7703 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
7704 label_vec.quick_push (t);
7705 i = 1;
7706
7707 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
7708 for (inner = region->inner, casei = 1;
7709 inner;
7710 inner = inner->next, i++, casei++)
7711 {
7712 basic_block s_entry_bb, s_exit_bb;
7713
7714 /* Skip optional reduction region. */
7715 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
7716 {
7717 --i;
7718 --casei;
7719 continue;
7720 }
7721
7722 s_entry_bb = inner->entry;
7723 s_exit_bb = inner->exit;
7724
7725 t = gimple_block_label (s_entry_bb);
7726 u = build_int_cst (unsigned_type_node, casei);
7727 u = build_case_label (u, NULL, t);
7728 label_vec.quick_push (u);
7729
7730 si = gsi_last_nondebug_bb (s_entry_bb);
7731 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
7732 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
7733 gsi_remove (&si, true);
7734 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
7735
7736 if (s_exit_bb == NULL)
7737 continue;
7738
7739 si = gsi_last_nondebug_bb (s_exit_bb);
7740 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7741 gsi_remove (&si, true);
7742
7743 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
7744 }
7745
7746 /* Error handling code goes in DEFAULT_BB. */
7747 t = gimple_block_label (default_bb);
7748 u = build_case_label (NULL, NULL, t);
7749 make_edge (l0_bb, default_bb, 0);
7750 add_bb_to_loop (default_bb, current_loops->tree_root);
7751
7752 stmt = gimple_build_switch (vmain, u, label_vec);
7753 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
7754 gsi_remove (&switch_si, true);
7755
7756 si = gsi_start_bb (default_bb);
7757 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
7758 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
7759
7760 if (exit_reachable)
7761 {
7762 tree bfn_decl;
7763
7764 /* Code to get the next section goes in L1_BB. */
7765 si = gsi_last_nondebug_bb (l1_bb);
7766 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
7767
7768 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
7769 stmt = gimple_build_call (bfn_decl, 0);
7770 gimple_call_set_lhs (stmt, vnext);
7771 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7772 if (cond_var)
7773 {
7774 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
7775 vnext, build_one_cst (TREE_TYPE (cond_var)));
7776 expand_omp_build_assign (&si, cond_var, t, false);
7777 }
7778 gsi_remove (&si, true);
7779
7780 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
7781 }
7782
7783 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
7784 si = gsi_last_nondebug_bb (l2_bb);
7785 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
7786 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
7787 else if (gimple_omp_return_lhs (gsi_stmt (si)))
7788 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
7789 else
7790 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
7791 stmt = gimple_build_call (t, 0);
7792 if (gimple_omp_return_lhs (gsi_stmt (si)))
7793 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
7794 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
7795 gsi_remove (&si, true);
7796
7797 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
7798}
7799
7800/* Expand code for an OpenMP single directive. We've already expanded
7801 much of the code, here we simply place the GOMP_barrier call. */
7802
7803static void
7804expand_omp_single (struct omp_region *region)
7805{
7806 basic_block entry_bb, exit_bb;
7807 gimple_stmt_iterator si;
7808
7809 entry_bb = region->entry;
7810 exit_bb = region->exit;
7811
7812 si = gsi_last_nondebug_bb (entry_bb);
7813 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
7814 gsi_remove (&si, true);
7815 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7816
7817 si = gsi_last_nondebug_bb (exit_bb);
7818 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
7819 {
7820 tree t = gimple_omp_return_lhs (gsi_stmt (si));
7821 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
7822 }
7823 gsi_remove (&si, true);
7824 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7825}
7826
7827/* Generic expansion for OpenMP synchronization directives: master,
7828 ordered and critical. All we need to do here is remove the entry
7829 and exit markers for REGION. */
7830
7831static void
7832expand_omp_synch (struct omp_region *region)
7833{
7834 basic_block entry_bb, exit_bb;
7835 gimple_stmt_iterator si;
7836
7837 entry_bb = region->entry;
7838 exit_bb = region->exit;
7839
7840 si = gsi_last_nondebug_bb (entry_bb);
7841 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
7842 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
7843 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
7844 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
7845 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
7846 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7847 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
7848 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
7849 {
7850 expand_omp_taskreg (region);
7851 return;
7852 }
7853 gsi_remove (&si, true);
7854 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7855
7856 if (exit_bb)
7857 {
7858 si = gsi_last_nondebug_bb (exit_bb);
7859 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7860 gsi_remove (&si, true);
7861 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7862 }
7863}
7864
7865/* Translate enum omp_memory_order to enum memmodel. The two enums
7866 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
7867 is 0. */
7868
7869static enum memmodel
7870omp_memory_order_to_memmodel (enum omp_memory_order mo)
7871{
7872 switch (mo)
7873 {
7874 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7875 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7876 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7877 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7878 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7879 default: gcc_unreachable ();
7880 }
7881}
7882
7883/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7884 operation as a normal volatile load. */
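/* For example (a sketch, memory model permitting): with a 4-byte type,

     #pragma omp atomic read
     v = *addr;

   is expanded to roughly

     v = __atomic_load_4 (addr, mo);

   where MO is the memmodel derived by omp_memory_order_to_memmodel. */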
7885
7886static bool
7887expand_omp_atomic_load (basic_block load_bb, tree addr,
7888 tree loaded_val, int index)
7889{
7890 enum built_in_function tmpbase;
7891 gimple_stmt_iterator gsi;
7892 basic_block store_bb;
7893 location_t loc;
7894 gimple *stmt;
7895 tree decl, call, type, itype;
7896
7897 gsi = gsi_last_nondebug_bb (load_bb);
7898 stmt = gsi_stmt (gsi);
7899 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7900 loc = gimple_location (stmt);
7901
7902 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7903 is smaller than word size, then expand_atomic_load assumes that the load
7904 is atomic. We could avoid the builtin entirely in this case. */
7905
7906 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7907 decl = builtin_decl_explicit (tmpbase);
7908 if (decl == NULL_TREE)
7909 return false;
7910
7911 type = TREE_TYPE (loaded_val);
7912 itype = TREE_TYPE (TREE_TYPE (decl));
7913
7914 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7915 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7916 call = build_call_expr_loc (loc, decl, 2, addr, mo);
7917 if (!useless_type_conversion_p (type, itype))
7918 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7919 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7920
7921 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7922 gsi_remove (&gsi, true);
7923
7924 store_bb = single_succ (load_bb);
7925 gsi = gsi_last_nondebug_bb (store_bb);
7926 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7927 gsi_remove (&gsi, true);
7928
7929 if (gimple_in_ssa_p (cfun))
7930 update_ssa (TODO_update_ssa_no_phi);
7931
7932 return true;
7933}
7934
7935/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7936 operation as a normal volatile store. */
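/* For example (a sketch):

     #pragma omp atomic write
     *addr = val;

   becomes roughly __atomic_store_N (addr, val, mo).  If the previous
   value is also needed, the store is treated as an exchange instead,
   __atomic_exchange_N (addr, val, mo), guarded by
   can_atomic_exchange_p. */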
7937
7938static bool
7939expand_omp_atomic_store (basic_block load_bb, tree addr,
7940 tree loaded_val, tree stored_val, int index)
7941{
7942 enum built_in_function tmpbase;
7943 gimple_stmt_iterator gsi;
7944 basic_block store_bb = single_succ (load_bb);
7945 location_t loc;
7946 gimple *stmt;
7947 tree decl, call, type, itype;
7948 machine_mode imode;
7949 bool exchange;
7950
7951 gsi = gsi_last_nondebug_bb (load_bb);
7952 stmt = gsi_stmt (gsi);
7953 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7954
7955 /* If the load value is needed, then this isn't a store but an exchange. */
7956 exchange = gimple_omp_atomic_need_value_p (stmt);
7957
7958 gsi = gsi_last_nondebug_bb (store_bb);
7959 stmt = gsi_stmt (gsi);
7960 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7961 loc = gimple_location (stmt);
7962
7963 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7964 is smaller than word size, then expand_atomic_store assumes that the store
7965 is atomic. We could avoid the builtin entirely in this case. */
7966
7967 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7968 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7969 decl = builtin_decl_explicit (tmpbase);
7970 if (decl == NULL_TREE)
7971 return false;
7972
7973 type = TREE_TYPE (stored_val);
7974
7975 /* Dig out the type of the function's second argument. */
7976 itype = TREE_TYPE (decl);
7977 itype = TYPE_ARG_TYPES (itype);
7978 itype = TREE_CHAIN (itype);
7979 itype = TREE_VALUE (itype);
7980 imode = TYPE_MODE (itype);
7981
7982 if (exchange && !can_atomic_exchange_p (imode, true))
7983 return false;
7984
7985 if (!useless_type_conversion_p (itype, type))
7986 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7987 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7988 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7989 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7990 if (exchange)
7991 {
7992 if (!useless_type_conversion_p (type, itype))
7993 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7994 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7995 }
7996
7997 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7998 gsi_remove (&gsi, true);
7999
8000 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8001 gsi = gsi_last_nondebug_bb (load_bb);
8002 gsi_remove (&gsi, true);
8003
8004 if (gimple_in_ssa_p (cfun))
8005 update_ssa (TODO_update_ssa_no_phi);
8006
8007 return true;
8008}
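/* Editorial illustration (not part of omp-expand.c; function names are
   invented for this sketch): for a supported size and alignment, the
   expansion above amounts to the __atomic_store_N builtin, or to
   __atomic_exchange_N when the old value is needed.  Relaxed ordering is
   shown only for simplicity; the real code derives the memory model from
   the directive's memory-order clause.  */

static void
example_atomic_write (int *addr, int val)
{
  /* #pragma omp atomic write:  *addr = val;  */
  __atomic_store_n (addr, val, __ATOMIC_RELAXED);
}

static int
example_atomic_capture (int *addr, int val)
{
  /* The capture form needs the previous value, so the store becomes an
     exchange.  */
  return __atomic_exchange_n (addr, val, __ATOMIC_RELAXED);
}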
8009
8010/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8011 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8012 size of the data type, and thus usable to find the index of the builtin
8013 decl. Returns false if the expression is not of the proper form. */
8014
8015static bool
8016expand_omp_atomic_fetch_op (basic_block load_bb,
8017 tree addr, tree loaded_val,
8018 tree stored_val, int index)
8019{
8020 enum built_in_function oldbase, newbase, tmpbase;
8021 tree decl, itype, call;
8022 tree lhs, rhs;
8023 basic_block store_bb = single_succ (load_bb);
8024 gimple_stmt_iterator gsi;
8025 gimple *stmt;
8026 location_t loc;
8027 enum tree_code code;
8028 bool need_old, need_new;
8029 machine_mode imode;
8030
8031 /* We expect to find the following sequences:
8032
8033 load_bb:
8034 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8035
8036 store_bb:
8037 val = tmp OP something; (or: something OP tmp)
8038 GIMPLE_OMP_STORE (val)
8039
8040 ???FIXME: Allow a more flexible sequence.
8041 Perhaps use data flow to pick the statements.
8042
8043 */
8044
8045 gsi = gsi_after_labels (store_bb);
8046 stmt = gsi_stmt (gsi);
8047 if (is_gimple_debug (stmt))
8048 {
8049 gsi_next_nondebug (&gsi);
8050 if (gsi_end_p (gsi))
8051 return false;
8052 stmt = gsi_stmt (gsi);
8053 }
8054 loc = gimple_location (stmt);
8055 if (!is_gimple_assign (stmt))
8056 return false;
8057 gsi_next_nondebug (&gsi);
8058 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8059 return false;
8060 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8061 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8062 enum omp_memory_order omo
8063 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8064 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8065 gcc_checking_assert (!need_old || !need_new);
8066
8067 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8068 return false;
8069
8070 /* Check for one of the supported fetch-op operations. */
8071 code = gimple_assign_rhs_code (stmt);
8072 switch (code)
8073 {
8074 case PLUS_EXPR:
8075 case POINTER_PLUS_EXPR:
8076 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8077 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8078 break;
8079 case MINUS_EXPR:
8080 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8081 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8082 break;
8083 case BIT_AND_EXPR:
8084 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8085 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8086 break;
8087 case BIT_IOR_EXPR:
8088 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8089 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8090 break;
8091 case BIT_XOR_EXPR:
8092 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8093 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8094 break;
8095 default:
8096 return false;
8097 }
8098
8099 /* Make sure the expression is of the proper form. */
8100 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8101 rhs = gimple_assign_rhs2 (stmt);
8102 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8103 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8104 rhs = gimple_assign_rhs1 (stmt);
8105 else
8106 return false;
8107
8108 tmpbase = ((enum built_in_function)
8109 ((need_new ? newbase : oldbase) + index + 1));
8110 decl = builtin_decl_explicit (tmpbase);
8111 if (decl == NULL_TREE)
8112 return false;
8113 itype = TREE_TYPE (TREE_TYPE (decl));
8114 imode = TYPE_MODE (itype);
8115
8116 /* We could test all of the various optabs involved, but the fact of the
8117 matter is that (with the exception of i486 vs i586 and xadd) all targets
8118 that support any atomic operation optab also implement compare-and-swap.
8119 Let optabs.c take care of expanding any compare-and-swap loop. */
8120 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8121 return false;
8122
8123 gsi = gsi_last_nondebug_bb (load_bb);
8124 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8125
8126 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8127 It only requires that the operation happen atomically. Thus we can
8128 use the RELAXED memory model. */
8129 call = build_call_expr_loc (loc, decl, 3, addr,
8130 fold_convert_loc (loc, itype, rhs),
8131 build_int_cst (NULL, mo));
8132
8133 if (need_old || need_new)
8134 {
8135 lhs = need_old ? loaded_val : stored_val;
8136 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8137 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8138 }
8139 else
8140 call = fold_convert_loc (loc, void_type_node, call);
8141 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8142 gsi_remove (&gsi, true);
8143
8144 gsi = gsi_last_nondebug_bb (store_bb);
8145 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8146 gsi_remove (&gsi, true);
8147 gsi = gsi_last_nondebug_bb (store_bb);
8148 stmt = gsi_stmt (gsi);
8149 gsi_remove (&gsi, true);
8150
8151 if (gimple_in_ssa_p (cfun))
8152 {
8153 release_defs (stmt);
8154 update_ssa (TODO_update_ssa_no_phi);
8155 }
8156
8157 return true;
8158}
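/* Editorial illustration (invented names, not part of omp-expand.c): for a
   recognized update such as "x += v" on a 4-byte int, the routine above
   emits the corresponding fetch-op builtin; with relaxed ordering that is
   equivalent to this sketch.  When need_new is set, the ..._add_fetch
   variant is selected instead so the new value can be captured.  */

static void
example_atomic_fetch_add (int *x, int v)
{
  /* Plain "#pragma omp atomic" update: the result is discarded.  */
  (void) __atomic_fetch_add (x, v, __ATOMIC_RELAXED);
}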
8159
8160/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8161
8162 oldval = *addr;
8163 repeat:
8164 newval = rhs; // with oldval replacing *addr in rhs
8165 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8166 if (oldval != newval)
8167 goto repeat;
8168
8169 INDEX is log2 of the size of the data type, and thus usable to find the
8170 index of the builtin decl. */
8171
8172static bool
8173expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8174 tree addr, tree loaded_val, tree stored_val,
8175 int index)
8176{
8177 tree loadedi, storedi, initial, new_storedi, old_vali;
8178 tree type, itype, cmpxchg, iaddr, atype;
8179 gimple_stmt_iterator si;
8180 basic_block loop_header = single_succ (load_bb);
8181 gimple *phi, *stmt;
8182 edge e;
8183 enum built_in_function fncode;
8184
8185 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8186 order to use the RELAXED memory model effectively. */
8187 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8188 + index + 1);
8189 cmpxchg = builtin_decl_explicit (fncode);
8190 if (cmpxchg == NULL_TREE)
8191 return false;
8192 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8193 atype = type;
8194 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8195
8196 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8197 || !can_atomic_load_p (TYPE_MODE (itype)))
8198 return false;
8199
8200 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8201 si = gsi_last_nondebug_bb (load_bb);
8202 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8203
8204 /* For floating-point values, we'll need to view-convert them to integers
8205 so that we can perform the atomic compare and swap. Simplify the
8206 following code by always setting up the "i"ntegral variables. */
8207 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8208 {
8209 tree iaddr_val;
8210
8211 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8212 true));
8213 atype = itype;
8214 iaddr_val
8215 = force_gimple_operand_gsi (&si,
8216 fold_convert (TREE_TYPE (iaddr), addr),
8217 false, NULL_TREE, true, GSI_SAME_STMT);
8218 stmt = gimple_build_assign (iaddr, iaddr_val);
8219 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8220 loadedi = create_tmp_var (itype);
8221 if (gimple_in_ssa_p (cfun))
8222 loadedi = make_ssa_name (loadedi);
8223 }
8224 else
8225 {
8226 iaddr = addr;
8227 loadedi = loaded_val;
8228 }
8229
8230 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8231 tree loaddecl = builtin_decl_explicit (fncode);
8232 if (loaddecl)
8233 initial
8234 = fold_convert (atype,
8235 build_call_expr (loaddecl, 2, iaddr,
8236 build_int_cst (NULL_TREE,
8237 MEMMODEL_RELAXED)));
8238 else
8239 {
8240 tree off
8241 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8242 true), 0);
8243 initial = build2 (MEM_REF, atype, iaddr, off);
8244 }
8245
8246 initial
8247 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8248 GSI_SAME_STMT);
8249
8250 /* Move the value to the LOADEDI temporary. */
8251 if (gimple_in_ssa_p (cfun))
8252 {
8253 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8254 phi = create_phi_node (loadedi, loop_header);
8255 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8256 initial);
8257 }
8258 else
8259 gsi_insert_before (&si,
8260 gimple_build_assign (loadedi, initial),
8261 GSI_SAME_STMT);
8262 if (loadedi != loaded_val)
8263 {
8264 gimple_stmt_iterator gsi2;
8265 tree x;
8266
8267 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8268 gsi2 = gsi_start_bb (loop_header);
8269 if (gimple_in_ssa_p (cfun))
8270 {
8271 gassign *stmt;
8272 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8273 true, GSI_SAME_STMT);
8274 stmt = gimple_build_assign (loaded_val, x);
8275 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8276 }
8277 else
8278 {
8279 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8280 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8281 true, GSI_SAME_STMT);
8282 }
8283 }
8284 gsi_remove (&si, true);
8285
8286 si = gsi_last_nondebug_bb (store_bb);
8287 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8288
8289 if (iaddr == addr)
8290 storedi = stored_val;
8291 else
8292 storedi
8293 = force_gimple_operand_gsi (&si,
8294 build1 (VIEW_CONVERT_EXPR, itype,
8295 stored_val), true, NULL_TREE, true,
8296 GSI_SAME_STMT);
8297
8298 /* Build the compare&swap statement. */
8299 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8300 new_storedi = force_gimple_operand_gsi (&si,
8301 fold_convert (TREE_TYPE (loadedi),
8302 new_storedi),
8303 true, NULL_TREE,
8304 true, GSI_SAME_STMT);
8305
8306 if (gimple_in_ssa_p (cfun))
8307 old_vali = loadedi;
8308 else
8309 {
8310 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8311 stmt = gimple_build_assign (old_vali, loadedi);
8312 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8313
8314 stmt = gimple_build_assign (loadedi, new_storedi);
8315 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8316 }
8317
8318 /* Note that we always perform the comparison as an integer, even for
8319 floating point. This allows the atomic operation to properly
8320 succeed even with NaNs and -0.0. */
8321 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8322 stmt = gimple_build_cond_empty (ne);
8323 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8324
8325 /* Update cfg. */
8326 e = single_succ_edge (store_bb);
8327 e->flags &= ~EDGE_FALLTHRU;
8328 e->flags |= EDGE_FALSE_VALUE;
8329 /* Expect no looping. */
8330 e->probability = profile_probability::guessed_always ();
8331
8332 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8333 e->probability = profile_probability::guessed_never ();
8334
8335 /* Copy the new value to loadedi (we already did that before the condition
8336 if we are not in SSA). */
8337 if (gimple_in_ssa_p (cfun))
8338 {
8339 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8340 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8341 }
8342
8343 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8344 gsi_remove (&si, true);
8345
8346 class loop *loop = alloc_loop ();
8347 loop->header = loop_header;
8348 loop->latch = store_bb;
8349 add_loop (loop, loop_header->loop_father);
8350
8351 if (gimple_in_ssa_p (cfun))
8352 update_ssa (TODO_update_ssa_no_phi);
8353
8354 return true;
8355}
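/* Editorial illustration (invented names; strict-aliasing caveats are
   glossed over for brevity): the compare-and-swap loop built above, written
   as plain C for a float update, using the same __sync_val_compare_and_swap
   builtin.  The memcpy calls stand in for VIEW_CONVERT_EXPR; comparing the
   integer images rather than the floats is what lets the loop terminate
   correctly for NaNs and -0.0.  */

#include <string.h>

static void
example_cas_loop (float *addr, float v)
{
  unsigned int oldi = *(volatile unsigned int *) addr;	/* initial load */
  for (;;)
    {
      float oldf, newf;
      unsigned int newi, curi;
      memcpy (&oldf, &oldi, sizeof oldf);
      newf = oldf + v;					/* newval = rhs */
      memcpy (&newi, &newf, sizeof newi);
      curi = __sync_val_compare_and_swap ((unsigned int *) addr, oldi, newi);
      if (curi == oldi)					/* integer compare */
	break;
      oldi = curi;					/* goto repeat */
    }
}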
8356
8357/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8358
8359 GOMP_atomic_start ();
8360 *addr = rhs;
8361 GOMP_atomic_end ();
8362
8363 The result is not globally atomic, but works so long as all parallel
8364 references are within #pragma omp atomic directives. According to
8365 responses received from omp@openmp.org, this appears to be within spec,
8366 which makes sense, since several other compilers handle this
8367 situation the same way.
8368 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8369 expanding. STORED_VAL is the operand of the matching
8370 GIMPLE_OMP_ATOMIC_STORE.
8371
8372 We replace
8373 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8374 loaded_val = *addr;
8375
8376 and replace
8377 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8378 *addr = stored_val;
8379*/
8380
8381static bool
8382expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8383 tree addr, tree loaded_val, tree stored_val)
8384{
8385 gimple_stmt_iterator si;
8386 gassign *stmt;
8387 tree t;
8388
8389 si = gsi_last_nondebug_bb (load_bb);
8390 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8391
8392 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8393 t = build_call_expr (t, 0);
8394 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8395
8396 tree mem = build_simple_mem_ref (addr);
8397 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8398 TREE_OPERAND (mem, 1)
8399 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8400 true),
8401 TREE_OPERAND (mem, 1));
8402 stmt = gimple_build_assign (loaded_val, mem);
8403 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8404 gsi_remove (&si, true);
8405
8406 si = gsi_last_nondebug_bb (store_bb);
8407 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8408
8409 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8410 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8411
8412 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8413 t = build_call_expr (t, 0);
8414 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8415 gsi_remove (&si, true);
8416
8417 if (gimple_in_ssa_p (cfun))
8418 update_ssa (TODO_update_ssa_no_phi);
8419 return true;
8420}
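/* Editorial illustration (invented example name): the mutex fallback simply
   brackets an ordinary load/store pair with the libgomp entry points whose
   decls are looked up above.  The prototypes are spelled out here for the
   sketch; the compiler reaches them through builtin_decl_explicit.  */

extern void GOMP_atomic_start (void);
extern void GOMP_atomic_end (void);

static void
example_mutex_fallback (long double *addr, long double v)
{
  GOMP_atomic_start ();
  *addr = *addr + v;	/* loaded_val = *addr; ...; *addr = stored_val; */
  GOMP_atomic_end ();
}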
8421
8422/* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
8423 using expand_omp_atomic_fetch_op. If that fails, we try to
8424 call expand_omp_atomic_pipeline, and if that fails too, the
8425 ultimate fallback is wrapping the operation in a mutex
8426 (expand_omp_atomic_mutex). REGION is the atomic region built
8427 by build_omp_regions_1(). */
8428
8429static void
8430expand_omp_atomic (struct omp_region *region)
8431{
8432 basic_block load_bb = region->entry, store_bb = region->exit;
8433 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
8434 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
8435 tree loaded_val = gimple_omp_atomic_load_lhs (load);
8436 tree addr = gimple_omp_atomic_load_rhs (load);
8437 tree stored_val = gimple_omp_atomic_store_val (store);
8438 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8439 HOST_WIDE_INT index;
8440
8441 /* Make sure the type is one of the supported sizes. */
8442 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
8443 index = exact_log2 (index);
8444 if (index >= 0 && index <= 4)
8445 {
8446 unsigned int align = TYPE_ALIGN_UNIT (type);
8447
8448 /* __sync builtins require strict data alignment. */
8449 if (exact_log2 (align) >= index)
8450 {
8451 /* Atomic load. */
8452 scalar_mode smode;
8453 if (loaded_val == stored_val
8454 && (is_int_mode (TYPE_MODE (type), &smode)
8455 || is_float_mode (TYPE_MODE (type), &smode))
8456 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8457 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
8458 return;
8459
8460 /* Atomic store. */
8461 if ((is_int_mode (TYPE_MODE (type), &smode)
8462 || is_float_mode (TYPE_MODE (type), &smode))
8463 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8464 && store_bb == single_succ (load_bb)
8465 && first_stmt (store_bb) == store
8466 && expand_omp_atomic_store (load_bb, addr, loaded_val,
8467 stored_val, index))
8468 return;
8469
8470 /* When possible, use specialized atomic update functions. */
8471 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
8472 && store_bb == single_succ (load_bb)
8473 && expand_omp_atomic_fetch_op (load_bb, addr,
8474 loaded_val, stored_val, index))
8475 return;
8476
8477 /* If we don't have specialized __sync builtins, try to implement
8478 it as a compare-and-swap loop. */
8479 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
8480 loaded_val, stored_val, index))
8481 return;
8482 }
8483 }
8484
8485 /* The ultimate fallback is wrapping the operation in a mutex. */
8486 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
8487}
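/* Editorial note (worked example): for a double, TYPE_SIZE_UNIT is 8, so
   index = exact_log2 (8) = 3 and the ..._8 builtin variants are selected;
   with 8-byte alignment, exact_log2 (align) = 3 >= index, so the lock-free
   paths above are eligible.  A 3-byte type gives exact_log2 (3) == -1 and
   falls straight through to the mutex fallback.  */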
8488
8489/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
8490 at REGION_EXIT. */
8491
8492static void
8493mark_loops_in_oacc_kernels_region (basic_block region_entry,
8494 basic_block region_exit)
8495{
8496 class loop *outer = region_entry->loop_father;
8497 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
8498
8499 /* Don't parallelize the kernels region if it contains more than one outer
8500 loop. */
8501 unsigned int nr_outer_loops = 0;
8502 class loop *single_outer = NULL;
8503 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
8504 {
8505 gcc_assert (loop_outer (loop) == outer);
8506
8507 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
8508 continue;
8509
8510 if (region_exit != NULL
8511 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
8512 continue;
8513
8514 nr_outer_loops++;
8515 single_outer = loop;
8516 }
8517 if (nr_outer_loops != 1)
8518 return;
8519
8520 for (class loop *loop = single_outer->inner;
8521 loop != NULL;
8522 loop = loop->inner)
8523 if (loop->next)
8524 return;
8525
8526 /* Mark the loops in the region. */
8527 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
8528 loop->in_oacc_kernels_region = true;
8529}
8530
8531/* Types used to pass grid and workgroup sizes to kernel invocation. */
8532
8533struct GTY(()) grid_launch_attributes_trees
8534{
8535 tree kernel_dim_array_type;
8536 tree kernel_lattrs_dimnum_decl;
8537 tree kernel_lattrs_grid_decl;
8538 tree kernel_lattrs_group_decl;
8539 tree kernel_launch_attributes_type;
8540};
8541
8542static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
8543
8544/* Create types used to pass kernel launch attributes to target. */
8545
8546static void
8547grid_create_kernel_launch_attr_types (void)
8548{
8549 if (grid_attr_trees)
8550 return;
8551 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
8552
8553 tree dim_arr_index_type
8554 = build_index_type (build_int_cst (integer_type_node, 2));
8555 grid_attr_trees->kernel_dim_array_type
8556 = build_array_type (uint32_type_node, dim_arr_index_type);
8557
8558 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
8559 grid_attr_trees->kernel_lattrs_dimnum_decl
8560 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
8561 uint32_type_node);
8562 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
8563
8564 grid_attr_trees->kernel_lattrs_grid_decl
8565 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
8566 grid_attr_trees->kernel_dim_array_type);
8567 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
8568 = grid_attr_trees->kernel_lattrs_dimnum_decl;
8569 grid_attr_trees->kernel_lattrs_group_decl
8570 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
8571 grid_attr_trees->kernel_dim_array_type);
8572 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
8573 = grid_attr_trees->kernel_lattrs_grid_decl;
8574 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
8575 "__gomp_kernel_launch_attributes",
8576 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
8577}
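/* Editorial illustration: the record type assembled above corresponds to
   this C struct (dim_arr_index_type covers indices 0..2, hence the
   three-element arrays); the struct name here is invented.  */

struct example_kernel_launch_attributes
{
  unsigned int ndim;		/* kernel_lattrs_dimnum_decl */
  unsigned int grid_size[3];	/* kernel_lattrs_grid_decl */
  unsigned int group_size[3];	/* kernel_lattrs_group_decl */
};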
8578
8579/* Insert before the current statement in GSI a store of VALUE to INDEX of
8580 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
8581 of type uint32_type_node. */
8582
8583static void
8584grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
8585 tree fld_decl, int index, tree value)
8586{
8587 tree ref = build4 (ARRAY_REF, uint32_type_node,
8588 build3 (COMPONENT_REF,
8589 grid_attr_trees->kernel_dim_array_type,
8590 range_var, fld_decl, NULL_TREE),
8591 build_int_cst (integer_type_node, index),
8592 NULL_TREE, NULL_TREE);
8593 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
8594}
8595
8596/* Return a tree representation of a pointer to a structure with grid and
8597 work-group size information. Statements filling that information will be
8598 inserted before GSI; TGT_STMT is the target statement which has the
8599 necessary information in it. */
8600
8601static tree
8602grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
8603 gomp_target *tgt_stmt)
8604{
8605 grid_create_kernel_launch_attr_types ();
8606 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
8607 "__kernel_launch_attrs");
8608
8609 unsigned max_dim = 0;
8610 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
8611 clause;
8612 clause = OMP_CLAUSE_CHAIN (clause))
8613 {
8614 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
8615 continue;
8616
8617 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
8618 max_dim = MAX (dim, max_dim);
8619
8620 grid_insert_store_range_dim (gsi, lattrs,
8621 grid_attr_trees->kernel_lattrs_grid_decl,
8622 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
8623 grid_insert_store_range_dim (gsi, lattrs,
8624 grid_attr_trees->kernel_lattrs_group_decl,
8625 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
8626 }
8627
8628 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
8629 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
8630 gcc_checking_assert (max_dim <= 2);
8631 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
8632 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
8633 GSI_SAME_STMT);
8634 TREE_ADDRESSABLE (lattrs) = 1;
8635 return build_fold_addr_expr (lattrs);
8636}
8637
8638/* Build a target argument identifier from the DEVICE identifier, the value
8639 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
8640
8641static tree
8642get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
8643{
8644 tree t = build_int_cst (integer_type_node, device);
8645 if (subsequent_param)
8646 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8647 build_int_cst (integer_type_node,
8648 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
8649 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8650 build_int_cst (integer_type_node, id));
8651 return t;
8652}
8653
8654/* Like above, but return it in a type that can be directly stored as an
8655 element of the argument array. */
8656
8657static tree
8658get_target_argument_identifier (int device, bool subsequent_param, int id)
8659{
8660 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
8661 return fold_convert (ptr_type_node, t);
8662}
8663
8664/* Return a target argument consisting of DEVICE identifier, value identifier
8665 ID, and the actual VALUE. */
8666
8667static tree
8668get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
8669 tree value)
8670{
8671 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
8672 fold_convert (integer_type_node, value),
8673 build_int_cst (unsigned_type_node,
8674 GOMP_TARGET_ARG_VALUE_SHIFT));
8675 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8676 get_target_argument_identifier_1 (device, false, id));
8677 t = fold_convert (ptr_type_node, t);
8678 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
8679}
8680
8681/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
8682 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
8683 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
8684 separate arguments. */
8685
8686static void
8687push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
8688 int id, tree value, vec <tree> *args)
8689{
8690 if (tree_fits_shwi_p (value)
8691 && tree_to_shwi (value) > -(1 << 15)
8692 && tree_to_shwi (value) < (1 << 15))
8693 args->quick_push (get_target_argument_value (gsi, device, id, value));
8694 else
8695 {
8696 args->quick_push (get_target_argument_identifier (device, true, id));
8697 value = fold_convert (ptr_type_node, value);
8698 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
8699 GSI_SAME_STMT);
8700 args->quick_push (value);
8701 }
8702}
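/* Editorial sketch of the encoding decision above (EX_VALUE_SHIFT is a
   stand-in for GOMP_TARGET_ARG_VALUE_SHIFT from gomp-constants.h, and the
   function name is invented): a value that fits in the signed 16-bit range
   is packed into the identifier word itself; anything else travels as a
   second, separate array element.  */

#define EX_VALUE_SHIFT 16

static unsigned long
example_pack_target_arg (int device, int id, int value)
{
  /* Mirrors get_target_argument_value: identifier bits in the low half,
     the small constant shifted into the high half.  */
  return (unsigned long) device | (unsigned long) id
	 | ((unsigned long) value << EX_VALUE_SHIFT);
}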
8703
8704/* Create an array of arguments that is then passed to GOMP_target. */
8705
8706static tree
8707get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
8708{
8709 auto_vec <tree, 6> args;
8710 tree clauses = gimple_omp_target_clauses (tgt_stmt);
8711 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
8712 if (c)
8713 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
8714 else
8715 t = integer_minus_one_node;
8716 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8717 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
8718
8719 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
8720 if (c)
8721 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
8722 else
8723 t = integer_minus_one_node;
8724 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8725 GOMP_TARGET_ARG_THREAD_LIMIT, t,
8726 &args);
8727
8728 /* Add HSA-specific grid sizes, if available. */
8729 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8730 OMP_CLAUSE__GRIDDIM_))
8731 {
8732 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
8733 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
8734 args.quick_push (t);
8735 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
8736 }
8737
8738 /* Produce more, perhaps device specific, arguments here. */
8739
8740 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
8741 args.length () + 1),
8742 ".omp_target_args");
8743 for (unsigned i = 0; i < args.length (); i++)
8744 {
8745 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8746 build_int_cst (integer_type_node, i),
8747 NULL_TREE, NULL_TREE);
8748 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
8749 GSI_SAME_STMT);
8750 }
8751 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8752 build_int_cst (integer_type_node, args.length ()),
8753 NULL_TREE, NULL_TREE);
8754 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
8755 GSI_SAME_STMT);
8756 TREE_ADDRESSABLE (argarray) = 1;
8757 return build_fold_addr_expr (argarray);
8758}
8759
8760/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
8761
8762static void
8763expand_omp_target (struct omp_region *region)
8764{
8765 basic_block entry_bb, exit_bb, new_bb;
8766 struct function *child_cfun;
8767 tree child_fn, block, t;
8768 gimple_stmt_iterator gsi;
8769 gomp_target *entry_stmt;
8770 gimple *stmt;
8771 edge e;
8772 bool offloaded, data_region;
8773 int target_kind;
8774
8775 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
8776 target_kind = gimple_omp_target_kind (entry_stmt);
8777 new_bb = region->entry;
8778
8779 offloaded = is_gimple_omp_offloaded (entry_stmt);
8780 switch (target_kind)
8781 {
8782 case GF_OMP_TARGET_KIND_REGION:
8783 case GF_OMP_TARGET_KIND_UPDATE:
8784 case GF_OMP_TARGET_KIND_ENTER_DATA:
8785 case GF_OMP_TARGET_KIND_EXIT_DATA:
8786 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8787 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8788 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8789 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8790 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8791 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8792 data_region = false;
8793 break;
8794 case GF_OMP_TARGET_KIND_DATA:
8795 case GF_OMP_TARGET_KIND_OACC_DATA:
8796 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8797 data_region = true;
8798 break;
8799 default:
8800 gcc_unreachable ();
8801 }
8802
8803 child_fn = NULL_TREE;
8804 child_cfun = NULL;
8805 if (offloaded)
8806 {
8807 child_fn = gimple_omp_target_child_fn (entry_stmt);
8808 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
8809 }
8810
8811 /* Supported by expand_omp_taskreg, but not here. */
8812 if (child_cfun != NULL)
8813 gcc_checking_assert (!child_cfun->cfg);
8814 gcc_checking_assert (!gimple_in_ssa_p (cfun));
8815
8816 entry_bb = region->entry;
8817 exit_bb = region->exit;
8818
8819 switch (target_kind)
8820 {
8821 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8822 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
8823
8824 /* Further down, all OpenACC compute constructs will be mapped to
8825 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8826 is an "oacc kernels" attribute set for OpenACC kernels. */
8827 DECL_ATTRIBUTES (child_fn)
8828 = tree_cons (get_identifier ("oacc kernels"),
8829 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8830 break;
8831 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8832 /* Further down, all OpenACC compute constructs will be mapped to
8833 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8834 is an "oacc serial" attribute set for OpenACC serial. */
8835 DECL_ATTRIBUTES (child_fn)
8836 = tree_cons (get_identifier ("oacc serial"),
8837 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8838 break;
8839 default:
8840 break;
8841 }
8842
8843 if (offloaded)
8844 {
8845 unsigned srcidx, dstidx, num;
8846
8847 /* If the offloading region needs data sent from the parent
8848 function, then the very first statement (except possible
8849 tree profile counter updates) of the offloading body
8850 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
8851 &.OMP_DATA_O is passed as an argument to the child function,
8852 we need to replace it with the argument as seen by the child
8853 function.
8854
8855 In most cases, this will end up being the identity assignment
8856 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
8857 a function call that has been inlined, the original PARM_DECL
8858 .OMP_DATA_I may have been converted into a different local
8859 variable, in which case we need to keep the assignment. */
8860 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8861 if (data_arg)
8862 {
8863 basic_block entry_succ_bb = single_succ (entry_bb);
8864 gimple_stmt_iterator gsi;
8865 tree arg;
8866 gimple *tgtcopy_stmt = NULL;
8867 tree sender = TREE_VEC_ELT (data_arg, 0);
8868
8869 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8870 {
8871 gcc_assert (!gsi_end_p (gsi));
8872 stmt = gsi_stmt (gsi);
8873 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8874 continue;
8875
8876 if (gimple_num_ops (stmt) == 2)
8877 {
8878 tree arg = gimple_assign_rhs1 (stmt);
8879
8880 /* We're ignoring the subcode because we're
8881 effectively doing a STRIP_NOPS. */
8882
8883 if (TREE_CODE (arg) == ADDR_EXPR
8884 && TREE_OPERAND (arg, 0) == sender)
8885 {
8886 tgtcopy_stmt = stmt;
8887 break;
8888 }
8889 }
8890 }
8891
8892 gcc_assert (tgtcopy_stmt != NULL);
8893 arg = DECL_ARGUMENTS (child_fn);
8894
8895 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8896 gsi_remove (&gsi, true);
8897 }
8898
8899 /* Declare local variables needed in CHILD_CFUN. */
8900 block = DECL_INITIAL (child_fn);
8901 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8902 /* The gimplifier could record temporaries in the offloading block
8903 rather than in containing function's local_decls chain,
8904 which would mean cgraph missed finalizing them. Do it now. */
8905 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8906 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8907 varpool_node::finalize_decl (t);
8908 DECL_SAVED_TREE (child_fn) = NULL;
8909 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8910 gimple_set_body (child_fn, NULL);
8911 TREE_USED (block) = 1;
8912
8913 /* Reset DECL_CONTEXT on function arguments. */
8914 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8915 DECL_CONTEXT (t) = child_fn;
8916
8917 /* Split ENTRY_BB at GIMPLE_*,
8918 so that it can be moved to the child function. */
8919 gsi = gsi_last_nondebug_bb (entry_bb);
8920 stmt = gsi_stmt (gsi);
8921 gcc_assert (stmt
8922 && gimple_code (stmt) == gimple_code (entry_stmt));
8923 e = split_block (entry_bb, stmt);
8924 gsi_remove (&gsi, true);
8925 entry_bb = e->dest;
8926 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8927
8928 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8929 if (exit_bb)
8930 {
8931 gsi = gsi_last_nondebug_bb (exit_bb);
8932 gcc_assert (!gsi_end_p (gsi)
8933 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8934 stmt = gimple_build_return (NULL);
8935 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8936 gsi_remove (&gsi, true);
8937 }
8938
8939 /* Move the offloading region into CHILD_CFUN. */
8940
8941 block = gimple_block (entry_stmt);
8942
8943 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8944 if (exit_bb)
8945 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8946 /* When the OMP expansion process cannot guarantee an up-to-date
8947 loop tree, arrange for the child function to fix up loops. */
8948 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8949 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8950
8951 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8952 num = vec_safe_length (child_cfun->local_decls);
8953 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8954 {
8955 t = (*child_cfun->local_decls)[srcidx];
8956 if (DECL_CONTEXT (t) == cfun->decl)
8957 continue;
8958 if (srcidx != dstidx)
8959 (*child_cfun->local_decls)[dstidx] = t;
8960 dstidx++;
8961 }
8962 if (dstidx != num)
8963 vec_safe_truncate (child_cfun->local_decls, dstidx);
8964
8965 /* Inform the callgraph about the new function. */
8966 child_cfun->curr_properties = cfun->curr_properties;
8967 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8968 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8969 cgraph_node *node = cgraph_node::get_create (child_fn);
8970 node->parallelized_function = 1;
8971 cgraph_node::add_new_function (child_fn, true);
8972
8973 /* Add the new function to the offload table. */
8974 if (ENABLE_OFFLOADING)
8975 {
8976 if (in_lto_p)
8977 DECL_PRESERVE_P (child_fn) = 1;
8978 vec_safe_push (offload_funcs, child_fn);
8979 }
8980
8981 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8982 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8983
8984 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8985 fixed in a following pass. */
8986 push_cfun (child_cfun);
8987 if (need_asm)
8988 assign_assembler_name_if_needed (child_fn);
8989 cgraph_edge::rebuild_edges ();
8990
8991 /* Some EH regions might become dead, see PR34608. If
8992 pass_cleanup_cfg isn't the first pass to happen with the
8993 new child, these dead EH edges might cause problems.
8994 Clean them up now. */
8995 if (flag_exceptions)
8996 {
8997 basic_block bb;
8998 bool changed = false;
8999
9000 FOR_EACH_BB_FN (bb, cfun)
9001 changed |= gimple_purge_dead_eh_edges (bb);
9002 if (changed)
9003 cleanup_tree_cfg ();
9004 }
9005 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9006 verify_loop_structure ();
9007 pop_cfun ();
9008
9009 if (dump_file && !gimple_in_ssa_p (cfun))
9010 {
9011 omp_any_child_fn_dumped = true;
9012 dump_function_header (dump_file, child_fn, dump_flags);
9013 dump_function_to_file (child_fn, dump_file, dump_flags);
9014 }
9015
9016 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9017 }
9018
9019 /* Emit a library call to launch the offloading region, or do data
9020 transfers. */
9021 tree t1, t2, t3, t4, depend, c, clauses;
9022 enum built_in_function start_ix;
9023 unsigned int flags_i = 0;
9024
9025 switch (gimple_omp_target_kind (entry_stmt))
9026 {
9027 case GF_OMP_TARGET_KIND_REGION:
9028 start_ix = BUILT_IN_GOMP_TARGET;
9029 break;
9030 case GF_OMP_TARGET_KIND_DATA:
9031 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9032 break;
9033 case GF_OMP_TARGET_KIND_UPDATE:
9034 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9035 break;
9036 case GF_OMP_TARGET_KIND_ENTER_DATA:
9037 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9038 break;
9039 case GF_OMP_TARGET_KIND_EXIT_DATA:
9040 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9041 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9042 break;
9043 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9044 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9045 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9046 start_ix = BUILT_IN_GOACC_PARALLEL;
9047 break;
9048 case GF_OMP_TARGET_KIND_OACC_DATA:
9049 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9050 start_ix = BUILT_IN_GOACC_DATA_START;
9051 break;
9052 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9053 start_ix = BUILT_IN_GOACC_UPDATE;
9054 break;
9055 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9056 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9057 break;
9058 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9059 start_ix = BUILT_IN_GOACC_DECLARE;
9060 break;
9061 default:
9062 gcc_unreachable ();
9063 }
9064
9065 clauses = gimple_omp_target_clauses (entry_stmt);
9066
9067 tree device = NULL_TREE;
9068 location_t device_loc = UNKNOWN_LOCATION;
9069 tree goacc_flags = NULL_TREE;
9070 if (is_gimple_omp_oacc (entry_stmt))
9071 {
9072 /* By default, no GOACC_FLAGs are set. */
9073 goacc_flags = integer_zero_node;
9074 }
9075 else
9076 {
9077 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9078 if (c)
9079 {
9080 device = OMP_CLAUSE_DEVICE_ID (c);
9081 device_loc = OMP_CLAUSE_LOCATION (c);
9082 }
9083 else
9084 {
9085 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9086 library choose). */
9087 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9088 device_loc = gimple_location (entry_stmt);
9089 }
9090
9091 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9092 if (c)
9093 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9094 }
9095
9096 /* By default, there is no conditional. */
9097 tree cond = NULL_TREE;
9098 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9099 if (c)
9100 cond = OMP_CLAUSE_IF_EXPR (c);
9101 /* If we found the clause 'if (cond)', build:
9102 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9103 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9104 if (cond)
9105 {
9106 tree *tp;
9107 if (is_gimple_omp_oacc (entry_stmt))
9108 tp = &goacc_flags;
9109 else
9110 {
9111 /* Ensure 'device' is of the correct type. */
9112 device = fold_convert_loc (device_loc, integer_type_node, device);
9113
9114 tp = &device;
9115 }
9116
629b3d75
MJ
9117 cond = gimple_boolify (cond);
9118
9119 basic_block cond_bb, then_bb, else_bb;
9120 edge e;
9121 tree tmp_var;
9122
9123 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9124 if (offloaded)
9125 e = split_block_after_labels (new_bb);
9126 else
9127 {
9128 gsi = gsi_last_nondebug_bb (new_bb);
9129 gsi_prev (&gsi);
9130 e = split_block (new_bb, gsi_stmt (gsi));
9131 }
9132 cond_bb = e->src;
9133 new_bb = e->dest;
9134 remove_edge (e);
9135
9136 then_bb = create_empty_bb (cond_bb);
9137 else_bb = create_empty_bb (then_bb);
9138 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9139 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9140
9141 stmt = gimple_build_cond_empty (cond);
9142 gsi = gsi_last_bb (cond_bb);
9143 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9144
9145 gsi = gsi_start_bb (then_bb);
9146 stmt = gimple_build_assign (tmp_var, *tp);
9147 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9148
9149 gsi = gsi_start_bb (else_bb);
9150 if (is_gimple_omp_oacc (entry_stmt))
9151 stmt = gimple_build_assign (tmp_var,
9152 BIT_IOR_EXPR,
9153 *tp,
9154 build_int_cst (integer_type_node,
9155 GOACC_FLAG_HOST_FALLBACK));
9156 else
9157 stmt = gimple_build_assign (tmp_var,
9158 build_int_cst (integer_type_node,
9159 GOMP_DEVICE_HOST_FALLBACK));
9160 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9161
9162 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9163 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9164 add_bb_to_loop (then_bb, cond_bb->loop_father);
9165 add_bb_to_loop (else_bb, cond_bb->loop_father);
9166 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9167 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9168
9169 *tp = tmp_var;
9170
9171 gsi = gsi_last_nondebug_bb (new_bb);
9172 }
9173 else
9174 {
9175 gsi = gsi_last_nondebug_bb (new_bb);
9176
9177 if (device != NULL_TREE)
9178 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9179 true, GSI_SAME_STMT);
9180 }
9181
9182 t = gimple_omp_target_data_arg (entry_stmt);
9183 if (t == NULL)
9184 {
9185 t1 = size_zero_node;
9186 t2 = build_zero_cst (ptr_type_node);
9187 t3 = t2;
9188 t4 = t2;
9189 }
9190 else
9191 {
9192 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9193 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9194 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9195 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9196 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9197 }
9198
9199 gimple *g;
9200 bool tagging = false;
9201 /* The maximum number used by any start_ix, without varargs. */
9202 auto_vec<tree, 11> args;
9203 if (is_gimple_omp_oacc (entry_stmt))
9204 {
9205 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9206 TREE_TYPE (goacc_flags), goacc_flags);
9207 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9208 NULL_TREE, true,
9209 GSI_SAME_STMT);
9210 args.quick_push (goacc_flags_m);
9211 }
9212 else
9213 args.quick_push (device);
9214 if (offloaded)
9215 args.quick_push (build_fold_addr_expr (child_fn));
9216 args.quick_push (t1);
9217 args.quick_push (t2);
9218 args.quick_push (t3);
9219 args.quick_push (t4);
9220 switch (start_ix)
9221 {
9222 case BUILT_IN_GOACC_DATA_START:
9223 case BUILT_IN_GOACC_DECLARE:
9224 case BUILT_IN_GOMP_TARGET_DATA:
9225 break;
9226 case BUILT_IN_GOMP_TARGET:
9227 case BUILT_IN_GOMP_TARGET_UPDATE:
9228 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9229 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9230 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9231 if (c)
9232 depend = OMP_CLAUSE_DECL (c);
9233 else
9234 depend = build_int_cst (ptr_type_node, 0);
9235 args.quick_push (depend);
9236 if (start_ix == BUILT_IN_GOMP_TARGET)
9237 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9238 break;
9239 case BUILT_IN_GOACC_PARALLEL:
9240 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9241 {
9242 tree dims = NULL_TREE;
9243 unsigned int ix;
9244
9245 /* For serial constructs we set all dimensions to 1. */
9246 for (ix = GOMP_DIM_MAX; ix--;)
9247 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9248 oacc_replace_fn_attrib (child_fn, dims);
9249 }
9250 else
9251 oacc_set_fn_attrib (child_fn, clauses, &args);
9252 tagging = true;
9253 /* FALLTHRU */
9254 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9255 case BUILT_IN_GOACC_UPDATE:
9256 {
9257 tree t_async = NULL_TREE;
9258
9259 /* If present, use the value specified by the respective
9260 clause, making sure that is of the correct type. */
9261 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9262 if (c)
9263 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9264 integer_type_node,
9265 OMP_CLAUSE_ASYNC_EXPR (c));
9266 else if (!tagging)
9267 /* Default values for t_async. */
9268 t_async = fold_convert_loc (gimple_location (entry_stmt),
9269 integer_type_node,
9270 build_int_cst (integer_type_node,
9271 GOMP_ASYNC_SYNC));
9272 if (tagging && t_async)
9273 {
9274 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9275
9276 if (TREE_CODE (t_async) == INTEGER_CST)
9277 {
9278 /* See if we can pack the async arg into the tag's
9279 operand. */
9280 i_async = TREE_INT_CST_LOW (t_async);
9281 if (i_async < GOMP_LAUNCH_OP_MAX)
9282 t_async = NULL_TREE;
9283 else
9284 i_async = GOMP_LAUNCH_OP_MAX;
9285 }
9286 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9287 i_async));
9288 }
9289 if (t_async)
9290 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9291 NULL_TREE, true,
9292 GSI_SAME_STMT));
9293
9294 /* Save the argument index, and ... */
9295 unsigned t_wait_idx = args.length ();
9296 unsigned num_waits = 0;
9297 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9298 if (!tagging || c)
9299 /* ... push a placeholder. */
9300 args.safe_push (integer_zero_node);
9301
9302 for (; c; c = OMP_CLAUSE_CHAIN (c))
9303 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9304 {
9305 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9306 integer_type_node,
9307 OMP_CLAUSE_WAIT_EXPR (c));
9308 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9309 GSI_SAME_STMT);
9310 args.safe_push (arg);
9311 num_waits++;
9312 }
9313
9314 if (!tagging || num_waits)
9315 {
9316 tree len;
9317
9318 /* Now that we know the number, update the placeholder. */
9319 if (tagging)
9320 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9321 else
9322 len = build_int_cst (integer_type_node, num_waits);
9323 len = fold_convert_loc (gimple_location (entry_stmt),
9324 unsigned_type_node, len);
9325 args[t_wait_idx] = len;
9326 }
9327 }
9328 break;
9329 default:
9330 gcc_unreachable ();
9331 }
9332 if (tagging)
9333 /* Push terminal marker - zero. */
9334 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9335
9336 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9337 gimple_set_location (g, gimple_location (entry_stmt));
9338 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9339 if (!offloaded)
9340 {
9341 g = gsi_stmt (gsi);
9342 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9343 gsi_remove (&gsi, true);
9344 }
9345 if (data_region && region->exit)
9346 {
9347 gsi = gsi_last_nondebug_bb (region->exit);
9348 g = gsi_stmt (gsi);
9349 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9350 gsi_remove (&gsi, true);
9351 }
9352}
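/* Editorial sketch of the if-clause handling built above, OpenMP flavor
   (EX_HOST_FALLBACK stands in for GOMP_DEVICE_HOST_FALLBACK from
   gomp-constants.h; the function name is invented): the device selector is
   rewritten so the runtime falls back to host execution when the condition
   is false.  */

#define EX_HOST_FALLBACK (-2)	/* assumed stand-in value */

static int
example_select_device (int cond, int device)
{
  /* device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK)  */
  return cond ? device : EX_HOST_FALLBACK;
}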
9353
9354/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only,
9355 with the iteration variable derived from the thread number. INTRA_GROUP
9356 means this is an expansion of a loop iterating over work-items within a
9357 separate iteration over groups. */
9358
9359static void
9360grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
9361{
9362 gimple_stmt_iterator gsi;
9363 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
9364 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
9365 == GF_OMP_FOR_KIND_GRID_LOOP);
9366 size_t collapse = gimple_omp_for_collapse (for_stmt);
9367 struct omp_for_data_loop *loops
9368 = XALLOCAVEC (struct omp_for_data_loop,
9369 gimple_omp_for_collapse (for_stmt));
9370 struct omp_for_data fd;
9371
9372 remove_edge (BRANCH_EDGE (kfor->entry));
9373 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
9374
9375 gcc_assert (kfor->cont);
9376 omp_extract_for_data (for_stmt, &fd, loops);
9377
9378 gsi = gsi_start_bb (body_bb);
9379
9380 for (size_t dim = 0; dim < collapse; dim++)
9381 {
9382 tree type, itype;
9383 itype = type = TREE_TYPE (fd.loops[dim].v);
9384 if (POINTER_TYPE_P (type))
9385 itype = signed_type_for (type);
9386
9387 tree n1 = fd.loops[dim].n1;
9388 tree step = fd.loops[dim].step;
9389 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9390 true, NULL_TREE, true, GSI_SAME_STMT);
9391 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9392 true, NULL_TREE, true, GSI_SAME_STMT);
9393 tree threadid;
9394 if (gimple_omp_for_grid_group_iter (for_stmt))
9395 {
9396 gcc_checking_assert (!intra_group);
9397 threadid = build_call_expr (builtin_decl_explicit
9398 (BUILT_IN_HSA_WORKGROUPID), 1,
9399 build_int_cstu (unsigned_type_node, dim));
9400 }
9401 else if (intra_group)
9402 threadid = build_call_expr (builtin_decl_explicit
9403 (BUILT_IN_HSA_WORKITEMID), 1,
9404 build_int_cstu (unsigned_type_node, dim));
9405 else
9406 threadid = build_call_expr (builtin_decl_explicit
9407 (BUILT_IN_HSA_WORKITEMABSID), 1,
9408 build_int_cstu (unsigned_type_node, dim));
9409 threadid = fold_convert (itype, threadid);
9410 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9411 true, GSI_SAME_STMT);
9412
9413 tree startvar = fd.loops[dim].v;
9414 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
9415 if (POINTER_TYPE_P (type))
9416 t = fold_build_pointer_plus (n1, t);
9417 else
9418 t = fold_build2 (PLUS_EXPR, type, t, n1);
9419 t = fold_convert (type, t);
9420 t = force_gimple_operand_gsi (&gsi, t,
9421 DECL_P (startvar)
9422 && TREE_ADDRESSABLE (startvar),
9423 NULL_TREE, true, GSI_SAME_STMT);
9424 gassign *assign_stmt = gimple_build_assign (startvar, t);
9425 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9426 }
9427 /* Remove the omp for statement. */
9428 gsi = gsi_last_nondebug_bb (kfor->entry);
9429 gsi_remove (&gsi, true);
9430
9431 /* Remove the GIMPLE_OMP_CONTINUE statement. */
9432 gsi = gsi_last_nondebug_bb (kfor->cont);
9433 gcc_assert (!gsi_end_p (gsi)
9434 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
9435 gsi_remove (&gsi, true);
9436
9437 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
9438 gsi = gsi_last_nondebug_bb (kfor->exit);
9439 gcc_assert (!gsi_end_p (gsi)
9440 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9441 if (intra_group)
9442 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
9443 gsi_remove (&gsi, true);
9444
9445 /* Fixup the much simpler CFG. */
9446 remove_edge (find_edge (kfor->cont, body_bb));
9447
9448 if (kfor->cont != body_bb)
9449 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
9450 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
9451}
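/* Editorial illustration (invented name): after gridification each
   work-item computes its private iteration-variable value directly from
   its id, so no loop back-edge remains.  For one dimension:  */

static long
example_grid_induction (long n1, long step, unsigned int threadid)
{
  /* startvar = n1 + threadid * step, as built in the loop above.  */
  return n1 + (long) threadid * step;
}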
9452
9453/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
9454 argument_decls. */
9455
9456struct grid_arg_decl_map
9457{
9458 tree old_arg;
9459 tree new_arg;
9460};
9461
9462/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
9463 pertaining to kernel function. */
9464
9465static tree
9466grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
9467{
9468 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
9469 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
9470 tree t = *tp;
9471
9472 if (t == adm->old_arg)
9473 *tp = adm->new_arg;
9474 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
9475 return NULL_TREE;
9476}
9477
9478/* If the TARGET region contains a kernel body for-loop, remove its region
9479 from the TARGET and expand it in HSA gridified kernel fashion. */
9480
9481static void
9482grid_expand_target_grid_body (struct omp_region *target)
9483{
9484 if (!hsa_gen_requested_p ())
9485 return;
9486
9487 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
9488 struct omp_region **pp;
9489
9490 for (pp = &target->inner; *pp; pp = &(*pp)->next)
9491 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
9492 break;
9493
9494 struct omp_region *gpukernel = *pp;
9495
9496 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
9497 if (!gpukernel)
9498 {
9499 /* HSA cannot handle OACC stuff. */
9500 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
9501 return;
9502 gcc_checking_assert (orig_child_fndecl);
9503 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
9504 OMP_CLAUSE__GRIDDIM_));
9505 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
9506
9507 hsa_register_kernel (n);
9508 return;
9509 }
9510
9511 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
9512 OMP_CLAUSE__GRIDDIM_));
01914336
MJ
9513 tree inside_block
9514 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
629b3d75
MJ
9515 *pp = gpukernel->next;
9516 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
9517 if ((*pp)->type == GIMPLE_OMP_FOR)
9518 break;
9519
9520 struct omp_region *kfor = *pp;
9521 gcc_assert (kfor);
9522 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
9523 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
9524 *pp = kfor->next;
9525 if (kfor->inner)
9526 {
9527 if (gimple_omp_for_grid_group_iter (for_stmt))
9528 {
9529 struct omp_region **next_pp;
9530 for (pp = &kfor->inner; *pp; pp = next_pp)
9531 {
9532 next_pp = &(*pp)->next;
9533 if ((*pp)->type != GIMPLE_OMP_FOR)
9534 continue;
9535 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
9536 gcc_assert (gimple_omp_for_kind (inner)
9537 == GF_OMP_FOR_KIND_GRID_LOOP);
9538 grid_expand_omp_for_loop (*pp, true);
9539 *pp = (*pp)->next;
9540 next_pp = pp;
9541 }
9542 }
9543 expand_omp (kfor->inner);
9544 }
9545 if (gpukernel->inner)
9546 expand_omp (gpukernel->inner);
9547
9548 tree kern_fndecl = copy_node (orig_child_fndecl);
9549 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
9550 "kernel");
9551 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
9552 tree tgtblock = gimple_block (tgt_stmt);
9553 tree fniniblock = make_node (BLOCK);
9554 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
9555 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
9556 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
9557 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
9558 DECL_INITIAL (kern_fndecl) = fniniblock;
9559 push_struct_function (kern_fndecl);
9560 cfun->function_end_locus = gimple_location (tgt_stmt);
9561 init_tree_ssa (cfun);
9562 pop_cfun ();
9563
9564 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
9565 gcc_assert (!DECL_CHAIN (old_parm_decl));
9566 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
9567 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
9568 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
9569 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
9570 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
9571 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
9572 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
9573 kern_cfun->curr_properties = cfun->curr_properties;
9574
9575 grid_expand_omp_for_loop (kfor, false);
9576
9577 /* Remove the omp for statement. */
9578 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
9579 gsi_remove (&gsi, true);
9580 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
9581 return. */
9582 gsi = gsi_last_nondebug_bb (gpukernel->exit);
9583 gcc_assert (!gsi_end_p (gsi)
9584 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9585 gimple *ret_stmt = gimple_build_return (NULL);
9586 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
9587 gsi_remove (&gsi, true);
9588
9589 /* Statements in the first BB in the target construct have been produced by
9590 target lowering and must be copied inside the GPUKERNEL, with the two
9591 exceptions of the first OMP statement and the OMP_DATA assignment
9592 statement. */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
        break;
      if (sender
          && is_gimple_assign (stmt)
          && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
          && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
        continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
                          gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          struct walk_stmt_info wi;
          memset (&wi, 0, sizeof (wi));
          wi.info = &adm;
          walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
        }
    }
  pop_cfun ();

  return;
}
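
/* A minimal sketch (not the actual implementation) of the kind of
   walk_tree_fn callback used with walk_gimple_op above; the real callback,
   grid_remap_kernel_arg_accesses, is defined earlier in this file and is
   more careful about what it rewrites:

     static tree
     remap_arg_cb (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
     {
       struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
       struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
       if (*tp == adm->old_arg)
         *tp = adm->new_arg;    // rewrite a mention of the old PARM_DECL
       return NULL_TREE;        // NULL_TREE means continue the walk
     }

   Every operand of every statement in the new function is visited once, so
   all references to the original parameter get redirected.  */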

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
         region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
        grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
          && gimple_omp_for_combined_p (last_stmt (region->entry)))
        inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
        expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
        input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
        {
        case GIMPLE_OMP_PARALLEL:
        case GIMPLE_OMP_TASK:
          expand_omp_taskreg (region);
          break;

        case GIMPLE_OMP_FOR:
          expand_omp_for (region, inner_stmt);
          break;

        case GIMPLE_OMP_SECTIONS:
          expand_omp_sections (region);
          break;

        case GIMPLE_OMP_SECTION:
          /* Individual omp sections are handled together with their
             parent GIMPLE_OMP_SECTIONS region.  */
          break;

        case GIMPLE_OMP_SINGLE:
          expand_omp_single (region);
          break;

        case GIMPLE_OMP_ORDERED:
          {
            gomp_ordered *ord_stmt
              = as_a <gomp_ordered *> (last_stmt (region->entry));
            if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
                                 OMP_CLAUSE_DEPEND))
              {
                /* We'll expand these when expanding the corresponding
                   worksharing region with an ordered(n) clause.  */
                gcc_assert (region->outer
                            && region->outer->type == GIMPLE_OMP_FOR);
                region->ord_stmt = ord_stmt;
                break;
              }
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        case GIMPLE_OMP_TARGET:
          expand_omp_target (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
        dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
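
/* A standalone sketch (ordinary C, not compiler code) of the traversal
   order implemented above: children of a region are expanded before the
   region itself, and siblings are then visited through their NEXT links:

     struct node { struct node *inner; struct node *next; };

     static void
     walk (struct node *n)
     {
       for (; n; n = n->next)
         {
           if (n->inner)
             walk (n->inner);   // innermost regions first
           // ... expand the region N itself here ...
         }
     }

   Hence a region that must be outlined into a separate function (such as a
   parallel) never still contains unexpanded OMP constructs in its body.  */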

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a
   whole forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
                     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
        {
          /* STMT is the return point out of region PARENT.  Mark it
             as the exit point and make PARENT the immediately
             enclosing region.  */
          gcc_assert (parent);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
        {
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to
             GIMPLE_OMP_RETURN, but matches with
             GIMPLE_OMP_ATOMIC_LOAD.  */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_CONTINUE)
        {
          gcc_assert (parent);
          parent->cont = bb;
        }
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
        {
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
        }
      else
        {
          region = new_omp_region (bb, code, parent);
          /* Otherwise...  */
          if (code == GIMPLE_OMP_TARGET)
            {
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
                case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_SERIAL:
                case GF_OMP_TARGET_KIND_OACC_DATA:
                case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
                case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives...  */
                  region = NULL;
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          else if (code == GIMPLE_OMP_ORDERED
                   && omp_find_clause (gimple_omp_ordered_clauses
                                         (as_a <gomp_ordered *> (stmt)),
                                       OMP_CLAUSE_DEPEND))
            /* #pragma omp ordered depend is also just a stand-alone
               directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASK
                   && gimple_omp_task_taskwait_p (stmt))
            /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
            region = NULL;
          /* ..., this directive becomes the parent for a new region.  */
          if (region)
            parent = region;
        }
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
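
/* An illustration (hypothetical source, not compiler code) of the mapping
   this walk computes: for

     void f (int *a)
     {
     #pragma omp parallel
     #pragma omp for
       for (int i = 0; i < 100; i++)
         a[i] = i;
     }

   each construct is delimited in GIMPLE by its GIMPLE_OMP_* statement and a
   matching GIMPLE_OMP_RETURN, so the walk above builds roughly

     GIMPLE_OMP_PARALLEL
       GIMPLE_OMP_FOR

   with the loop region as the INNER child of the parallel region.  */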

/* Builds the tree of OMP regions rooted at ROOT, storing it in
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands the omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}
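
/* omp_expand_local is the entry point for code that synthesizes OMP
   constructs after the main ompexp pass has already run (e.g. the loop
   auto-parallelization machinery) and needs them expanded in place.  Unlike
   build_omp_regions below, it does not compute dominance info itself, so a
   hypothetical caller looks roughly like:

     calculate_dominance_info (CDI_DOMINATORS);
     // ... emit GIMPLE_OMP_PARALLEL etc. as the last stmt of ENTRY_BB ...
     omp_expand_local (entry_bb);

   where ENTRY_BB (a name used only for this sketch) is the block whose last
   statement is the newly created directive.  */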

/* Scan the CFG and build a tree of OMP regions.  The root is stored in
   root_omp_region rather than returned.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}
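
/* For a sense of what "expanding into runtime calls" produces, consider the
   most common case (a simplified sketch; the real GIMPLE also carries the
   data-sharing record and more):

     #pragma omp parallel
       body;

   The body is outlined into a child function

     static void f._omp_fn.0 (void *.omp_data_i) { body; }

   and the directive itself is replaced by a call into libgomp:

     __builtin_GOMP_parallel (f._omp_fn.0, &.omp_data_o, 0, 0);

   where the last two arguments are the requested number of threads
   (0 = let the runtime decide) and flags derived from relevant clauses.  */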

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
                    || flag_openmp_simd != 0)
                   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
         But often, there is nothing to do.  */
      if (!gate)
        return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
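
/* The factory above is how the pass manager obtains this pass; the actual
   scheduling lives in passes.def, which references it (and the ompexpssa
   variant below) along the lines of

     NEXT_PASS (pass_expand_omp);

   giving "ompexp" its fixed slot before SSA construction.  */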

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
                             (as_a <gomp_ordered *> (last)),
                           OMP_CLAUSE_DEPEND))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_SERIAL:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
             successor edges as abnormal to prevent splitting
             them.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.  */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;

        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}
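
/* For orientation, a sketch of the edges created above for a worksharing
   loop region (EDGE_ABNORMAL/EDGE_FALLTHRU flags omitted):

     GIMPLE_OMP_FOR bb -------------------------+   zero-iterations edge
           |                                    |
           v                                    v
       loop body ... GIMPLE_OMP_CONTINUE bb ---> bb->next_bb
           ^                     |              (towards GIMPLE_OMP_RETURN)
           +------ loopback -----+

   Marking these edges abnormal keeps CFG cleanups from splitting or
   removing them before the expander rewrites the region into an ordinary
   loop.  */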

#include "gt-omp-expand.h"