/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
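
/* Illustrative worked example (not from the original sources): assuming
   omp_max_vf () returns a power-of-two vectorization factor of 4, a
   schedule (simd:dynamic, 5) chunk of 5 is rewritten above as
   (5 + (4 - 1)) & -4 == 8, i.e. rounded up to a multiple of the
   vectorization factor so that a chunk never splits a simd lane
   group.  */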

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
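
/* Illustrative example (hypothetical bounds): for

	#pragma omp parallel for schedule (dynamic, 16)
	for (i = 0; i < n; i++)

   the vector built above holds the loop bounds, step and chunk,

	(long) 0, (long) n, (long) 1, (long) 16,

   which expand_parallel_call later appends to the combined
   GOMP_parallel_loop_* library call.  */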

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up on task reductions on the parallel; while they would be
     implementable, adding another big set of APIs or slowing down the
     normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
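
/* For instance (illustrative): a parallel region whose only statement
   is a dynamically scheduled loop,

	#pragma omp parallel
	#pragma omp for schedule (dynamic)
	...

   is marked combined here and later expanded with a single one of the
   combined GOMP_parallel_loop_* entry points, whereas with
   schedule (static) the loop is open coded and the region is left
   uncombined.  */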

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to the decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region,
     when expanding the task region current_function_decl will be the
     original source function, but we actually want to use as context
     the child function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where the code is to be inserted.
   WS_ARGS will be set if this is a call to a combined
   parallel+workshare construct; it contains the list of additional
   arguments needed by the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}
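
/* An illustrative sketch of the result (assuming no clauses): a plain

	#pragma omp parallel
	  body;

   outlined into child function foo._omp_fn.0 is replaced by

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   where the two zeros are the run-time-selected number of threads and
   the empty flags word built above.  */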

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where the code is
   to be inserted.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
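
/* Illustrative flag computation (clause mix hypothetical): for

	#pragma omp task untied mergeable

   IFLAGS above is GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE,
   and with no if/final/depend/priority clauses the emitted call is
   roughly

	GOMP_task (foo._omp_fn.1, &.omp_data_o, cpyfn, arg_size,
		   arg_align, 1, flags, 0B, 0);

   where 1 is the always-true IF condition and 0B the null depend
   vector.  */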

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where the code
   is to be inserted.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where the code is to be
   inserted.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last stmt in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within the current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}
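
/* An illustrative case (assuming the child function ends up with no
   addressable locals):

	#pragma omp parallel
	{
	  #pragma omp for
	  ...		(implicit barrier here)
	}		(implicit barrier here as well)

   The workshare barrier is redundant with the barrier at the end of
   the parallel, so its GIMPLE_OMP_RETURN can be marked nowait.  */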

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for a task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
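
/* Illustrative effect (sketch): inside a parallel body,

	n = omp_get_num_threads ();
	...
	m = omp_get_num_threads ();

   both calls are redirected to the const __builtin_omp_get_num_threads,
   so later passes are free to CSE them into a single call.  */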

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In that case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
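
/* Illustrative summary of the transformation done above (names
   hypothetical): the body of

	#pragma omp parallel shared (x)
	  use (x);

   is moved into a new function

	foo._omp_fn.0 (struct .omp_data_s *.omp_data_i)
	{ use (.omp_data_i->x); }

   and the directive itself is replaced by

	.omp_data_o.x = x;
	GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);  */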
1532
1533/* Information about members of an OpenACC collapsed loop nest. */
1534
1535struct oacc_collapse
1536{
01914336 1537 tree base; /* Base value. */
629b3d75 1538 tree iters; /* Number of steps. */
02889d23
CLT
1539 tree step; /* Step size. */
1540 tree tile; /* Tile increment (if tiled). */
1541 tree outer; /* Tile iterator var. */
629b3d75
MJ
1542};
1543
1544/* Helper for expand_oacc_for. Determine collapsed loop information.
1545 Fill in COUNTS array. Emit any initialization code before GSI.
1546 Return the calculated outer loop bound of BOUND_TYPE. */
1547
1548static tree
1549expand_oacc_collapse_init (const struct omp_for_data *fd,
1550 gimple_stmt_iterator *gsi,
02889d23
CLT
1551 oacc_collapse *counts, tree bound_type,
1552 location_t loc)
629b3d75 1553{
02889d23 1554 tree tiling = fd->tiling;
629b3d75
MJ
1555 tree total = build_int_cst (bound_type, 1);
1556 int ix;
1557
1558 gcc_assert (integer_onep (fd->loop.step));
1559 gcc_assert (integer_zerop (fd->loop.n1));
1560
02889d23
CLT
1561 /* When tiling, the first operand of the tile clause applies to the
1562 innermost loop, and we work outwards from there. Seems
1563 backwards, but whatever. */
1564 for (ix = fd->collapse; ix--;)
629b3d75
MJ
1565 {
1566 const omp_for_data_loop *loop = &fd->loops[ix];
1567
1568 tree iter_type = TREE_TYPE (loop->v);
1569 tree diff_type = iter_type;
1570 tree plus_type = iter_type;
1571
1572 gcc_assert (loop->cond_code == fd->loop.cond_code);
1573
1574 if (POINTER_TYPE_P (iter_type))
1575 plus_type = sizetype;
1576 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1577 diff_type = signed_type_for (diff_type);
c31bc4ac
TV
1578 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1579 diff_type = integer_type_node;
629b3d75 1580
02889d23
CLT
1581 if (tiling)
1582 {
1583 tree num = build_int_cst (integer_type_node, fd->collapse);
1584 tree loop_no = build_int_cst (integer_type_node, ix);
1585 tree tile = TREE_VALUE (tiling);
1586 gcall *call
1587 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1588 /* gwv-outer=*/integer_zero_node,
1589 /* gwv-inner=*/integer_zero_node);
1590
1591 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1592 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1593 gimple_call_set_lhs (call, counts[ix].tile);
1594 gimple_set_location (call, loc);
1595 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1596
1597 tiling = TREE_CHAIN (tiling);
1598 }
1599 else
1600 {
1601 counts[ix].tile = NULL;
1602 counts[ix].outer = loop->v;
1603 }
1604
629b3d75
MJ
1605 tree b = loop->n1;
1606 tree e = loop->n2;
1607 tree s = loop->step;
1608 bool up = loop->cond_code == LT_EXPR;
1609 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1610 bool negating;
1611 tree expr;
1612
1613 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1614 true, GSI_SAME_STMT);
1615 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1616 true, GSI_SAME_STMT);
1617
01914336 1618 /* Convert the step, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
1619 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1620 if (negating)
1621 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1622 s = fold_convert (diff_type, s);
1623 if (negating)
1624 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1625 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1626 true, GSI_SAME_STMT);
1627
01914336 1628 /* Determine the range, avoiding possible unsigned->signed overflow. */
629b3d75
MJ
1629 negating = !up && TYPE_UNSIGNED (iter_type);
1630 expr = fold_build2 (MINUS_EXPR, plus_type,
1631 fold_convert (plus_type, negating ? b : e),
1632 fold_convert (plus_type, negating ? e : b));
1633 expr = fold_convert (diff_type, expr);
1634 if (negating)
1635 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1636 tree range = force_gimple_operand_gsi
1637 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1638
1639 /* Determine number of iterations. */
1640 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1641 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1642 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1643
1644 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1645 true, GSI_SAME_STMT);
1646
1647 counts[ix].base = b;
1648 counts[ix].iters = iters;
1649 counts[ix].step = s;
1650
1651 total = fold_build2 (MULT_EXPR, bound_type, total,
1652 fold_convert (bound_type, iters));
1653 }
1654
1655 return total;
1656}
1657
02889d23
CLT
1658/* Emit initializers for collapsed loop members. INNER is true if
1659 this is for the element loop of a TILE. IVAR is the outer
629b3d75
MJ
1660 loop iteration variable, from which collapsed loop iteration values
1661 are calculated. COUNTS array has been initialized by
1662 expand_oacc_collapse_inits. */
1663
1664static void
02889d23 1665expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
629b3d75
MJ
1666 gimple_stmt_iterator *gsi,
1667 const oacc_collapse *counts, tree ivar)
1668{
1669 tree ivar_type = TREE_TYPE (ivar);
1670
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1674 {
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
02889d23
CLT
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
629b3d75
MJ
1679 tree diff_type = TREE_TYPE (collapse->step);
1680 tree plus_type = iter_type;
1681 enum tree_code plus_code = PLUS_EXPR;
1682 tree expr;
1683
1684 if (POINTER_TYPE_P (iter_type))
1685 {
1686 plus_code = POINTER_PLUS_EXPR;
1687 plus_type = sizetype;
1688 }
1689
02889d23
CLT
1690 expr = ivar;
1691 if (ix)
1692 {
1693 tree mod = fold_convert (ivar_type, collapse->iters);
1694 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1695 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1696 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 }
1699
629b3d75
MJ
1700 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1701 collapse->step);
1702 expr = fold_build2 (plus_code, iter_type,
1703 inner ? collapse->outer : collapse->base,
1704 fold_convert (plus_type, expr));
1705 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1706 true, GSI_SAME_STMT);
 1707	      gassign *ass = gimple_build_assign (v, expr);
 1708	      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1709 }
1710}
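/* A small worked example (hypothetical values): with fd->collapse == 2,
   counts[0].iters == 4 (outer) and counts[1].iters == 5 (inner), a
   linearized IVAR of 13 is unwound innermost-first as
     v1 = base1 + (13 % 5) * step1;   // inner index 3
     v0 = base0 + (13 / 5) * step0;   // outer index 2
   which is exactly the div/mod chain built above.  */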
1711
1712/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1713 of the combined collapse > 1 loop constructs, generate code like:
1714 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1715 if (cond3 is <)
1716 adj = STEP3 - 1;
1717 else
1718 adj = STEP3 + 1;
1719 count3 = (adj + N32 - N31) / STEP3;
1720 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1721 if (cond2 is <)
1722 adj = STEP2 - 1;
1723 else
1724 adj = STEP2 + 1;
1725 count2 = (adj + N22 - N21) / STEP2;
1726 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1727 if (cond1 is <)
1728 adj = STEP1 - 1;
1729 else
1730 adj = STEP1 + 1;
1731 count1 = (adj + N12 - N11) / STEP1;
1732 count = count1 * count2 * count3;
1733 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1734 count = 0;
1735 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1736 of the combined loop constructs, just initialize COUNTS array
 1737	   from the _looptemp_ clauses.  For loop nests with non-rectangular
 1738	   loops, do this only for the rectangular loops.  Then pick
 1739	   the loops which reference outer vars in their bound expressions,
 1740	   plus the loops they refer to, and compute the number of iterations
 1741	   for this sub-nest.  For triangular loops use Faulhaber's formula
 1742	   (TBD), otherwise as a fallback compute it by iterating the loops.
1743 If e.g. the sub-nest is
1744 for (I = N11; I COND1 N12; I += STEP1)
1745 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1746 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1747 do:
1748 COUNT = 0;
1749 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1750 for (tmpj = M21 * tmpi + N21;
1751 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1752 {
1753 int tmpk1 = M31 * tmpj + N31;
1754 int tmpk2 = M32 * tmpj + N32;
1755 if (tmpk1 COND3 tmpk2)
1756 {
1757 if (COND3 is <)
1758 adj = STEP3 - 1;
1759 else
1760 adj = STEP3 + 1;
1761 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1762 }
1763 }
1764 and finally multiply the counts of the rectangular loops not
 1765	   in the sub-nest with COUNT.  Also, store in counts[fd->last_nonrect]
 1766	   the number of iterations of the loops from fd->first_nonrect
 1767	   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
 1768	   by the counts of rectangular loops not referenced in any non-rectangular
 1769	   loops sandwiched in between those.  */
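/* For instance (an illustrative case, not from the code): for the
   triangular sub-nest
     for (I = 0; I < 4; I++)
       for (J = 0; J < I; J++)
   the fallback above computes COUNT = 0 + 1 + 2 + 3 = 6, which is the
   closed form I * (I - 1) / 2 at I = 4 that a Faulhaber-style
   evaluation would produce directly.  */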
1770
1771/* NOTE: It *could* be better to moosh all of the BBs together,
1772 creating one larger BB with all the computation and the unexpected
1773 jump at the end. I.e.
1774
1775 bool zero3, zero2, zero1, zero;
1776
1777 zero3 = N32 c3 N31;
1778 count3 = (N32 - N31) /[cl] STEP3;
1779 zero2 = N22 c2 N21;
1780 count2 = (N22 - N21) /[cl] STEP2;
1781 zero1 = N12 c1 N11;
1782 count1 = (N12 - N11) /[cl] STEP1;
1783 zero = zero3 || zero2 || zero1;
1784 count = count1 * count2 * count3;
1785 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1786
 1787	   After all, we expect zero to be false, and thus we expect to have to
1788 evaluate all of the comparison expressions, so short-circuiting
1789 oughtn't be a win. Since the condition isn't protecting a
1790 denominator, we're not concerned about divide-by-zero, so we can
1791 fully evaluate count even if a numerator turned out to be wrong.
1792
1793 It seems like putting this all together would create much better
1794 scheduling opportunities, and less pressure on the chip's branch
1795 predictor. */
1796
1797static void
1798expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1799 basic_block &entry_bb, tree *counts,
1800 basic_block &zero_iter1_bb, int &first_zero_iter1,
1801 basic_block &zero_iter2_bb, int &first_zero_iter2,
1802 basic_block &l2_dom_bb)
1803{
1804 tree t, type = TREE_TYPE (fd->loop.v);
1805 edge e, ne;
1806 int i;
1807
1808 /* Collapsed loops need work for expansion into SSA form. */
1809 gcc_assert (!gimple_in_ssa_p (cfun));
1810
1811 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1812 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1813 {
1814 gcc_assert (fd->ordered == 0);
1815 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1816 isn't supposed to be handled, as the inner loop doesn't
1817 use it. */
1818 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1819 OMP_CLAUSE__LOOPTEMP_);
1820 gcc_assert (innerc);
1821 for (i = 0; i < fd->collapse; i++)
1822 {
1823 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1824 OMP_CLAUSE__LOOPTEMP_);
1825 gcc_assert (innerc);
1826 if (i)
1827 counts[i] = OMP_CLAUSE_DECL (innerc);
1828 else
1829 counts[0] = NULL_TREE;
1830 }
1831 return;
1832 }
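  /* Illustration (a sketch): for a combined construct such as
     #pragma omp parallel for collapse(2), the parent construct has
     already computed the counts, so the block above merely reads the
     third and following _looptemp_ clause decls into counts[1..],
     leaving counts[0] as NULL_TREE.  */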
1833
1834 for (i = fd->collapse; i < fd->ordered; i++)
1835 {
1836 tree itype = TREE_TYPE (fd->loops[i].v);
1837 counts[i] = NULL_TREE;
1838 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1839 fold_convert (itype, fd->loops[i].n1),
1840 fold_convert (itype, fd->loops[i].n2));
1841 if (t && integer_zerop (t))
1842 {
1843 for (i = fd->collapse; i < fd->ordered; i++)
1844 counts[i] = build_int_cst (type, 0);
1845 break;
1846 }
1847 }
 1848	  bool rect_count_seen = false;
1849 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1850 {
1851 tree itype = TREE_TYPE (fd->loops[i].v);
1852
1853 if (i >= fd->collapse && counts[i])
1854 continue;
1855 if (fd->non_rect)
1856 {
1857 /* Skip loops that use outer iterators in their expressions
1858 during this phase. */
1859 if (fd->loops[i].m1 || fd->loops[i].m2)
1860 {
1861 counts[i] = build_zero_cst (type);
1862 continue;
1863 }
1864 }
1865 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1866 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1867 fold_convert (itype, fd->loops[i].n1),
1868 fold_convert (itype, fd->loops[i].n2)))
1869 == NULL_TREE || !integer_onep (t)))
1870 {
1871 gcond *cond_stmt;
1872 tree n1, n2;
1873 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1874 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1875 true, GSI_SAME_STMT);
1876 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1877 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1878 true, GSI_SAME_STMT);
1879 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1880 NULL_TREE, NULL_TREE);
1881 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1882 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1883 expand_omp_regimplify_p, NULL, NULL)
1884 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1885 expand_omp_regimplify_p, NULL, NULL))
1886 {
1887 *gsi = gsi_for_stmt (cond_stmt);
1888 gimple_regimplify_operands (cond_stmt, gsi);
1889 }
1890 e = split_block (entry_bb, cond_stmt);
1891 basic_block &zero_iter_bb
1892 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1893 int &first_zero_iter
1894 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1895 if (zero_iter_bb == NULL)
1896 {
1897 gassign *assign_stmt;
1898 first_zero_iter = i;
1899 zero_iter_bb = create_empty_bb (entry_bb);
1900 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1901 *gsi = gsi_after_labels (zero_iter_bb);
1902 if (i < fd->collapse)
1903 assign_stmt = gimple_build_assign (fd->loop.n2,
1904 build_zero_cst (type));
1905 else
1906 {
1907 counts[i] = create_tmp_reg (type, ".count");
1908 assign_stmt
1909 = gimple_build_assign (counts[i], build_zero_cst (type));
1910 }
1911 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1912 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1913 entry_bb);
1914 }
1915 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
 1916	  ne->probability = profile_probability::very_unlikely ();
 1917	  e->flags = EDGE_TRUE_VALUE;
 1918	  e->probability = ne->probability.invert ();
 1919	  if (l2_dom_bb == NULL)
 1920	    l2_dom_bb = entry_bb;
 1921	  entry_bb = e->dest;
 1922	  *gsi = gsi_last_nondebug_bb (entry_bb);
1923 }
1924
1925 if (POINTER_TYPE_P (itype))
1926 itype = signed_type_for (itype);
1927 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1928 ? -1 : 1));
1929 t = fold_build2 (PLUS_EXPR, itype,
1930 fold_convert (itype, fd->loops[i].step), t);
1931 t = fold_build2 (PLUS_EXPR, itype, t,
1932 fold_convert (itype, fd->loops[i].n2));
1933 t = fold_build2 (MINUS_EXPR, itype, t,
1934 fold_convert (itype, fd->loops[i].n1));
 1935	      /* ?? We could probably use CEIL_DIV_EXPR instead of
 1936	 TRUNC_DIV_EXPR plus adjusting by hand, unless we can't
 1937	 generate the same code in the end because generically we
 1938	 don't know that the values involved must be negative for
 1939	 GT.  ?? */
1940 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1941 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1942 fold_build1 (NEGATE_EXPR, itype, t),
1943 fold_build1 (NEGATE_EXPR, itype,
1944 fold_convert (itype,
1945 fd->loops[i].step)));
1946 else
1947 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1948 fold_convert (itype, fd->loops[i].step));
1949 t = fold_convert (type, t);
1950 if (TREE_CODE (t) == INTEGER_CST)
1951 counts[i] = t;
1952 else
1953 {
1954 if (i < fd->collapse || i != first_zero_iter2)
1955 counts[i] = create_tmp_reg (type, ".count");
1956 expand_omp_build_assign (gsi, counts[i], t);
1957 }
1958 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1959 {
1960 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1961 continue;
1962 if (!rect_count_seen)
1963 {
1964 t = counts[i];
1965 rect_count_seen = true;
1966 }
1967 else
1968 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1969 expand_omp_build_assign (gsi, fd->loop.n2, t);
1970 }
1971 }
1972 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1973 {
1974 gcc_assert (fd->last_nonrect != -1);
1975
1976 /* Fallback implementation. Evaluate the loops with m1/m2
1977 non-NULL as well as their outer loops at runtime using temporaries
1978 instead of the original iteration variables, and in the
1979 body just bump the counter. */
1980 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1981 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1982 build_zero_cst (type));
1983 gimple_stmt_iterator gsi2 = *gsi;
1984 gsi_prev (&gsi2);
1985 e = split_block (entry_bb, gsi_stmt (gsi2));
1986 e = split_block (e->dest, (gimple *) NULL);
1987 basic_block cur_bb = e->src;
1988 basic_block next_bb = e->dest;
1989 entry_bb = e->dest;
1990 *gsi = gsi_after_labels (entry_bb);
1991
1992 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
1993 memset (vs, 0, fd->last_nonrect * sizeof (tree));
1994
1995 for (i = 0; i <= fd->last_nonrect; i++)
1996 {
1997 if (fd->loops[i].m1 == NULL_TREE
1998 && fd->loops[i].m2 == NULL_TREE
1999 && !fd->loops[i].non_rect_referenced)
2000 continue;
2001
2002 tree itype = TREE_TYPE (fd->loops[i].v);
2003
2004 gsi2 = gsi_after_labels (cur_bb);
2005 tree n1, n2;
2006 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2007 if (fd->loops[i].m1)
2008 {
2009 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2010 n1 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
2011 n1);
2012 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2013 }
2014 else
2015 n1 = t;
2016 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2017 true, GSI_SAME_STMT);
2018 if (i < fd->last_nonrect)
2019 {
2020 vs[i] = create_tmp_reg (itype, ".it");
2021 expand_omp_build_assign (&gsi2, vs[i], n1);
2022 }
2023 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2024 if (fd->loops[i].m2)
2025 {
2026 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2027 n2 = fold_build2 (MULT_EXPR, itype, vs[i - fd->loops[i].outer],
2028 n2);
2029 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2030 }
2031 else
2032 n2 = t;
2033 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2034 true, GSI_SAME_STMT);
2035 if (i == fd->last_nonrect)
2036 {
2037 gcond *cond_stmt
2038 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2039 NULL_TREE, NULL_TREE);
2040 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2041 e = split_block (cur_bb, cond_stmt);
2042 e->flags = EDGE_TRUE_VALUE;
2043 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2044 e->probability = profile_probability::likely ().guessed ();
2045 ne->probability = e->probability.invert ();
2046 gsi2 = gsi_after_labels (e->dest);
2047
2048 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2049 ? -1 : 1));
2050 t = fold_build2 (PLUS_EXPR, itype,
2051 fold_convert (itype, fd->loops[i].step), t);
2052 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2053 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2054 tree step = fold_convert (itype, fd->loops[i].step);
2055 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2056 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2057 fold_build1 (NEGATE_EXPR, itype, t),
2058 fold_build1 (NEGATE_EXPR, itype, step));
2059 else
2060 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2061 t = fold_convert (type, t);
2062 t = fold_build2 (PLUS_EXPR, type, counts[fd->last_nonrect], t);
2063 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2064 true, GSI_SAME_STMT);
2065 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2066 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2067 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2068 break;
2069 }
2070 e = split_block (cur_bb, last_stmt (cur_bb));
2071
2072 basic_block new_cur_bb = create_empty_bb (cur_bb);
2073 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2074
2075 gsi2 = gsi_after_labels (e->dest);
2076 tree step = fold_convert (itype, unshare_expr (fd->loops[i].step));
2077 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2078 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 expand_omp_build_assign (&gsi2, vs[i], t);
2081
2082 ne = split_block (e->dest, last_stmt (e->dest));
2083 gsi2 = gsi_after_labels (ne->dest);
2084
2085 gcond *cond_stmt
2086 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2087 NULL_TREE, NULL_TREE);
2088 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2089 edge e3, e4;
2090 if (next_bb == entry_bb)
2091 {
2092 e3 = find_edge (ne->dest, next_bb);
2093 e3->flags = EDGE_FALSE_VALUE;
2094 }
2095 else
2096 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2097 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2098 e4->probability = profile_probability::likely ().guessed ();
2099 e3->probability = e4->probability.invert ();
2100 basic_block esrc = e->src;
2101 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2102 cur_bb = new_cur_bb;
2103 basic_block latch_bb = next_bb;
2104 next_bb = e->dest;
2105 remove_edge (e);
2106 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2107 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2108 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2109 }
2110 t = NULL_TREE;
2111 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2112 if (!fd->loops[i].non_rect_referenced
2113 && fd->loops[i].m1 == NULL_TREE
2114 && fd->loops[i].m2 == NULL_TREE)
2115 {
2116 if (t == NULL_TREE)
2117 t = counts[i];
2118 else
2119 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2120 }
2121 if (t)
2122 {
2123 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2124 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2125 }
2126 if (!rect_count_seen)
2127 t = counts[fd->last_nonrect];
2128 else
2129 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2130 counts[fd->last_nonrect]);
2131 expand_omp_build_assign (gsi, fd->loop.n2, t);
2132 }
2133 else if (fd->non_rect)
2134 {
2135 tree t = fd->loop.n2;
2136 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2137 int non_rect_referenced = 0, non_rect = 0;
2138 for (i = 0; i < fd->collapse; i++)
2139 {
 2140	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
2141 && !integer_zerop (counts[i]))
2142 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2143 if (fd->loops[i].non_rect_referenced)
2144 non_rect_referenced++;
2145 if (fd->loops[i].m1 || fd->loops[i].m2)
2146 non_rect++;
2147 }
2148 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2149 counts[fd->last_nonrect] = t;
2150 }
2151}
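/* To illustrate what the function above emits (a sketch with made-up
   names, not verbatim compiler output): for
     #pragma omp for collapse(2)
     for (i = a; i < b; i++)
       for (j = c; j < d; j++)
   it generates roughly
     if (!(a < b)) goto zero_iter_bb;   // predicted very unlikely
     count0 = (b - a + 1 - 1) / 1;      // == b - a
     if (!(c < d)) goto zero_iter_bb;
     count1 = d - c;
     n2 = count0 * count1;
     ...
     zero_iter_bb: n2 = 0;
   with n2 standing for fd->loop.n2, the total iteration count.  */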
2152
2153/* Helper function for expand_omp_{for_*,simd}. Generate code like:
2154 T = V;
2155 V3 = N31 + (T % count3) * STEP3;
2156 T = T / count3;
2157 V2 = N21 + (T % count2) * STEP2;
2158 T = T / count2;
2159 V1 = N11 + T * STEP1;
2160 if this loop doesn't have an inner loop construct combined with it.
2161 If it does have an inner loop construct combined with it and the
2162 iteration count isn't known constant, store values from counts array
2163 into its _looptemp_ temporaries instead.
2164 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2165 inclusive), use the count of all those loops together, and either
2166 find quadratic etc. equation roots (TBD), or as a fallback, do:
2167 COUNT = 0;
2168 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2169 for (tmpj = M21 * tmpi + N21;
2170 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2171 {
2172 int tmpk1 = M31 * tmpj + N31;
2173 int tmpk2 = M32 * tmpj + N32;
2174 if (tmpk1 COND3 tmpk2)
2175 {
2176 if (COND3 is <)
2177 adj = STEP3 - 1;
2178 else
2179 adj = STEP3 + 1;
2180 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2181 if (COUNT + temp > T)
2182 {
2183 V1 = tmpi;
2184 V2 = tmpj;
2185 V3 = tmpk1 + (T - COUNT) * STEP3;
2186 goto done;
2187 }
2188 else
2189 COUNT += temp;
2190 }
2191 }
2192 done:;
2193 but for optional innermost or outermost rectangular loops that aren't
2194 referenced by other loop expressions keep doing the division/modulo. */
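/* A worked rectangular example (hypothetical numbers): with
   count2 == 5 and T == 13, the sequence above yields
     V2 = N21 + (13 % 5) * STEP2;   // inner index 3
     T = 13 / 5;                    // T == 2
     V1 = N11 + 2 * STEP1;
   i.e. the linear iteration number is peeled off innermost-first.  */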
2195
2196static void
2197expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2198 tree *counts, tree *nonrect_bounds,
2199 gimple *inner_stmt, tree startvar)
2200{
2201 int i;
2202 if (gimple_omp_for_combined_p (fd->for_stmt))
2203 {
2204 /* If fd->loop.n2 is constant, then no propagation of the counts
2205 is needed, they are constant. */
2206 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2207 return;
2208
2209 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2210 ? gimple_omp_taskreg_clauses (inner_stmt)
2211 : gimple_omp_for_clauses (inner_stmt);
2212 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2213 isn't supposed to be handled, as the inner loop doesn't
2214 use it. */
2215 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2216 gcc_assert (innerc);
2217 for (i = 0; i < fd->collapse; i++)
2218 {
2219 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2220 OMP_CLAUSE__LOOPTEMP_);
2221 gcc_assert (innerc);
2222 if (i)
2223 {
2224 tree tem = OMP_CLAUSE_DECL (innerc);
2225 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
2226 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2227 false, GSI_CONTINUE_LINKING);
2228 gassign *stmt = gimple_build_assign (tem, t);
2229 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2230 }
2231 }
2232 return;
2233 }
2234
2235 tree type = TREE_TYPE (fd->loop.v);
2236 tree tem = create_tmp_reg (type, ".tem");
2237 gassign *stmt = gimple_build_assign (tem, startvar);
2238 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2239
2240 for (i = fd->collapse - 1; i >= 0; i--)
2241 {
2242 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2243 itype = vtype;
2244 if (POINTER_TYPE_P (vtype))
2245 itype = signed_type_for (vtype);
 2246	      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
629b3d75
MJ
2247 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2248 else
2249 t = tem;
2250 if (i == fd->last_nonrect)
2251 {
2252 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 tree stopval = t;
2255 tree idx = create_tmp_reg (type, ".count");
2256 expand_omp_build_assign (gsi, idx,
2257 build_zero_cst (type), true);
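	  /* The fast path below (for a sub-nest of exactly two loops whose
	     inner bounds depend linearly on the outer iterator) relies on
	     the identity that after C complete outer iterations the number
	     of executed inner iterations is
	       d(C) = C * min_inner_iterations + factor * C * (C - 1) / 2,
	     so the outer index for a given STOPVAL can be recovered by
	     solving the quadratic d(C) <= STOPVAL, i.e.
	       C = (sqrt (t3 * t3 + 2 * factor * STOPVAL) - t3) / factor
	     with t3 = min_inner_iterations - factor / 2, computed in
	     double precision with a sqrt and verified afterwards, falling
	     back to iteration when the guess does not check out.  */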
2258 basic_block bb_triang = NULL;
2259 if (fd->first_nonrect + 1 == fd->last_nonrect
2260 /* For now. */
2261 && TREE_CODE (fd->loop.n2) == INTEGER_CST
2262 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2263 != CODE_FOR_nothing))
2264 {
2265 tree itype = TREE_TYPE (fd->loops[i].v);
2266 tree min_inner_iterations = fd->min_inner_iterations;
2267 tree factor = fd->factor;
2268 gcond *cond_stmt
2269 = gimple_build_cond (NE_EXPR, factor,
2270 build_zero_cst (TREE_TYPE (factor)),
2271 NULL_TREE, NULL_TREE);
2272 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2273 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2274 basic_block bb0 = e->src;
2275 e->flags = EDGE_TRUE_VALUE;
2276 e->probability = profile_probability::likely ();
2277 *gsi = gsi_after_labels (e->dest);
2278 tree slltype = long_long_integer_type_node;
2279 tree ulltype = long_long_unsigned_type_node;
2280 tree stopvalull = fold_convert (ulltype, stopval);
2281 stopvalull
2282 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2283 false, GSI_CONTINUE_LINKING);
2284 min_inner_iterations
2285 = fold_convert (slltype, min_inner_iterations);
2286 min_inner_iterations
2287 = force_gimple_operand_gsi (gsi, min_inner_iterations, true,
2288 NULL_TREE, false,
2289 GSI_CONTINUE_LINKING);
2290 factor = fold_convert (slltype, factor);
2291 factor
2292 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2293 false, GSI_CONTINUE_LINKING);
2294 tree min_inner_iterationsd
2295 = fold_build1 (FLOAT_EXPR, double_type_node,
2296 min_inner_iterations);
2297 min_inner_iterationsd
2298 = force_gimple_operand_gsi (gsi, min_inner_iterationsd, true,
2299 NULL_TREE, false,
2300 GSI_CONTINUE_LINKING);
2301 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2302 factor);
2303 factord = force_gimple_operand_gsi (gsi, factord, true,
2304 NULL_TREE, false,
2305 GSI_CONTINUE_LINKING);
2306 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2307 stopvalull);
2308 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2309 NULL_TREE, false,
2310 GSI_CONTINUE_LINKING);
 2311	      /* Temporarily disable flag_rounding_math; the values will be
 2312	 decimal numbers divided by 2, and worst-case imprecision
 2313	 due to too large values ought to be caught later by the
 2314	 checks for the fallback.  */
2315 int save_flag_rounding_math = flag_rounding_math;
2316 flag_rounding_math = 0;
2317 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2318 build_real (double_type_node, dconst2));
2319 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2320 min_inner_iterationsd, t);
2321 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2322 GSI_CONTINUE_LINKING);
2323 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2324 build_real (double_type_node, dconst2));
2325 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2326 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2327 fold_build2 (MULT_EXPR, double_type_node,
2328 t3, t3));
2329 flag_rounding_math = save_flag_rounding_math;
2330 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2331 GSI_CONTINUE_LINKING);
2332 cond_stmt
2333 = gimple_build_cond (LT_EXPR, t,
2334 build_zero_cst (double_type_node),
2335 NULL_TREE, NULL_TREE);
2336 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2337 e = split_block (gsi_bb (*gsi), cond_stmt);
2338 basic_block bb1 = e->src;
2339 e->flags = EDGE_FALSE_VALUE;
2340 e->probability = profile_probability::very_likely ();
2341 *gsi = gsi_after_labels (e->dest);
2342 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2343 tree sqrtr = create_tmp_var (double_type_node);
2344 gimple_call_set_lhs (call, sqrtr);
2345 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2346 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2347 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2348 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2349 tree c = create_tmp_var (ulltype);
2350 tree d = create_tmp_var (ulltype);
2351 expand_omp_build_assign (gsi, c, t, true);
2352 t = fold_build2 (MINUS_EXPR, ulltype, c,
2353 build_one_cst (ulltype));
2354 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2355 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2356 t = fold_build2 (MULT_EXPR, ulltype, fd->factor, t);
2357 tree t2 = fold_build2 (MULT_EXPR, ulltype, c,
2358 fd->min_inner_iterations);
2359 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2360 expand_omp_build_assign (gsi, d, t, true);
2361 t = fold_build2 (MULT_EXPR, ulltype, fd->factor, c);
2362 t = fold_build2 (PLUS_EXPR, ulltype,
2363 t, fd->min_inner_iterations);
2364 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2365 GSI_CONTINUE_LINKING);
2366 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2367 NULL_TREE, NULL_TREE);
2368 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2369 e = split_block (gsi_bb (*gsi), cond_stmt);
2370 basic_block bb2 = e->src;
2371 e->flags = EDGE_TRUE_VALUE;
2372 e->probability = profile_probability::very_likely ();
2373 *gsi = gsi_after_labels (e->dest);
2374 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2375 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2376 GSI_CONTINUE_LINKING);
2377 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2378 NULL_TREE, NULL_TREE);
2379 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2380 e = split_block (gsi_bb (*gsi), cond_stmt);
2381 basic_block bb3 = e->src;
2382 e->flags = EDGE_FALSE_VALUE;
2383 e->probability = profile_probability::very_likely ();
2384 *gsi = gsi_after_labels (e->dest);
2385 t = fold_convert (itype, c);
2386 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2387 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t);
2388 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2389 GSI_CONTINUE_LINKING);
2390 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2391 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2392 t2 = fold_convert (itype, t2);
2393 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2394 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2395 if (fd->loops[i].m1)
2396 {
2397 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2398 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2399 }
2400 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2401 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2402 bb_triang = e->src;
2403 *gsi = gsi_after_labels (e->dest);
2404 remove_edge (e);
2405 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2406 e->probability = profile_probability::very_unlikely ();
2407 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2408 e->probability = profile_probability::very_unlikely ();
2409 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2410 e->probability = profile_probability::very_unlikely ();
2411
2412 basic_block bb4 = create_empty_bb (bb0);
2413 add_bb_to_loop (bb4, bb0->loop_father);
2414 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2415 e->probability = profile_probability::unlikely ();
2416 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2417 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2418 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2419 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2420 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2421 counts[i], counts[i - 1]);
2422 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2423 GSI_CONTINUE_LINKING);
2424 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2425 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2426 t = fold_convert (itype, t);
2427 t2 = fold_convert (itype, t2);
2428 t = fold_build2 (MULT_EXPR, itype, t,
2429 fold_convert (itype, fd->loops[i].step));
2430 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2431 t2 = fold_build2 (MULT_EXPR, itype, t2,
2432 fold_convert (itype, fd->loops[i - 1].step));
2433 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2434 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2435 false, GSI_CONTINUE_LINKING);
2436 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2437 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2438 if (fd->loops[i].m1)
2439 {
2440 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2441 fd->loops[i - 1].v);
2442 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2443 }
2444 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2445 false, GSI_CONTINUE_LINKING);
2446 stmt = gimple_build_assign (fd->loops[i].v, t);
2447 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2448 }
 2449	  /* Fallback implementation.  Evaluate the loops in between
 2450	     (inclusive) fd->first_nonrect and fd->last_nonrect at
 2451	     runtime using temporaries instead of the original iteration
 2452	     variables; in the body just bump the counter and compare
 2453	     it with the desired value.  */
2454 gimple_stmt_iterator gsi2 = *gsi;
2455 basic_block entry_bb = gsi_bb (gsi2);
2456 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2457 e = split_block (e->dest, (gimple *) NULL);
2458 basic_block dom_bb = NULL;
2459 basic_block cur_bb = e->src;
2460 basic_block next_bb = e->dest;
2461 entry_bb = e->dest;
2462 *gsi = gsi_after_labels (entry_bb);
2463
2464 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2465 tree n1 = NULL_TREE, n2 = NULL_TREE;
2466 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2467
2468 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2469 {
2470 tree itype = TREE_TYPE (fd->loops[j].v);
2471 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2472 && fd->loops[j].m2 == NULL_TREE
2473 && !fd->loops[j].non_rect_referenced);
2474 gsi2 = gsi_after_labels (cur_bb);
2475 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2476 if (fd->loops[j].m1)
2477 {
2478 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2479 n1 = fold_build2 (MULT_EXPR, itype,
2480 vs[j - fd->loops[j].outer], n1);
2481 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2482 }
2483 else if (rect_p)
2484 n1 = build_zero_cst (type);
2485 else
2486 n1 = t;
2487 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2488 true, GSI_SAME_STMT);
2489 if (j < fd->last_nonrect)
2490 {
2491 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2492 expand_omp_build_assign (&gsi2, vs[j], n1);
2493 }
2494 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2495 if (fd->loops[j].m2)
2496 {
2497 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2498 n2 = fold_build2 (MULT_EXPR, itype,
2499 vs[j - fd->loops[j].outer], n2);
2500 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2501 }
2502 else if (rect_p)
2503 n2 = counts[j];
2504 else
2505 n2 = t;
2506 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2507 true, GSI_SAME_STMT);
2508 if (j == fd->last_nonrect)
2509 {
2510 gcond *cond_stmt
2511 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2512 NULL_TREE, NULL_TREE);
2513 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2514 e = split_block (cur_bb, cond_stmt);
2515 e->flags = EDGE_TRUE_VALUE;
2516 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2517 e->probability = profile_probability::likely ().guessed ();
2518 ne->probability = e->probability.invert ();
2519 gsi2 = gsi_after_labels (e->dest);
2520
2521 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2522 ? -1 : 1));
2523 t = fold_build2 (PLUS_EXPR, itype,
2524 fold_convert (itype, fd->loops[j].step), t);
2525 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2526 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2527 tree step = fold_convert (itype, fd->loops[j].step);
2528 if (TYPE_UNSIGNED (itype)
2529 && fd->loops[j].cond_code == GT_EXPR)
2530 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2531 fold_build1 (NEGATE_EXPR, itype, t),
2532 fold_build1 (NEGATE_EXPR, itype, step));
2533 else
2534 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2535 t = fold_convert (type, t);
2536 t = fold_build2 (PLUS_EXPR, type, idx, t);
2537 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2538 true, GSI_SAME_STMT);
2539 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2540 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2541 cond_stmt
2542 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2543 NULL_TREE);
2544 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2545 e = split_block (gsi_bb (gsi2), cond_stmt);
2546 e->flags = EDGE_TRUE_VALUE;
2547 e->probability = profile_probability::likely ().guessed ();
2548 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2549 ne->probability = e->probability.invert ();
2550 gsi2 = gsi_after_labels (e->dest);
2551 expand_omp_build_assign (&gsi2, idx, t);
2552 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2553 break;
2554 }
2555 e = split_block (cur_bb, last_stmt (cur_bb));
2556
2557 basic_block new_cur_bb = create_empty_bb (cur_bb);
2558 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2559
2560 gsi2 = gsi_after_labels (e->dest);
2561 if (rect_p)
2562 t = fold_build2 (PLUS_EXPR, type, vs[j],
2563 build_one_cst (type));
2564 else
2565 {
2566 tree step
2567 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2568 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2569 }
2570 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2571 true, GSI_SAME_STMT);
2572 expand_omp_build_assign (&gsi2, vs[j], t);
2573
2574 edge ne = split_block (e->dest, last_stmt (e->dest));
2575 gsi2 = gsi_after_labels (ne->dest);
2576
2577 gcond *cond_stmt;
2578 if (next_bb == entry_bb)
2579 /* No need to actually check the outermost condition. */
2580 cond_stmt
2581 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2582 boolean_true_node,
2583 NULL_TREE, NULL_TREE);
2584 else
2585 cond_stmt
2586 = gimple_build_cond (rect_p ? LT_EXPR
2587 : fd->loops[j].cond_code,
2588 vs[j], n2, NULL_TREE, NULL_TREE);
2589 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2590 edge e3, e4;
2591 if (next_bb == entry_bb)
2592 {
2593 e3 = find_edge (ne->dest, next_bb);
2594 e3->flags = EDGE_FALSE_VALUE;
2595 dom_bb = ne->dest;
2596 }
2597 else
2598 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2599 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2600 e4->probability = profile_probability::likely ().guessed ();
2601 e3->probability = e4->probability.invert ();
2602 basic_block esrc = e->src;
2603 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2604 cur_bb = new_cur_bb;
2605 basic_block latch_bb = next_bb;
2606 next_bb = e->dest;
2607 remove_edge (e);
2608 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2609 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2610 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2611 }
2612 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2613 {
2614 tree itype = TREE_TYPE (fd->loops[j].v);
2615 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2616 && fd->loops[j].m2 == NULL_TREE
2617 && !fd->loops[j].non_rect_referenced);
2618 if (j == fd->last_nonrect)
2619 {
2620 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2621 t = fold_convert (itype, t);
2622 tree t2
2623 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2624 t = fold_build2 (MULT_EXPR, itype, t, t2);
2625 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2626 }
2627 else if (rect_p)
2628 {
2629 t = fold_convert (itype, vs[j]);
2630 t = fold_build2 (MULT_EXPR, itype, t,
2631 fold_convert (itype, fd->loops[j].step));
2632 if (POINTER_TYPE_P (vtype))
2633 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2634 else
2635 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2636 }
2637 else
2638 t = vs[j];
2639 t = force_gimple_operand_gsi (gsi, t, false,
2640 NULL_TREE, true,
2641 GSI_SAME_STMT);
2642 stmt = gimple_build_assign (fd->loops[j].v, t);
2643 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2644 }
2645 if (gsi_end_p (*gsi))
2646 *gsi = gsi_last_bb (gsi_bb (*gsi));
2647 else
2648 gsi_prev (gsi);
2649 if (bb_triang)
2650 {
2651 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2652 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2653 *gsi = gsi_after_labels (e->dest);
2654 if (!gsi_end_p (*gsi))
2655 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2656 }
 2657	    }
 2658	  else
2659 {
2660 t = fold_convert (itype, t);
2661 t = fold_build2 (MULT_EXPR, itype, t,
2662 fold_convert (itype, fd->loops[i].step));
2663 if (POINTER_TYPE_P (vtype))
2664 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2665 else
2666 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2667 t = force_gimple_operand_gsi (gsi, t,
2668 DECL_P (fd->loops[i].v)
2669 && TREE_ADDRESSABLE (fd->loops[i].v),
2670 NULL_TREE, false,
2671 GSI_CONTINUE_LINKING);
2672 stmt = gimple_build_assign (fd->loops[i].v, t);
2673 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2674 }
2675 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2676 {
2677 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2678 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2679 false, GSI_CONTINUE_LINKING);
2680 stmt = gimple_build_assign (tem, t);
2681 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2682 }
2683 if (i == fd->last_nonrect)
2684 i = fd->first_nonrect;
 2685	    }
2686 if (fd->non_rect)
2687 for (i = 0; i <= fd->last_nonrect; i++)
2688 if (fd->loops[i].m2)
2689 {
2690 tree itype = TREE_TYPE (fd->loops[i].v);
2691
2692 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2693 t = fold_build2 (MULT_EXPR, itype,
2694 fd->loops[i - fd->loops[i].outer].v, t);
2695 t = fold_build2 (PLUS_EXPR, itype, t,
2696 fold_convert (itype,
2697 unshare_expr (fd->loops[i].n2)));
2698 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2699 t = force_gimple_operand_gsi (gsi, t, false,
2700 NULL_TREE, false,
2701 GSI_CONTINUE_LINKING);
2702 stmt = gimple_build_assign (nonrect_bounds[i], t);
2703 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2704 }
2705}
2706
2707/* Helper function for expand_omp_for_*. Generate code like:
2708 L10:
2709 V3 += STEP3;
2710 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2711 L11:
2712 V3 = N31;
2713 V2 += STEP2;
2714 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2715 L12:
2716 V2 = N21;
2717 V1 += STEP1;
2718 goto BODY_BB;
2719 For non-rectangular loops, use temporaries stored in nonrect_bounds
2720 for the upper bounds if M?2 multiplier is present. Given e.g.
2721 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2722 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2723 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2724 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2725 do:
2726 L10:
2727 V4 += STEP4;
2728 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2729 L11:
2730 V4 = N41 + M41 * V2; // This can be left out if the loop
2731 // refers to the immediate parent loop
2732 V3 += STEP3;
2733 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2734 L12:
2735 V3 = N31;
2736 V2 += STEP2;
2737 if (V2 cond2 N22) goto L120; else goto L13;
2738 L120:
2739 V4 = N41 + M41 * V2;
2740 NONRECT_BOUND4 = N42 + M42 * V2;
2741 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2742 L13:
2743 V2 = N21;
2744 V1 += STEP1;
2745 goto L120; */
2746
2747static basic_block
2748extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
2749 basic_block cont_bb, basic_block body_bb)
2750{
2751 basic_block last_bb, bb, collapse_bb = NULL;
2752 int i;
2753 gimple_stmt_iterator gsi;
2754 edge e;
2755 tree t;
2756 gimple *stmt;
2757
2758 last_bb = cont_bb;
2759 for (i = fd->collapse - 1; i >= 0; i--)
2760 {
2761 tree vtype = TREE_TYPE (fd->loops[i].v);
2762
2763 bb = create_empty_bb (last_bb);
2764 add_bb_to_loop (bb, last_bb->loop_father);
2765 gsi = gsi_start_bb (bb);
2766
2767 if (i < fd->collapse - 1)
2768 {
2769 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2770 e->probability
2771 = profile_probability::guessed_always ().apply_scale (1, 8);
 2772
2773 struct omp_for_data_loop *l = &fd->loops[i + 1];
2774 if (l->m1 == NULL_TREE || l->outer != 1)
2775 {
2776 t = l->n1;
2777 if (l->m1)
2778 {
2779 tree t2
2780 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
2781 fd->loops[i + 1 - l->outer].v, l->m1);
2782 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
2783 }
2784 t = force_gimple_operand_gsi (&gsi, t,
2785 DECL_P (l->v)
2786 && TREE_ADDRESSABLE (l->v),
2787 NULL_TREE, false,
2788 GSI_CONTINUE_LINKING);
2789 stmt = gimple_build_assign (l->v, t);
2790 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2791 }
2792 }
2793 else
2794 collapse_bb = bb;
2795
2796 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2797
2798 if (POINTER_TYPE_P (vtype))
2799 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2800 else
2801 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2802 t = force_gimple_operand_gsi (&gsi, t,
2803 DECL_P (fd->loops[i].v)
2804 && TREE_ADDRESSABLE (fd->loops[i].v),
2805 NULL_TREE, false, GSI_CONTINUE_LINKING);
2806 stmt = gimple_build_assign (fd->loops[i].v, t);
2807 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2808
2809 if (fd->loops[i].non_rect_referenced)
2810 {
2811 basic_block update_bb = NULL, prev_bb = NULL;
2812 for (int j = i + 1; j <= fd->last_nonrect; j++)
2813 if (j - fd->loops[j].outer == i)
2814 {
2815 tree n1, n2;
2816 struct omp_for_data_loop *l = &fd->loops[j];
2817 basic_block this_bb = create_empty_bb (last_bb);
2818 add_bb_to_loop (this_bb, last_bb->loop_father);
2819 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
2820 if (prev_bb)
2821 {
2822 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
2823 e->probability
2824 = profile_probability::guessed_always ().apply_scale (7,
2825 8);
2826 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
2827
2828 }
2829 if (l->m1)
2830 {
2831 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
2832 fd->loops[i].v);
2833 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
2834 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2835 false,
2836 GSI_CONTINUE_LINKING);
2837 stmt = gimple_build_assign (l->v, n1);
2838 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2839 n1 = l->v;
2840 }
2841 else
2842 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
2843 NULL_TREE, false,
2844 GSI_CONTINUE_LINKING);
2845 if (l->m2)
2846 {
2847 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
2848 fd->loops[i].v);
2849 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
2850 t, unshare_expr (l->n2));
2851 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2852 false,
2853 GSI_CONTINUE_LINKING);
2854 stmt = gimple_build_assign (nonrect_bounds[j], n2);
2855 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2856 n2 = nonrect_bounds[j];
2857 }
2858 else
2859 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
2860 true, NULL_TREE, false,
2861 GSI_CONTINUE_LINKING);
2862 gcond *cond_stmt
2863 = gimple_build_cond (l->cond_code, n1, n2,
2864 NULL_TREE, NULL_TREE);
2865 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
2866 if (update_bb == NULL)
2867 update_bb = this_bb;
2868 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
2869 e->probability
2870 = profile_probability::guessed_always ().apply_scale (1, 8);
2871 if (prev_bb == NULL)
2872 set_immediate_dominator (CDI_DOMINATORS, this_bb, last_bb);
2873 prev_bb = this_bb;
2874 }
2875 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
2876 e->probability
2877 = profile_probability::guessed_always ().apply_scale (7, 8);
2878 body_bb = update_bb;
2879 }
2880
2881 if (i > 0)
2882 {
2883 if (fd->loops[i].m2)
2884 t = nonrect_bounds[i];
2885 else
2886 t = unshare_expr (fd->loops[i].n2);
2887 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2888 false, GSI_CONTINUE_LINKING);
2889 tree v = fd->loops[i].v;
2890 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2891 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2892 false, GSI_CONTINUE_LINKING);
2893 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2894 stmt = gimple_build_cond_empty (t);
2895 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2896 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2897 expand_omp_regimplify_p, NULL, NULL)
2898 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2899 expand_omp_regimplify_p, NULL, NULL))
2900 gimple_regimplify_operands (stmt, &gsi);
 2901	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
 2902	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2903 }
2904 else
2905 make_edge (bb, body_bb, EDGE_FALLTHRU);
 2906	      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2907 last_bb = bb;
2908 }
2909
2910 return collapse_bb;
2911}
2912
2913/* Expand #pragma omp ordered depend(source). */
2914
2915static void
2916expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2917 tree *counts, location_t loc)
2918{
2919 enum built_in_function source_ix
2920 = fd->iter_type == long_integer_type_node
2921 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2922 gimple *g
2923 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2924 build_fold_addr_expr (counts[fd->ordered]));
2925 gimple_set_location (g, loc);
2926 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2927}
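/* A sketch of the result (illustrative): inside
     #pragma omp for ordered(2)
   a  #pragma omp ordered depend(source)  becomes the single call
     GOMP_doacross_post (&.orditera[0]);
   (or the _ull variant for unsigned long long iterators), where
   .orditera is the array holding the current iteration vector.  */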
2928
2929/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2930
2931static void
2932expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2933 tree *counts, tree c, location_t loc)
2934{
2935 auto_vec<tree, 10> args;
2936 enum built_in_function sink_ix
2937 = fd->iter_type == long_integer_type_node
2938 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2939 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2940 int i;
2941 gimple_stmt_iterator gsi2 = *gsi;
2942 bool warned_step = false;
2943
2944 for (i = 0; i < fd->ordered; i++)
2945 {
2946 tree step = NULL_TREE;
2947 off = TREE_PURPOSE (deps);
2948 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2949 {
2950 step = TREE_OPERAND (off, 1);
2951 off = TREE_OPERAND (off, 0);
2952 }
2953 if (!integer_zerop (off))
2954 {
2955 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2956 || fd->loops[i].cond_code == GT_EXPR);
2957 bool forward = fd->loops[i].cond_code == LT_EXPR;
2958 if (step)
2959 {
 2960	      /* Non-simple Fortran DO loops.  If step is variable,
 2961	 we don't know even the direction at compile time, so
 2962	 we can't warn.  */
2963 if (TREE_CODE (step) != INTEGER_CST)
2964 break;
2965 forward = tree_int_cst_sgn (step) != -1;
2966 }
2967 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2968 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2969 "waiting for lexically later iteration");
2970 break;
2971 }
2972 deps = TREE_CHAIN (deps);
2973 }
2974 /* If all offsets corresponding to the collapsed loops are zero,
 2975	     this depend clause can be ignored.  FIXME: but there is still a
 2976	     flush needed; we would need to emit one __sync_synchronize ()
 2977	     for it (perhaps conditionally).  Solve this together with the
 2978	     conservative dependence folding optimization.
2979 if (i >= fd->collapse)
2980 return; */
2981
2982 deps = OMP_CLAUSE_DECL (c);
2983 gsi_prev (&gsi2);
2984 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2985 edge e2 = split_block_after_labels (e1->dest);
2986
2987 gsi2 = gsi_after_labels (e1->dest);
2988 *gsi = gsi_last_bb (e1->src);
2989 for (i = 0; i < fd->ordered; i++)
2990 {
2991 tree itype = TREE_TYPE (fd->loops[i].v);
2992 tree step = NULL_TREE;
2993 tree orig_off = NULL_TREE;
2994 if (POINTER_TYPE_P (itype))
2995 itype = sizetype;
2996 if (i)
2997 deps = TREE_CHAIN (deps);
2998 off = TREE_PURPOSE (deps);
2999 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3000 {
3001 step = TREE_OPERAND (off, 1);
3002 off = TREE_OPERAND (off, 0);
3003 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3004 && integer_onep (fd->loops[i].step)
3005 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3006 }
3007 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3008 if (step)
3009 {
3010 off = fold_convert_loc (loc, itype, off);
3011 orig_off = off;
3012 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3013 }
3014
3015 if (integer_zerop (off))
3016 t = boolean_true_node;
3017 else
3018 {
3019 tree a;
3020 tree co = fold_convert_loc (loc, itype, off);
3021 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3022 {
3023 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3024 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3025 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3026 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3027 co);
3028 }
3029 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3030 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3031 fd->loops[i].v, co);
3032 else
3033 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3034 fd->loops[i].v, co);
3035 if (step)
3036 {
3037 tree t1, t2;
3038 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3039 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3040 fd->loops[i].n1);
3041 else
3042 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3043 fd->loops[i].n2);
3044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3045 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3046 fd->loops[i].n2);
3047 else
3048 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3049 fd->loops[i].n1);
3050 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3051 step, build_int_cst (TREE_TYPE (step), 0));
3052 if (TREE_CODE (step) != INTEGER_CST)
3053 {
3054 t1 = unshare_expr (t1);
3055 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3056 false, GSI_CONTINUE_LINKING);
3057 t2 = unshare_expr (t2);
3058 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3059 false, GSI_CONTINUE_LINKING);
3060 }
3061 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3062 t, t2, t1);
3063 }
3064 else if (fd->loops[i].cond_code == LT_EXPR)
3065 {
3066 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3067 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3068 fd->loops[i].n1);
3069 else
3070 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3071 fd->loops[i].n2);
3072 }
3073 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3074 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3075 fd->loops[i].n2);
3076 else
3077 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3078 fd->loops[i].n1);
3079 }
3080 if (cond)
3081 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3082 else
3083 cond = t;
3084
3085 off = fold_convert_loc (loc, itype, off);
3086
3087 if (step
3088 || (fd->loops[i].cond_code == LT_EXPR
3089 ? !integer_onep (fd->loops[i].step)
3090 : !integer_minus_onep (fd->loops[i].step)))
3091 {
3092 if (step == NULL_TREE
3093 && TYPE_UNSIGNED (itype)
3094 && fd->loops[i].cond_code == GT_EXPR)
3095 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3096 fold_build1_loc (loc, NEGATE_EXPR, itype,
3097 s));
3098 else
3099 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3100 orig_off ? orig_off : off, s);
3101 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3102 build_int_cst (itype, 0));
3103 if (integer_zerop (t) && !warned_step)
3104 {
3105 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3106 "refers to iteration never in the iteration "
3107 "space");
3108 warned_step = true;
3109 }
3110 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3111 cond, t);
3112 }
3113
3114 if (i <= fd->collapse - 1 && fd->collapse > 1)
3115 t = fd->loop.v;
3116 else if (counts[i])
3117 t = counts[i];
3118 else
3119 {
3120 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3121 fd->loops[i].v, fd->loops[i].n1);
3122 t = fold_convert_loc (loc, fd->iter_type, t);
3123 }
3124 if (step)
3125 /* We have divided off by step already earlier. */;
3126 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3127 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3128 fold_build1_loc (loc, NEGATE_EXPR, itype,
3129 s));
3130 else
3131 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3132 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3133 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3134 off = fold_convert_loc (loc, fd->iter_type, off);
3135 if (i <= fd->collapse - 1 && fd->collapse > 1)
3136 {
3137 if (i)
3138 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3139 off);
3140 if (i < fd->collapse - 1)
3141 {
3142 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3143 counts[i]);
3144 continue;
3145 }
3146 }
3147 off = unshare_expr (off);
3148 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3149 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3150 true, GSI_SAME_STMT);
3151 args.safe_push (t);
3152 }
3153 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3154 gimple_set_location (g, loc);
3155 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3156
3157 cond = unshare_expr (cond);
3158 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3159 GSI_CONTINUE_LINKING);
3160 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3161 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3162 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3163 e1->probability = e3->probability.invert ();
3164 e1->flags = EDGE_TRUE_VALUE;
3165 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3166
3167 *gsi = gsi_after_labels (e2->dest);
3168}
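/* An illustrative sketch of the expansion (placeholder bounds N11 etc.,
   unit steps, collapse(1)): for
     #pragma omp for ordered(2)
     ...
     #pragma omp ordered depend(sink: i - 1, j + 1)
   the code above emits roughly
     if (i - 1 >= N11 && j + 1 < N22)
       GOMP_doacross_wait ((i - 1) - N11, (j + 1) - N21);
   so the wait is skipped whenever the sink iteration falls outside
   the iteration space.  */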
3169
3170/* Expand all #pragma omp ordered depend(source) and
3171 #pragma omp ordered depend(sink:...) constructs in the current
3172 #pragma omp for ordered(n) region. */
3173
3174static void
3175expand_omp_ordered_source_sink (struct omp_region *region,
3176 struct omp_for_data *fd, tree *counts,
3177 basic_block cont_bb)
3178{
3179 struct omp_region *inner;
3180 int i;
3181 for (i = fd->collapse - 1; i < fd->ordered; i++)
3182 if (i == fd->collapse - 1 && fd->collapse > 1)
3183 counts[i] = NULL_TREE;
3184 else if (i >= fd->collapse && !cont_bb)
3185 counts[i] = build_zero_cst (fd->iter_type);
3186 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3187 && integer_onep (fd->loops[i].step))
3188 counts[i] = NULL_TREE;
3189 else
3190 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3191 tree atype
3192 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3193 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3194 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3195
3196 for (inner = region->inner; inner; inner = inner->next)
3197 if (inner->type == GIMPLE_OMP_ORDERED)
3198 {
3199 gomp_ordered *ord_stmt = inner->ord_stmt;
3200 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3201 location_t loc = gimple_location (ord_stmt);
3202 tree c;
3203 for (c = gimple_omp_ordered_clauses (ord_stmt);
3204 c; c = OMP_CLAUSE_CHAIN (c))
3205 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3206 break;
3207 if (c)
3208 expand_omp_ordered_source (&gsi, fd, counts, loc);
3209 for (c = gimple_omp_ordered_clauses (ord_stmt);
3210 c; c = OMP_CLAUSE_CHAIN (c))
3211 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3212 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3213 gsi_remove (&gsi, true);
3214 }
3215}
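/* For example (illustrative): with ordered(3), collapse(1) and all
   three loops of integer type with unit step, counts[0..2] stay
   NULL_TREE and only the 3-element .orditera array is created; a
   non-unit step in one of the loops would instead give that depth a
   separate .orditer counter.  */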
3216
3217/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3218 collapsed. */
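/* E.g. (a sketch) for ordered(2) collapse(1), the one non-collapsed
   dimension gets an explicit surrounding loop:
     V2 = N21;  .orditera[1] = 0;
   loop:
     ... body ...
     V2 += STEP2;  .orditera[1] = V2 - N21;
     if (V2 cond2 N22) goto loop;
   so the runtime can observe the current iteration vector.  */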
3219
3220static basic_block
3221expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3222 basic_block cont_bb, basic_block body_bb,
3223 bool ordered_lastprivate)
3224{
3225 if (fd->ordered == fd->collapse)
3226 return cont_bb;
3227
3228 if (!cont_bb)
3229 {
3230 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3231 for (int i = fd->collapse; i < fd->ordered; i++)
3232 {
3233 tree type = TREE_TYPE (fd->loops[i].v);
3234 tree n1 = fold_convert (type, fd->loops[i].n1);
3235 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3236 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3237 size_int (i - fd->collapse + 1),
3238 NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3240 }
3241 return NULL;
3242 }
3243
3244 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3245 {
3246 tree t, type = TREE_TYPE (fd->loops[i].v);
3247 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3248 expand_omp_build_assign (&gsi, fd->loops[i].v,
3249 fold_convert (type, fd->loops[i].n1));
3250 if (counts[i])
3251 expand_omp_build_assign (&gsi, counts[i],
3252 build_zero_cst (fd->iter_type));
3253 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3254 size_int (i - fd->collapse + 1),
3255 NULL_TREE, NULL_TREE);
3256 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3257 if (!gsi_end_p (gsi))
3258 gsi_prev (&gsi);
3259 else
3260 gsi = gsi_last_bb (body_bb);
3261 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3262 basic_block new_body = e1->dest;
3263 if (body_bb == cont_bb)
3264 cont_bb = new_body;
3265 edge e2 = NULL;
3266 basic_block new_header;
3267 if (EDGE_COUNT (cont_bb->preds) > 0)
3268 {
3269 gsi = gsi_last_bb (cont_bb);
3270 if (POINTER_TYPE_P (type))
3271 t = fold_build_pointer_plus (fd->loops[i].v,
3272 fold_convert (sizetype,
3273 fd->loops[i].step));
3274 else
3275 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3276 fold_convert (type, fd->loops[i].step));
3277 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3278 if (counts[i])
3279 {
3280 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3281 build_int_cst (fd->iter_type, 1));
3282 expand_omp_build_assign (&gsi, counts[i], t);
3283 t = counts[i];
3284 }
3285 else
3286 {
3287 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3288 fd->loops[i].v, fd->loops[i].n1);
3289 t = fold_convert (fd->iter_type, t);
3290 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3291 true, GSI_SAME_STMT);
3292 }
3293 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3294 size_int (i - fd->collapse + 1),
3295 NULL_TREE, NULL_TREE);
3296 expand_omp_build_assign (&gsi, aref, t);
3297 gsi_prev (&gsi);
3298 e2 = split_block (cont_bb, gsi_stmt (gsi));
3299 new_header = e2->dest;
3300 }
3301 else
3302 new_header = cont_bb;
3303 gsi = gsi_after_labels (new_header);
3304 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3305 true, GSI_SAME_STMT);
3306 tree n2
3307 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3308 true, NULL_TREE, true, GSI_SAME_STMT);
3309 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3310 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3311 edge e3 = split_block (new_header, gsi_stmt (gsi));
3312 cont_bb = e3->dest;
3313 remove_edge (e1);
3314 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3315 e3->flags = EDGE_FALSE_VALUE;
3316 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3317 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3318 e1->probability = e3->probability.invert ();
3319
3320 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3321 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3322
3323 if (e2)
3324 {
3325 class loop *loop = alloc_loop ();
3326 loop->header = new_header;
3327 loop->latch = e2->src;
3328 add_loop (loop, body_bb->loop_father);
3329 }
3330 }
3331
3332 /* If there are any lastprivate clauses and it is possible some loops
3333 might have zero iterations, ensure all the decls are initialized,
3334 otherwise we could crash evaluating C++ class iterators with lastprivate
3335 clauses. */
3336 bool need_inits = false;
3337 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3338 if (need_inits)
3339 {
3340 tree type = TREE_TYPE (fd->loops[i].v);
3341 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3342 expand_omp_build_assign (&gsi, fd->loops[i].v,
3343 fold_convert (type, fd->loops[i].n1));
3344 }
3345 else
3346 {
3347 tree type = TREE_TYPE (fd->loops[i].v);
3348 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3349 boolean_type_node,
3350 fold_convert (type, fd->loops[i].n1),
3351 fold_convert (type, fd->loops[i].n2));
3352 if (!integer_onep (this_cond))
3353 need_inits = true;
3354 }
3355
3356 return cont_bb;
3357}
3358
3359/* A subroutine of expand_omp_for. Generate code for a parallel
3360 loop with any schedule. Given parameters:
3361
3362 for (V = N1; V cond N2; V += STEP) BODY;
3363
3364 where COND is "<" or ">", we generate pseudocode
3365
3366 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3367 if (more) goto L0; else goto L3;
3368 L0:
3369 V = istart0;
3370 iend = iend0;
3371 L1:
3372 BODY;
3373 V += STEP;
3374 if (V cond iend) goto L1; else goto L2;
3375 L2:
3376 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3377 L3:
3378
3379 If this is a combined omp parallel loop, instead of the call to
3380 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3381 If this is gimple_omp_for_combined_p loop, then instead of assigning
3382 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3383 inner GIMPLE_OMP_FOR and V += STEP; and
3384 if (V cond iend) goto L1; else goto L2; are removed.
3385
3386 For collapsed loops, given parameters:
3387 collapse(3)
3388 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3389 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3390 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3391 BODY;
3392
3393 we generate pseudocode
3394
3395 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3396 if (cond3 is <)
3397 adj = STEP3 - 1;
3398 else
3399 adj = STEP3 + 1;
3400 count3 = (adj + N32 - N31) / STEP3;
3401 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3402 if (cond2 is <)
3403 adj = STEP2 - 1;
3404 else
3405 adj = STEP2 + 1;
3406 count2 = (adj + N22 - N21) / STEP2;
3407 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3408 if (cond1 is <)
3409 adj = STEP1 - 1;
3410 else
3411 adj = STEP1 + 1;
3412 count1 = (adj + N12 - N11) / STEP1;
3413 count = count1 * count2 * count3;
3414 goto Z1;
3415 Z0:
3416 count = 0;
3417 Z1:
3418 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3419 if (more) goto L0; else goto L3;
3420 L0:
3421 V = istart0;
3422 T = V;
3423 V3 = N31 + (T % count3) * STEP3;
3424 T = T / count3;
3425 V2 = N21 + (T % count2) * STEP2;
3426 T = T / count2;
3427 V1 = N11 + T * STEP1;
3428 iend = iend0;
3429 L1:
3430 BODY;
3431 V += 1;
3432 if (V < iend) goto L10; else goto L2;
3433 L10:
3434 V3 += STEP3;
3435 if (V3 cond3 N32) goto L1; else goto L11;
3436 L11:
3437 V3 = N31;
3438 V2 += STEP2;
3439 if (V2 cond2 N22) goto L1; else goto L12;
3440 L12:
3441 V2 = N21;
3442 V1 += STEP1;
3443 goto L1;
3444 L2:
3445 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3446 L3:
3447
3448 */
3449
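/* As a user-level analogy (a sketch assuming schedule(dynamic) with
   long iterators; GOMP_loop_dynamic_start/next are one possible
   START_FN/NEXT_FN pair), the generated control flow corresponds to

	long istart0, iend0;
	if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0))
	  do
	    for (long V = istart0; V < iend0; V += STEP)
	      BODY;
	  while (GOMP_loop_dynamic_next (&istart0, &iend0));
	GOMP_loop_end ();  */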
3450static void
3451expand_omp_for_generic (struct omp_region *region,
3452 struct omp_for_data *fd,
3453 enum built_in_function start_fn,
3454 enum built_in_function next_fn,
3455 tree sched_arg,
3456 gimple *inner_stmt)
3457{
3458 tree type, istart0, iend0, iend;
3459 tree t, vmain, vback, bias = NULL_TREE;
3460 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3461 basic_block l2_bb = NULL, l3_bb = NULL;
3462 gimple_stmt_iterator gsi;
3463 gassign *assign_stmt;
3464 bool in_combined_parallel = is_combined_parallel (region);
3465 bool broken_loop = region->cont == NULL;
3466 edge e, ne;
3467 tree *counts = NULL;
3468 int i;
3469 bool ordered_lastprivate = false;
3470
3471 gcc_assert (!broken_loop || !in_combined_parallel);
3472 gcc_assert (fd->iter_type == long_integer_type_node
3473 || !in_combined_parallel);
3474
3475 entry_bb = region->entry;
3476 cont_bb = region->cont;
3477 collapse_bb = NULL;
3478 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3479 gcc_assert (broken_loop
3480 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3481 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3482 l1_bb = single_succ (l0_bb);
3483 if (!broken_loop)
3484 {
3485 l2_bb = create_empty_bb (cont_bb);
3486 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3487 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3488 == l1_bb));
3489 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3490 }
3491 else
3492 l2_bb = NULL;
3493 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3494 exit_bb = region->exit;
3495
3496 gsi = gsi_last_nondebug_bb (entry_bb);
3497
3498 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3499 if (fd->ordered
3500 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3501 OMP_CLAUSE_LASTPRIVATE))
3502 ordered_lastprivate = true;
3503 tree reductions = NULL_TREE;
3504 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3505 tree memv = NULL_TREE;
3506 if (fd->lastprivate_conditional)
3507 {
3508 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3509 OMP_CLAUSE__CONDTEMP_);
3510 if (fd->have_pointer_condtemp)
3511 condtemp = OMP_CLAUSE_DECL (c);
3512 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3513 cond_var = OMP_CLAUSE_DECL (c);
3514 }
3515 if (sched_arg)
3516 {
3517 if (fd->have_reductemp)
3518 {
3519 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3520 OMP_CLAUSE__REDUCTEMP_);
3521 reductions = OMP_CLAUSE_DECL (c);
3522 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3523 gimple *g = SSA_NAME_DEF_STMT (reductions);
3524 reductions = gimple_assign_rhs1 (g);
3525 OMP_CLAUSE_DECL (c) = reductions;
3526 entry_bb = gimple_bb (g);
3527 edge e = split_block (entry_bb, g);
3528 if (region->entry == entry_bb)
3529 region->entry = e->dest;
3530 gsi = gsi_last_bb (entry_bb);
3531 }
3532 else
3533 reductions = null_pointer_node;
3534 if (fd->have_pointer_condtemp)
3535 {
3536 tree type = TREE_TYPE (condtemp);
3537 memv = create_tmp_var (type);
3538 TREE_ADDRESSABLE (memv) = 1;
3539 unsigned HOST_WIDE_INT sz
3540 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3541 sz *= fd->lastprivate_conditional;
3542 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3543 false);
3544 mem = build_fold_addr_expr (memv);
3545 }
3546 else
3547 mem = null_pointer_node;
3548 }
3549 if (fd->collapse > 1 || fd->ordered)
3550 {
3551 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3552 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3553
3554 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3555 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3556 zero_iter1_bb, first_zero_iter1,
3557 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3558
3559 if (zero_iter1_bb)
3560 {
3561 /* Some counts[i] vars might be uninitialized if
3562 some loop has zero iterations. But the body shouldn't
3563 be executed in that case, so just avoid uninit warnings. */
3564 for (i = first_zero_iter1;
3565 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3566 if (SSA_VAR_P (counts[i]))
3567 TREE_NO_WARNING (counts[i]) = 1;
3568 gsi_prev (&gsi);
3569 e = split_block (entry_bb, gsi_stmt (gsi));
3570 entry_bb = e->dest;
3571 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3572 gsi = gsi_last_nondebug_bb (entry_bb);
3573 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3574 get_immediate_dominator (CDI_DOMINATORS,
3575 zero_iter1_bb));
3576 }
3577 if (zero_iter2_bb)
3578 {
3579 /* Some counts[i] vars might be uninitialized if
3580 some loop has zero iterations. But the body shouldn't
3581 be executed in that case, so just avoid uninit warnings. */
3582 for (i = first_zero_iter2; i < fd->ordered; i++)
3583 if (SSA_VAR_P (counts[i]))
3584 TREE_NO_WARNING (counts[i]) = 1;
3585 if (zero_iter1_bb)
3586 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3587 else
3588 {
3589 gsi_prev (&gsi);
3590 e = split_block (entry_bb, gsi_stmt (gsi));
3591 entry_bb = e->dest;
3592 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3593 gsi = gsi_last_nondebug_bb (entry_bb);
3594 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3595 get_immediate_dominator
3596 (CDI_DOMINATORS, zero_iter2_bb));
3597 }
3598 }
3599 if (fd->collapse == 1)
3600 {
3601 counts[0] = fd->loop.n2;
3602 fd->loop = fd->loops[0];
3603 }
3604 }
3605
3606 type = TREE_TYPE (fd->loop.v);
3607 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3608 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3609 TREE_ADDRESSABLE (istart0) = 1;
3610 TREE_ADDRESSABLE (iend0) = 1;
3611
3612 /* See if we need to bias by LLONG_MIN. */
3613 if (fd->iter_type == long_long_unsigned_type_node
3614 && TREE_CODE (type) == INTEGER_TYPE
3615 && !TYPE_UNSIGNED (type)
3616 && fd->ordered == 0)
3617 {
3618 tree n1, n2;
3619
3620 if (fd->loop.cond_code == LT_EXPR)
3621 {
3622 n1 = fd->loop.n1;
3623 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3624 }
3625 else
3626 {
3627 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3628 n2 = fd->loop.n1;
3629 }
3630 if (TREE_CODE (n1) != INTEGER_CST
3631 || TREE_CODE (n2) != INTEGER_CST
3632 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3633 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3634 }
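/* Worked example (illustrative): for a signed long long V running from
   -5 to 5 while fd->iter_type is unsigned long long, -5 and 5 compare
   wrongly as unsigned values (0xfffffffffffffffb > 5).  Adding
   bias = 0x8000000000000000 maps them to 0x7ffffffffffffffb and
   0x8000000000000005, restoring the ordering; the bias is subtracted
   again when istart0/iend0 are converted back to the loop's type.  */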
3635
3636 gimple_stmt_iterator gsif = gsi;
3637 gsi_prev (&gsif);
3638
3639 tree arr = NULL_TREE;
3640 if (in_combined_parallel)
3641 {
3642 gcc_assert (fd->ordered == 0);
3643 /* In a combined parallel loop, emit a call to
3644 GOMP_loop_foo_next. */
3645 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3646 build_fold_addr_expr (istart0),
3647 build_fold_addr_expr (iend0));
3648 }
3649 else
3650 {
3651 tree t0, t1, t2, t3, t4;
3652 /* If this is not a combined parallel loop, emit a call to
3653 GOMP_loop_foo_start in ENTRY_BB. */
3654 t4 = build_fold_addr_expr (iend0);
3655 t3 = build_fold_addr_expr (istart0);
3656 if (fd->ordered)
3657 {
3658 t0 = build_int_cst (unsigned_type_node,
3659 fd->ordered - fd->collapse + 1);
3660 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3661 fd->ordered
3662 - fd->collapse + 1),
3663 ".omp_counts");
3664 DECL_NAMELESS (arr) = 1;
3665 TREE_ADDRESSABLE (arr) = 1;
3666 TREE_STATIC (arr) = 1;
3667 vec<constructor_elt, va_gc> *v;
3668 vec_alloc (v, fd->ordered - fd->collapse + 1);
3669 int idx;
3670
3671 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3672 {
3673 tree c;
3674 if (idx == 0 && fd->collapse > 1)
3675 c = fd->loop.n2;
3676 else
3677 c = counts[idx + fd->collapse - 1];
3678 tree purpose = size_int (idx);
3679 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3680 if (TREE_CODE (c) != INTEGER_CST)
3681 TREE_STATIC (arr) = 0;
3682 }
3683
3684 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3685 if (!TREE_STATIC (arr))
3686 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3687 void_type_node, arr),
3688 true, NULL_TREE, true, GSI_SAME_STMT);
3689 t1 = build_fold_addr_expr (arr);
3690 t2 = NULL_TREE;
3691 }
3692 else
3693 {
3694 t2 = fold_convert (fd->iter_type, fd->loop.step);
3695 t1 = fd->loop.n2;
3696 t0 = fd->loop.n1;
3697 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3698 {
3699 tree innerc
3700 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3701 OMP_CLAUSE__LOOPTEMP_);
3702 gcc_assert (innerc);
3703 t0 = OMP_CLAUSE_DECL (innerc);
3704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3705 OMP_CLAUSE__LOOPTEMP_);
3706 gcc_assert (innerc);
3707 t1 = OMP_CLAUSE_DECL (innerc);
3708 }
3709 if (POINTER_TYPE_P (TREE_TYPE (t0))
3710 && TYPE_PRECISION (TREE_TYPE (t0))
3711 != TYPE_PRECISION (fd->iter_type))
3712 {
3713 /* Avoid casting pointers to integer of a different size. */
3714 tree itype = signed_type_for (type);
3715 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3716 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3717 }
3718 else
3719 {
3720 t1 = fold_convert (fd->iter_type, t1);
3721 t0 = fold_convert (fd->iter_type, t0);
3722 }
3723 if (bias)
3724 {
3725 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3726 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3727 }
3728 }
3729 if (fd->iter_type == long_integer_type_node || fd->ordered)
3730 {
3731 if (fd->chunk_size)
3732 {
3733 t = fold_convert (fd->iter_type, fd->chunk_size);
3734 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3735 if (sched_arg)
3736 {
3737 if (fd->ordered)
3738 t = build_call_expr (builtin_decl_explicit (start_fn),
3739 8, t0, t1, sched_arg, t, t3, t4,
3740 reductions, mem);
3741 else
3742 t = build_call_expr (builtin_decl_explicit (start_fn),
3743 9, t0, t1, t2, sched_arg, t, t3, t4,
3744 reductions, mem);
3745 }
3746 else if (fd->ordered)
3747 t = build_call_expr (builtin_decl_explicit (start_fn),
3748 5, t0, t1, t, t3, t4);
3749 else
3750 t = build_call_expr (builtin_decl_explicit (start_fn),
3751 6, t0, t1, t2, t, t3, t4);
3752 }
3753 else if (fd->ordered)
3754 t = build_call_expr (builtin_decl_explicit (start_fn),
3755 4, t0, t1, t3, t4);
3756 else
3757 t = build_call_expr (builtin_decl_explicit (start_fn),
3758 5, t0, t1, t2, t3, t4);
3759 }
3760 else
3761 {
3762 tree t5;
3763 tree c_bool_type;
3764 tree bfn_decl;
3765
3766 /* The GOMP_loop_ull_*start functions have additional boolean
3767 argument, true for < loops and false for > loops.
3768 In Fortran, the C bool type can be different from
3769 boolean_type_node. */
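/* For reference (assumed shape of the libgomp entry point), e.g.
	bool GOMP_loop_ull_dynamic_start (bool up,
					  unsigned long long start,
					  unsigned long long end,
					  unsigned long long incr,
					  unsigned long long chunk_size,
					  unsigned long long *istart,
					  unsigned long long *iend);
   which is why T5 is prepended to the argument list below.  */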
3770 bfn_decl = builtin_decl_explicit (start_fn);
3771 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
3772 t5 = build_int_cst (c_bool_type,
3773 fd->loop.cond_code == LT_EXPR ? 1 : 0);
3774 if (fd->chunk_size)
3775 {
3776 tree bfn_decl = builtin_decl_explicit (start_fn);
3777 t = fold_convert (fd->iter_type, fd->chunk_size);
3778 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3779 if (sched_arg)
3780 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
3781 t, t3, t4, reductions, mem);
3782 else
3783 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
3784 }
3785 else
3786 t = build_call_expr (builtin_decl_explicit (start_fn),
3787 6, t5, t0, t1, t2, t3, t4);
3788 }
3789 }
3790 if (TREE_TYPE (t) != boolean_type_node)
3791 t = fold_build2 (NE_EXPR, boolean_type_node,
3792 t, build_int_cst (TREE_TYPE (t), 0));
3793 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3794 true, GSI_SAME_STMT);
3795 if (arr && !TREE_STATIC (arr))
3796 {
3797 tree clobber = build_clobber (TREE_TYPE (arr));
3798 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
3799 GSI_SAME_STMT);
3800 }
3801 if (fd->have_pointer_condtemp)
3802 expand_omp_build_assign (&gsi, condtemp, memv, false);
3803 if (fd->have_reductemp)
3804 {
3805 gimple *g = gsi_stmt (gsi);
3806 gsi_remove (&gsi, true);
3807 release_ssa_name (gimple_assign_lhs (g));
3808
3809 entry_bb = region->entry;
3810 gsi = gsi_last_nondebug_bb (entry_bb);
3811
3812 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3813 }
3814 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3815
3816 /* Remove the GIMPLE_OMP_FOR statement. */
3817 gsi_remove (&gsi, true);
3818
3819 if (gsi_end_p (gsif))
3820 gsif = gsi_after_labels (gsi_bb (gsif));
3821 gsi_next (&gsif);
3822
3823 /* Iteration setup for sequential loop goes in L0_BB. */
3824 tree startvar = fd->loop.v;
3825 tree endvar = NULL_TREE;
3826
3827 if (gimple_omp_for_combined_p (fd->for_stmt))
3828 {
3829 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3830 && gimple_omp_for_kind (inner_stmt)
3831 == GF_OMP_FOR_KIND_SIMD);
3832 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3833 OMP_CLAUSE__LOOPTEMP_);
3834 gcc_assert (innerc);
3835 startvar = OMP_CLAUSE_DECL (innerc);
3836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3837 OMP_CLAUSE__LOOPTEMP_);
3838 gcc_assert (innerc);
3839 endvar = OMP_CLAUSE_DECL (innerc);
3840 }
3841
3842 gsi = gsi_start_bb (l0_bb);
3843 t = istart0;
3844 if (fd->ordered && fd->collapse == 1)
3845 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3846 fold_convert (fd->iter_type, fd->loop.step));
3847 else if (bias)
3848 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3849 if (fd->ordered && fd->collapse == 1)
3850 {
3851 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3852 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3853 fd->loop.n1, fold_convert (sizetype, t));
3854 else
3855 {
3856 t = fold_convert (TREE_TYPE (startvar), t);
3857 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3858 fd->loop.n1, t);
3859 }
3860 }
3861 else
3862 {
3863 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3864 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3865 t = fold_convert (TREE_TYPE (startvar), t);
3866 }
3867 t = force_gimple_operand_gsi (&gsi, t,
3868 DECL_P (startvar)
3869 && TREE_ADDRESSABLE (startvar),
3870 NULL_TREE, false, GSI_CONTINUE_LINKING);
3871 assign_stmt = gimple_build_assign (startvar, t);
3872 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3873 if (cond_var)
3874 {
3875 tree itype = TREE_TYPE (cond_var);
3876 /* For the lastprivate(conditional:) itervar, we need an iteration
3877 counter that starts at an unsigned non-zero value and increases.
3878 Prefer as few IVs as possible, so if we can use startvar
3879 itself, use that, or startvar + constant (those would be
3880 incremented with step), and as a last resort use s0 + 1,
3881 incremented by 1. */
3882 if ((fd->ordered && fd->collapse == 1)
3883 || bias
3884 || POINTER_TYPE_P (type)
3885 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3886 || fd->loop.cond_code != LT_EXPR)
3887 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3888 build_int_cst (itype, 1));
3889 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3890 t = fold_convert (itype, t);
3891 else
3892 {
3893 tree c = fold_convert (itype, fd->loop.n1);
3894 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3895 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3896 }
3897 t = force_gimple_operand_gsi (&gsi, t, false,
3898 NULL_TREE, false, GSI_CONTINUE_LINKING);
3899 assign_stmt = gimple_build_assign (cond_var, t);
3900 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3901 }
3902
3903 t = iend0;
3904 if (fd->ordered && fd->collapse == 1)
3905 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3906 fold_convert (fd->iter_type, fd->loop.step));
3907 else if (bias)
3908 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3909 if (fd->ordered && fd->collapse == 1)
3910 {
3911 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3912 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3913 fd->loop.n1, fold_convert (sizetype, t));
3914 else
3915 {
3916 t = fold_convert (TREE_TYPE (startvar), t);
3917 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3918 fd->loop.n1, t);
3919 }
3920 }
3921 else
3922 {
3923 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3924 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3925 t = fold_convert (TREE_TYPE (startvar), t);
3926 }
3927 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3928 false, GSI_CONTINUE_LINKING);
3929 if (endvar)
3930 {
3931 assign_stmt = gimple_build_assign (endvar, iend);
3932 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3933 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3934 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3935 else
3936 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3937 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3938 }
3939 /* Handle linear clause adjustments. */
3940 tree itercnt = NULL_TREE;
3941 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3942 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3943 c; c = OMP_CLAUSE_CHAIN (c))
3944 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3945 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3946 {
3947 tree d = OMP_CLAUSE_DECL (c);
3948 bool is_ref = omp_is_reference (d);
3949 tree t = d, a, dest;
3950 if (is_ref)
3951 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3952 tree type = TREE_TYPE (t);
3953 if (POINTER_TYPE_P (type))
3954 type = sizetype;
3955 dest = unshare_expr (t);
3956 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3957 expand_omp_build_assign (&gsif, v, t);
3958 if (itercnt == NULL_TREE)
3959 {
3960 itercnt = startvar;
3961 tree n1 = fd->loop.n1;
3962 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3963 {
3964 itercnt
3965 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3966 itercnt);
3967 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3968 }
3969 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3970 itercnt, n1);
3971 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3972 itercnt, fd->loop.step);
3973 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3974 NULL_TREE, false,
3975 GSI_CONTINUE_LINKING);
3976 }
3977 a = fold_build2 (MULT_EXPR, type,
3978 fold_convert (type, itercnt),
3979 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3980 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3981 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3982 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3983 false, GSI_CONTINUE_LINKING);
3984 assign_stmt = gimple_build_assign (dest, t);
3985 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3986 }
3987 if (fd->collapse > 1)
3988 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
3989
3990 if (fd->ordered)
3991 {
3992 /* Until now, counts array contained number of iterations or
3993 variable containing it for ith loop. From now on, we need
3994 those counts only for collapsed loops, and only for the 2nd
3995 till the last collapsed one. Move those one element earlier,
3996 we'll use counts[fd->collapse - 1] for the first source/sink
3997 iteration counter and so on and counts[fd->ordered]
3998 as the array holding the current counter values for
3999 depend(source). */
4000 if (fd->collapse > 1)
4001 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4002 if (broken_loop)
4003 {
4004 int i;
4005 for (i = fd->collapse; i < fd->ordered; i++)
4006 {
4007 tree type = TREE_TYPE (fd->loops[i].v);
4008 tree this_cond
4009 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4010 fold_convert (type, fd->loops[i].n1),
4011 fold_convert (type, fd->loops[i].n2));
4012 if (!integer_onep (this_cond))
4013 break;
4014 }
4015 if (i < fd->ordered)
4016 {
4017 cont_bb
4018 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4019 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4020 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4021 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4022 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4023 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4024 make_edge (cont_bb, l1_bb, 0);
4025 l2_bb = create_empty_bb (cont_bb);
4026 broken_loop = false;
4027 }
4028 }
4029 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4030 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4031 ordered_lastprivate);
4032 if (counts[fd->collapse - 1])
4033 {
4034 gcc_assert (fd->collapse == 1);
4035 gsi = gsi_last_bb (l0_bb);
4036 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4037 istart0, true);
4038 gsi = gsi_last_bb (cont_bb);
4039 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4040 build_int_cst (fd->iter_type, 1));
4041 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4042 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4043 size_zero_node, NULL_TREE, NULL_TREE);
4044 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4045 t = counts[fd->collapse - 1];
4046 }
4047 else if (fd->collapse > 1)
4048 t = fd->loop.v;
4049 else
4050 {
4051 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4052 fd->loops[0].v, fd->loops[0].n1);
4053 t = fold_convert (fd->iter_type, t);
4054 }
4055 gsi = gsi_last_bb (l0_bb);
4056 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4057 size_zero_node, NULL_TREE, NULL_TREE);
4058 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4059 false, GSI_CONTINUE_LINKING);
4060 expand_omp_build_assign (&gsi, aref, t, true);
4061 }
4062
4063 if (!broken_loop)
4064 {
4065 /* Code to control the increment and predicate for the sequential
4066 loop goes in the CONT_BB. */
4067 gsi = gsi_last_nondebug_bb (cont_bb);
4068 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4069 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4070 vmain = gimple_omp_continue_control_use (cont_stmt);
4071 vback = gimple_omp_continue_control_def (cont_stmt);
4072
4073 if (cond_var)
4074 {
4075 tree itype = TREE_TYPE (cond_var);
4076 tree t2;
4077 if ((fd->ordered && fd->collapse == 1)
4078 || bias
4079 || POINTER_TYPE_P (type)
4080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4081 || fd->loop.cond_code != LT_EXPR)
4082 t2 = build_int_cst (itype, 1);
4083 else
4084 t2 = fold_convert (itype, fd->loop.step);
4085 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4086 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4087 NULL_TREE, true, GSI_SAME_STMT);
4088 assign_stmt = gimple_build_assign (cond_var, t2);
4089 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4090 }
4091
4092 if (!gimple_omp_for_combined_p (fd->for_stmt))
4093 {
4094 if (POINTER_TYPE_P (type))
4095 t = fold_build_pointer_plus (vmain, fd->loop.step);
4096 else
4097 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4098 t = force_gimple_operand_gsi (&gsi, t,
4099 DECL_P (vback)
4100 && TREE_ADDRESSABLE (vback),
4101 NULL_TREE, true, GSI_SAME_STMT);
4102 assign_stmt = gimple_build_assign (vback, t);
4103 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4104
4105 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4106 {
4107 tree tem;
4108 if (fd->collapse > 1)
4109 tem = fd->loop.v;
4110 else
4111 {
4112 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4113 fd->loops[0].v, fd->loops[0].n1);
4114 tem = fold_convert (fd->iter_type, tem);
4115 }
4116 tree aref = build4 (ARRAY_REF, fd->iter_type,
4117 counts[fd->ordered], size_zero_node,
4118 NULL_TREE, NULL_TREE);
4119 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4120 true, GSI_SAME_STMT);
4121 expand_omp_build_assign (&gsi, aref, tem);
4122 }
4123
4124 t = build2 (fd->loop.cond_code, boolean_type_node,
4125 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4126 iend);
4127 gcond *cond_stmt = gimple_build_cond_empty (t);
4128 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4129 }
4130
4131 /* Remove GIMPLE_OMP_CONTINUE. */
4132 gsi_remove (&gsi, true);
4133
4134 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4135 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4136
4137 /* Emit code to get the next parallel iteration in L2_BB. */
4138 gsi = gsi_start_bb (l2_bb);
4139
4140 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4141 build_fold_addr_expr (istart0),
4142 build_fold_addr_expr (iend0));
4143 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4144 false, GSI_CONTINUE_LINKING);
4145 if (TREE_TYPE (t) != boolean_type_node)
4146 t = fold_build2 (NE_EXPR, boolean_type_node,
4147 t, build_int_cst (TREE_TYPE (t), 0));
4148 gcond *cond_stmt = gimple_build_cond_empty (t);
4149 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4150 }
4151
4152 /* Add the loop cleanup function. */
4153 gsi = gsi_last_nondebug_bb (exit_bb);
4154 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4155 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4156 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4157 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4158 else
4159 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4160 gcall *call_stmt = gimple_build_call (t, 0);
4161 if (fd->ordered)
4162 {
4163 tree arr = counts[fd->ordered];
4164 tree clobber = build_clobber (TREE_TYPE (arr));
4165 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4166 GSI_SAME_STMT);
4167 }
4168 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4169 {
4170 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4171 if (fd->have_reductemp)
4172 {
4173 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4174 gimple_call_lhs (call_stmt));
4175 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4176 }
4177 }
4178 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4179 gsi_remove (&gsi, true);
4180
4181 /* Connect the new blocks. */
4182 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4183 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4184
4185 if (!broken_loop)
4186 {
4187 gimple_seq phis;
4188
4189 e = find_edge (cont_bb, l3_bb);
4190 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4191
4192 phis = phi_nodes (l3_bb);
4193 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4194 {
4195 gimple *phi = gsi_stmt (gsi);
4196 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4197 PHI_ARG_DEF_FROM_EDGE (phi, e));
4198 }
4199 remove_edge (e);
4200
4201 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4202 e = find_edge (cont_bb, l1_bb);
4203 if (e == NULL)
4204 {
4205 e = BRANCH_EDGE (cont_bb);
4206 gcc_assert (single_succ (e->dest) == l1_bb);
4207 }
4208 if (gimple_omp_for_combined_p (fd->for_stmt))
4209 {
4210 remove_edge (e);
4211 e = NULL;
4212 }
4213 else if (fd->collapse > 1)
4214 {
4215 remove_edge (e);
4216 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4217 }
4218 else
4219 e->flags = EDGE_TRUE_VALUE;
4220 if (e)
4221 {
4222 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4223 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4224 }
4225 else
4226 {
4227 e = find_edge (cont_bb, l2_bb);
4228 e->flags = EDGE_FALLTHRU;
4229 }
4230 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4231
4232 if (gimple_in_ssa_p (cfun))
4233 {
4234 /* Add phis to the outer loop that connect to the phis in the inner,
4235 original loop, and move the loop entry value of the inner phi to
4236 the loop entry value of the outer phi. */
4237 gphi_iterator psi;
4238 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4239 {
4240 location_t locus;
4241 gphi *nphi;
4242 gphi *exit_phi = psi.phi ();
4243
4244 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4245 continue;
4246
4247 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4248 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4249
4250 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4251 edge latch_to_l1 = find_edge (latch, l1_bb);
4252 gphi *inner_phi
4253 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4254
4255 tree t = gimple_phi_result (exit_phi);
4256 tree new_res = copy_ssa_name (t, NULL);
4257 nphi = create_phi_node (new_res, l0_bb);
4258
4259 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4260 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4261 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4262 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4263 add_phi_arg (nphi, t, entry_to_l0, locus);
4264
4265 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4266 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4267
4268 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4269 }
4270 }
4271
4272 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4273 recompute_dominator (CDI_DOMINATORS, l2_bb));
4274 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4275 recompute_dominator (CDI_DOMINATORS, l3_bb));
4276 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4277 recompute_dominator (CDI_DOMINATORS, l0_bb));
4278 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4279 recompute_dominator (CDI_DOMINATORS, l1_bb));
4280
4281 /* We enter expand_omp_for_generic with a loop. This original loop may
4282 have its own loop struct, or it may be part of an outer loop struct
4283 (which may be the fake loop). */
4284 class loop *outer_loop = entry_bb->loop_father;
4285 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4286
4287 add_bb_to_loop (l2_bb, outer_loop);
4288
4289 /* We've added a new loop around the original loop. Allocate the
4290 corresponding loop struct. */
4291 class loop *new_loop = alloc_loop ();
4292 new_loop->header = l0_bb;
4293 new_loop->latch = l2_bb;
4294 add_loop (new_loop, outer_loop);
4295
4296 /* Allocate a loop structure for the original loop unless we already
4297 had one. */
4298 if (!orig_loop_has_loop_struct
4299 && !gimple_omp_for_combined_p (fd->for_stmt))
4300 {
4301 class loop *orig_loop = alloc_loop ();
4302 orig_loop->header = l1_bb;
4303 /* The loop may have multiple latches. */
4304 add_loop (orig_loop, new_loop);
4305 }
4306 }
4307}
4308
4309/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4310 compute the needed allocation size: for !ALLOC that of the team
4311 allocations, for ALLOC that of the thread allocations. SZ is the
4312 initial size needed for other purposes, ALLOC_ALIGN the guaranteed
4313 alignment of the allocation in bytes, CNT the number of elements of
4314 each array: for !ALLOC this is omp_get_num_threads (), for ALLOC the
4315 number of iterations handled by the current thread. If PTR is
4316 non-NULL, it is the start of the allocation and this routine shall
4317 assign to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers to the corresponding arrays. */
4318
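/* Layout sketch (illustrative): with SZ == 8, ALLOC_ALIGN == 8 and two
   _scantemp_ arrays with long long (8 byte) resp. int (4 byte) elements,
   the first call (PTR == NULL) returns 8 + CNT * 8 + CNT * 4, and the
   second call with PTR set hands out PTR + 8 and PTR + 8 + CNT * 8 to
   the two clause decls; alignment padding is inserted only when a later
   array needs stricter alignment than is already guaranteed.  */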
4319static tree
4320expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4321 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4322 gimple_stmt_iterator *gsi, bool alloc)
4323{
4324 tree eltsz = NULL_TREE;
4325 unsigned HOST_WIDE_INT preval = 0;
4326 if (ptr && sz)
4327 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4328 ptr, size_int (sz));
4329 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4330 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4331 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4332 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4333 {
4334 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4335 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4336 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4337 {
4338 unsigned HOST_WIDE_INT szl
4339 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4340 szl = least_bit_hwi (szl);
4341 if (szl)
4342 al = MIN (al, szl);
4343 }
4344 if (ptr == NULL_TREE)
4345 {
4346 if (eltsz == NULL_TREE)
4347 eltsz = TYPE_SIZE_UNIT (pointee_type);
4348 else
4349 eltsz = size_binop (PLUS_EXPR, eltsz,
4350 TYPE_SIZE_UNIT (pointee_type));
4351 }
4352 if (preval == 0 && al <= alloc_align)
4353 {
4354 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4355 sz += diff;
4356 if (diff && ptr)
4357 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4358 ptr, size_int (diff));
4359 }
4360 else if (al > preval)
4361 {
4362 if (ptr)
4363 {
4364 ptr = fold_convert (pointer_sized_int_node, ptr);
4365 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4366 build_int_cst (pointer_sized_int_node,
4367 al - 1));
4368 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4369 build_int_cst (pointer_sized_int_node,
4370 -(HOST_WIDE_INT) al));
4371 ptr = fold_convert (ptr_type_node, ptr);
4372 }
4373 else
4374 sz += al - 1;
4375 }
4376 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4377 preval = al;
4378 else
4379 preval = 1;
4380 if (ptr)
4381 {
4382 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4383 ptr = OMP_CLAUSE_DECL (c);
4384 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4385 size_binop (MULT_EXPR, cnt,
4386 TYPE_SIZE_UNIT (pointee_type)));
4387 }
4388 }
4389
4390 if (ptr == NULL_TREE)
4391 {
4392 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4393 if (sz)
4394 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4395 return eltsz;
4396 }
4397 else
4398 return ptr;
4399}
4400
4401/* A subroutine of expand_omp_for. Generate code for a parallel
4402 loop with static schedule and no specified chunk size. Given
4403 parameters:
4404
4405 for (V = N1; V cond N2; V += STEP) BODY;
4406
4407 where COND is "<" or ">", we generate pseudocode
4408
4409 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4410 if (cond is <)
4411 adj = STEP - 1;
4412 else
4413 adj = STEP + 1;
4414 if ((__typeof (V)) -1 > 0 && cond is >)
4415 n = -(adj + N2 - N1) / -STEP;
4416 else
4417 n = (adj + N2 - N1) / STEP;
4418 q = n / nthreads;
4419 tt = n % nthreads;
4420 if (threadid < tt) goto L3; else goto L4;
4421 L3:
4422 tt = 0;
4423 q = q + 1;
4424 L4:
4425 s0 = q * threadid + tt;
4426 e0 = s0 + q;
4427 V = s0 * STEP + N1;
4428 if (s0 >= e0) goto L2; else goto L0;
4429 L0:
4430 e = e0 * STEP + N1;
4431 L1:
4432 BODY;
4433 V += STEP;
4434 if (V cond e) goto L1;
4435 L2:
4436*/
4437
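/* Worked example (illustrative): n = 10 iterations on nthreads = 4
   threads give q = 2 and tt = 2.  Threads 0 and 1 (threadid < tt) take
   q + 1 = 3 iterations each, threads 2 and 3 take 2, yielding the
   [s0, e0) ranges [0, 3), [3, 6), [6, 8) and [8, 10).  */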
4438static void
4439expand_omp_for_static_nochunk (struct omp_region *region,
4440 struct omp_for_data *fd,
4441 gimple *inner_stmt)
4442{
4443 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4444 tree type, itype, vmain, vback;
4445 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4446 basic_block body_bb, cont_bb, collapse_bb = NULL;
4447 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4448 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4449 gimple_stmt_iterator gsi, gsip;
4450 edge ep;
4451 bool broken_loop = region->cont == NULL;
4452 tree *counts = NULL;
4453 tree n1, n2, step;
4454 tree reductions = NULL_TREE;
4455 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4456
4457 itype = type = TREE_TYPE (fd->loop.v);
4458 if (POINTER_TYPE_P (type))
4459 itype = signed_type_for (type);
4460
4461 entry_bb = region->entry;
4462 cont_bb = region->cont;
4463 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4464 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4465 gcc_assert (broken_loop
4466 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4467 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4468 body_bb = single_succ (seq_start_bb);
4469 if (!broken_loop)
4470 {
4471 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4472 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4473 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4474 }
4475 exit_bb = region->exit;
4476
4477 /* Iteration space partitioning goes in ENTRY_BB. */
4478 gsi = gsi_last_nondebug_bb (entry_bb);
4479 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4480 gsip = gsi;
4481 gsi_prev (&gsip);
4482
4483 if (fd->collapse > 1)
4484 {
4485 int first_zero_iter = -1, dummy = -1;
4486 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4487
4488 counts = XALLOCAVEC (tree, fd->collapse);
4489 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4490 fin_bb, first_zero_iter,
4491 dummy_bb, dummy, l2_dom_bb);
4492 t = NULL_TREE;
4493 }
4494 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4495 t = integer_one_node;
4496 else
4497 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4498 fold_convert (type, fd->loop.n1),
4499 fold_convert (type, fd->loop.n2));
4500 if (fd->collapse == 1
4501 && TYPE_UNSIGNED (type)
4502 && (t == NULL_TREE || !integer_onep (t)))
4503 {
4504 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4505 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4506 true, GSI_SAME_STMT);
4507 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4508 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4509 true, GSI_SAME_STMT);
4510 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4511 NULL_TREE, NULL_TREE);
4512 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4513 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4514 expand_omp_regimplify_p, NULL, NULL)
4515 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4516 expand_omp_regimplify_p, NULL, NULL))
4517 {
4518 gsi = gsi_for_stmt (cond_stmt);
4519 gimple_regimplify_operands (cond_stmt, &gsi);
4520 }
4521 ep = split_block (entry_bb, cond_stmt);
4522 ep->flags = EDGE_TRUE_VALUE;
4523 entry_bb = ep->dest;
4524 ep->probability = profile_probability::very_likely ();
4525 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4526 ep->probability = profile_probability::very_unlikely ();
4527 if (gimple_in_ssa_p (cfun))
4528 {
4529 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4530 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4531 !gsi_end_p (gpi); gsi_next (&gpi))
4532 {
4533 gphi *phi = gpi.phi ();
4534 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4535 ep, UNKNOWN_LOCATION);
4536 }
4537 }
4538 gsi = gsi_last_bb (entry_bb);
4539 }
4540
4541 if (fd->lastprivate_conditional)
4542 {
4543 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4544 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4545 if (fd->have_pointer_condtemp)
4546 condtemp = OMP_CLAUSE_DECL (c);
4547 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4548 cond_var = OMP_CLAUSE_DECL (c);
4549 }
4550 if (fd->have_reductemp
4551 /* For scan, we don't want to reinitialize condtemp before the
4552 second loop. */
4553 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4554 || fd->have_nonctrl_scantemp)
4555 {
4556 tree t1 = build_int_cst (long_integer_type_node, 0);
4557 tree t2 = build_int_cst (long_integer_type_node, 1);
4558 tree t3 = build_int_cstu (long_integer_type_node,
4559 (HOST_WIDE_INT_1U << 31) + 1);
4560 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4561 gimple_stmt_iterator gsi2 = gsi_none ();
4562 gimple *g = NULL;
4563 tree mem = null_pointer_node, memv = NULL_TREE;
4564 unsigned HOST_WIDE_INT condtemp_sz = 0;
4565 unsigned HOST_WIDE_INT alloc_align = 0;
4566 if (fd->have_reductemp)
4567 {
4568 gcc_assert (!fd->have_nonctrl_scantemp);
4569 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4570 reductions = OMP_CLAUSE_DECL (c);
4571 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4572 g = SSA_NAME_DEF_STMT (reductions);
4573 reductions = gimple_assign_rhs1 (g);
4574 OMP_CLAUSE_DECL (c) = reductions;
4575 gsi2 = gsi_for_stmt (g);
4576 }
4577 else
4578 {
4579 if (gsi_end_p (gsip))
4580 gsi2 = gsi_after_labels (region->entry);
4581 else
4582 gsi2 = gsip;
4583 reductions = null_pointer_node;
4584 }
4585 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4586 {
4587 tree type;
4588 if (fd->have_pointer_condtemp)
4589 type = TREE_TYPE (condtemp);
4590 else
4591 type = ptr_type_node;
4592 memv = create_tmp_var (type);
4593 TREE_ADDRESSABLE (memv) = 1;
4594 unsigned HOST_WIDE_INT sz = 0;
4595 tree size = NULL_TREE;
4596 if (fd->have_pointer_condtemp)
4597 {
4598 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4599 sz *= fd->lastprivate_conditional;
4600 condtemp_sz = sz;
4601 }
4602 if (fd->have_nonctrl_scantemp)
4603 {
4604 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4605 gimple *g = gimple_build_call (nthreads, 0);
4606 nthreads = create_tmp_var (integer_type_node);
4607 gimple_call_set_lhs (g, nthreads);
4608 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4609 nthreads = fold_convert (sizetype, nthreads);
4610 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4611 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4612 alloc_align, nthreads, NULL,
4613 false);
4614 size = fold_convert (type, size);
4615 }
4616 else
4617 size = build_int_cst (type, sz);
4618 expand_omp_build_assign (&gsi2, memv, size, false);
4619 mem = build_fold_addr_expr (memv);
4620 }
4621 tree t
4622 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4623 9, t1, t2, t2, t3, t1, null_pointer_node,
4624 null_pointer_node, reductions, mem);
4625 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4626 true, GSI_SAME_STMT);
4627 if (fd->have_pointer_condtemp)
4628 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4629 if (fd->have_nonctrl_scantemp)
4630 {
4631 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4632 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4633 alloc_align, nthreads, &gsi2, false);
4634 }
4635 if (fd->have_reductemp)
4636 {
4637 gsi_remove (&gsi2, true);
4638 release_ssa_name (gimple_assign_lhs (g));
4639 }
4640 }
4641 switch (gimple_omp_for_kind (fd->for_stmt))
4642 {
4643 case GF_OMP_FOR_KIND_FOR:
4644 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4645 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4646 break;
4647 case GF_OMP_FOR_KIND_DISTRIBUTE:
4648 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4649 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4650 break;
4651 default:
4652 gcc_unreachable ();
4653 }
4654 nthreads = build_call_expr (nthreads, 0);
4655 nthreads = fold_convert (itype, nthreads);
4656 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4657 true, GSI_SAME_STMT);
4658 threadid = build_call_expr (threadid, 0);
4659 threadid = fold_convert (itype, threadid);
4660 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4661 true, GSI_SAME_STMT);
4662
4663 n1 = fd->loop.n1;
4664 n2 = fd->loop.n2;
4665 step = fd->loop.step;
4666 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4667 {
4668 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4669 OMP_CLAUSE__LOOPTEMP_);
4670 gcc_assert (innerc);
4671 n1 = OMP_CLAUSE_DECL (innerc);
4672 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4673 OMP_CLAUSE__LOOPTEMP_);
4674 gcc_assert (innerc);
4675 n2 = OMP_CLAUSE_DECL (innerc);
4676 }
4677 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4678 true, NULL_TREE, true, GSI_SAME_STMT);
4679 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4680 true, NULL_TREE, true, GSI_SAME_STMT);
4681 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4682 true, NULL_TREE, true, GSI_SAME_STMT);
4683
4684 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4685 t = fold_build2 (PLUS_EXPR, itype, step, t);
4686 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4687 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4688 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4689 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4690 fold_build1 (NEGATE_EXPR, itype, t),
4691 fold_build1 (NEGATE_EXPR, itype, step));
4692 else
4693 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4694 t = fold_convert (itype, t);
4695 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4696
4697 q = create_tmp_reg (itype, "q");
4698 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4699 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4700 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4701
4702 tt = create_tmp_reg (itype, "tt");
4703 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
4704 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4705 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
4706
4707 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
4708 gcond *cond_stmt = gimple_build_cond_empty (t);
4709 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4710
4711 second_bb = split_block (entry_bb, cond_stmt)->dest;
4712 gsi = gsi_last_nondebug_bb (second_bb);
4713 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4714
4715 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
4716 GSI_SAME_STMT);
4717 gassign *assign_stmt
4718 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
4719 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4720
4721 third_bb = split_block (second_bb, assign_stmt)->dest;
4722 gsi = gsi_last_nondebug_bb (third_bb);
4723 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4724
4725 if (fd->have_nonctrl_scantemp)
4726 {
4727 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4728 tree controlp = NULL_TREE, controlb = NULL_TREE;
4729 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4730 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4731 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4732 {
4733 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4734 controlb = OMP_CLAUSE_DECL (c);
4735 else
4736 controlp = OMP_CLAUSE_DECL (c);
4737 if (controlb && controlp)
4738 break;
4739 }
4740 gcc_assert (controlp && controlb);
4741 tree cnt = create_tmp_var (sizetype);
4742 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
4743 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4744 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
4745 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
4746 alloc_align, cnt, NULL, true);
4747 tree size = create_tmp_var (sizetype);
4748 expand_omp_build_assign (&gsi, size, sz, false);
4749 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
4750 size, size_int (16384));
4751 expand_omp_build_assign (&gsi, controlb, cmp);
4752 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4753 NULL_TREE, NULL_TREE);
4754 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4755 fourth_bb = split_block (third_bb, g)->dest;
4756 gsi = gsi_last_nondebug_bb (fourth_bb);
4757 /* FIXME: Once we have allocators, this should use allocator. */
4758 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
4759 gimple_call_set_lhs (g, controlp);
4760 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4761 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
4762 &gsi, true);
4763 gsi_prev (&gsi);
4764 g = gsi_stmt (gsi);
4765 fifth_bb = split_block (fourth_bb, g)->dest;
4766 gsi = gsi_last_nondebug_bb (fifth_bb);
4767
4768 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
4769 gimple_call_set_lhs (g, controlp);
4770 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4771 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4772 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4773 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4774 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
4775 {
4776 tree tmp = create_tmp_var (sizetype);
4777 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4778 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
4779 TYPE_SIZE_UNIT (pointee_type));
4780 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4781 g = gimple_build_call (alloca_decl, 2, tmp,
4782 size_int (TYPE_ALIGN (pointee_type)));
4783 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
4784 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4785 }
4786
4787 sixth_bb = split_block (fifth_bb, g)->dest;
4788 gsi = gsi_last_nondebug_bb (sixth_bb);
4789 }
4790
629b3d75
MJ
4791 t = build2 (MULT_EXPR, itype, q, threadid);
4792 t = build2 (PLUS_EXPR, itype, t, tt);
4793 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4794
4795 t = fold_build2 (PLUS_EXPR, itype, s0, q);
4796 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4797
4798 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
4799 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4800
4801 /* Remove the GIMPLE_OMP_FOR statement. */
4802 gsi_remove (&gsi, true);
4803
4804 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4805 gsi = gsi_start_bb (seq_start_bb);
4806
4807 tree startvar = fd->loop.v;
4808 tree endvar = NULL_TREE;
4809
4810 if (gimple_omp_for_combined_p (fd->for_stmt))
4811 {
4812 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4813 ? gimple_omp_parallel_clauses (inner_stmt)
4814 : gimple_omp_for_clauses (inner_stmt);
4815 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4816 gcc_assert (innerc);
4817 startvar = OMP_CLAUSE_DECL (innerc);
4818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4819 OMP_CLAUSE__LOOPTEMP_);
4820 gcc_assert (innerc);
4821 endvar = OMP_CLAUSE_DECL (innerc);
4822 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4823 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4824 {
4825 int i;
4826 for (i = 1; i < fd->collapse; i++)
4827 {
4828 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4829 OMP_CLAUSE__LOOPTEMP_);
4830 gcc_assert (innerc);
4831 }
4832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4833 OMP_CLAUSE__LOOPTEMP_);
4834 if (innerc)
4835 {
4836 /* If needed (distribute parallel for with lastprivate),
4837 propagate down the total number of iterations. */
4838 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4839 fd->loop.n2);
4840 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4841 GSI_CONTINUE_LINKING);
4842 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4843 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4844 }
4845 }
4846 }
4847 t = fold_convert (itype, s0);
4848 t = fold_build2 (MULT_EXPR, itype, t, step);
4849 if (POINTER_TYPE_P (type))
4850 {
4851 t = fold_build_pointer_plus (n1, t);
4852 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4853 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4854 t = fold_convert (signed_type_for (type), t);
4855 }
4856 else
4857 t = fold_build2 (PLUS_EXPR, type, t, n1);
4858 t = fold_convert (TREE_TYPE (startvar), t);
4859 t = force_gimple_operand_gsi (&gsi, t,
4860 DECL_P (startvar)
4861 && TREE_ADDRESSABLE (startvar),
4862 NULL_TREE, false, GSI_CONTINUE_LINKING);
4863 assign_stmt = gimple_build_assign (startvar, t);
4864 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4865 if (cond_var)
4866 {
4867 tree itype = TREE_TYPE (cond_var);
4868 /* For lastprivate(conditional:) itervar, we need an iteration
4869 counter that starts at a non-zero unsigned value and increases.
4870 Prefer as few IVs as possible, so if we can use startvar
4871 itself, use that, or startvar + constant (those would be
4872 incremented with step), and as a last resort use s0 + 1,
4873 incremented by 1 each iteration. */
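/* For example, for "for (i = 5; i < n; i++)" the start value is a
   positive constant, so the branch below can seed the counter from
   startvar's value directly; for "for (i = -2; i < n; i++)" it
   instead adds c = 1 - (-2) = 3 so the counter starts at 1. */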
4874 if (POINTER_TYPE_P (type)
4875 || TREE_CODE (n1) != INTEGER_CST
4876 || fd->loop.cond_code != LT_EXPR)
4877 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4878 build_int_cst (itype, 1));
4879 else if (tree_int_cst_sgn (n1) == 1)
4880 t = fold_convert (itype, t);
4881 else
4882 {
4883 tree c = fold_convert (itype, n1);
4884 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4885 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4886 }
4887 t = force_gimple_operand_gsi (&gsi, t, false,
4888 NULL_TREE, false, GSI_CONTINUE_LINKING);
4889 assign_stmt = gimple_build_assign (cond_var, t);
4890 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4891 }
4892
4893 t = fold_convert (itype, e0);
4894 t = fold_build2 (MULT_EXPR, itype, t, step);
4895 if (POINTER_TYPE_P (type))
4896 {
4897 t = fold_build_pointer_plus (n1, t);
4898 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4899 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4900 t = fold_convert (signed_type_for (type), t);
4901 }
4902 else
4903 t = fold_build2 (PLUS_EXPR, type, t, n1);
4904 t = fold_convert (TREE_TYPE (startvar), t);
4905 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4906 false, GSI_CONTINUE_LINKING);
4907 if (endvar)
4908 {
4909 assign_stmt = gimple_build_assign (endvar, e);
4910 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4911 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4912 assign_stmt = gimple_build_assign (fd->loop.v, e);
4913 else
4914 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4915 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4916 }
4917 /* Handle linear clause adjustments. */
4918 tree itercnt = NULL_TREE;
4919 tree *nonrect_bounds = NULL;
4920 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4921 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4922 c; c = OMP_CLAUSE_CHAIN (c))
4923 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4924 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4925 {
4926 tree d = OMP_CLAUSE_DECL (c);
4927 bool is_ref = omp_is_reference (d);
4928 tree t = d, a, dest;
4929 if (is_ref)
4930 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4931 if (itercnt == NULL_TREE)
4932 {
4933 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4934 {
4935 itercnt = fold_build2 (MINUS_EXPR, itype,
4936 fold_convert (itype, n1),
4937 fold_convert (itype, fd->loop.n1));
4938 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4939 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4940 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4941 NULL_TREE, false,
4942 GSI_CONTINUE_LINKING);
4943 }
4944 else
4945 itercnt = s0;
4946 }
4947 tree type = TREE_TYPE (t);
4948 if (POINTER_TYPE_P (type))
4949 type = sizetype;
4950 a = fold_build2 (MULT_EXPR, type,
4951 fold_convert (type, itercnt),
4952 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4953 dest = unshare_expr (t);
4954 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4955 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4956 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4957 false, GSI_CONTINUE_LINKING);
4958 assign_stmt = gimple_build_assign (dest, t);
4959 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4960 }
4961 if (fd->collapse > 1)
4962 {
4963 if (fd->non_rect)
4964 {
4965 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
4966 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
4967 }
4968 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
4969 startvar);
4970 }
4971
4972 if (!broken_loop)
4973 {
4974 /* The code controlling the sequential loop replaces the
4975 GIMPLE_OMP_CONTINUE. */
4976 gsi = gsi_last_nondebug_bb (cont_bb);
4977 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4978 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4979 vmain = gimple_omp_continue_control_use (cont_stmt);
4980 vback = gimple_omp_continue_control_def (cont_stmt);
4981
4982 if (cond_var)
4983 {
4984 tree itype = TREE_TYPE (cond_var);
4985 tree t2;
4986 if (POINTER_TYPE_P (type)
4987 || TREE_CODE (n1) != INTEGER_CST
4988 || fd->loop.cond_code != LT_EXPR)
4989 t2 = build_int_cst (itype, 1);
4990 else
4991 t2 = fold_convert (itype, step);
4992 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4993 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4994 NULL_TREE, true, GSI_SAME_STMT);
4995 assign_stmt = gimple_build_assign (cond_var, t2);
4996 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4997 }
4998
4999 if (!gimple_omp_for_combined_p (fd->for_stmt))
5000 {
5001 if (POINTER_TYPE_P (type))
5002 t = fold_build_pointer_plus (vmain, step);
5003 else
5004 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5005 t = force_gimple_operand_gsi (&gsi, t,
5006 DECL_P (vback)
5007 && TREE_ADDRESSABLE (vback),
5008 NULL_TREE, true, GSI_SAME_STMT);
5009 assign_stmt = gimple_build_assign (vback, t);
5010 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5011
5012 t = build2 (fd->loop.cond_code, boolean_type_node,
5013 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5014 ? t : vback, e);
5015 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5016 }
5017
5018 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5019 gsi_remove (&gsi, true);
5020
5021 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5022 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5023 cont_bb, body_bb);
5024 }
5025
5026 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5027 gsi = gsi_last_nondebug_bb (exit_bb);
5028 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5029 {
5030 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5031 if (fd->have_reductemp
5032 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5033 && !fd->have_nonctrl_scantemp))
5034 {
5035 tree fn;
5036 if (t)
5037 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5038 else
5039 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5040 gcall *g = gimple_build_call (fn, 0);
5041 if (t)
5042 {
5043 gimple_call_set_lhs (g, t);
5044 if (fd->have_reductemp)
5045 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5046 NOP_EXPR, t),
5047 GSI_SAME_STMT);
5048 }
5049 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5050 }
5051 else
5052 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5053 }
5054 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5055 && !fd->have_nonctrl_scantemp)
5056 {
5057 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5058 gcall *g = gimple_build_call (fn, 0);
5059 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5060 }
5061 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5062 {
5063 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5064 tree controlp = NULL_TREE, controlb = NULL_TREE;
5065 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5066 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5067 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5068 {
5069 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5070 controlb = OMP_CLAUSE_DECL (c);
5071 else
5072 controlp = OMP_CLAUSE_DECL (c);
5073 if (controlb && controlp)
5074 break;
5075 }
5076 gcc_assert (controlp && controlb);
5077 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5078 NULL_TREE, NULL_TREE);
5079 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5080 exit1_bb = split_block (exit_bb, g)->dest;
5081 gsi = gsi_after_labels (exit1_bb);
5082 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5083 controlp);
5084 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5085 exit2_bb = split_block (exit1_bb, g)->dest;
5086 gsi = gsi_after_labels (exit2_bb);
5087 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5088 controlp);
5089 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5090 exit3_bb = split_block (exit2_bb, g)->dest;
5091 gsi = gsi_after_labels (exit3_bb);
5092 }
5093 gsi_remove (&gsi, true);
5094
5095 /* Connect all the blocks. */
5096 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5097 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5098 ep = find_edge (entry_bb, second_bb);
5099 ep->flags = EDGE_TRUE_VALUE;
5100 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5101 if (fourth_bb)
5102 {
5103 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5104 ep->probability
5105 = profile_probability::guessed_always ().apply_scale (1, 2);
5106 ep = find_edge (third_bb, fourth_bb);
5107 ep->flags = EDGE_TRUE_VALUE;
5108 ep->probability
5109 = profile_probability::guessed_always ().apply_scale (1, 2);
5110 ep = find_edge (fourth_bb, fifth_bb);
5111 redirect_edge_and_branch (ep, sixth_bb);
5112 }
5113 else
5114 sixth_bb = third_bb;
5115 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5116 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5117 if (exit1_bb)
5118 {
5119 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5120 ep->probability
5121 = profile_probability::guessed_always ().apply_scale (1, 2);
5122 ep = find_edge (exit_bb, exit1_bb);
5123 ep->flags = EDGE_TRUE_VALUE;
5124 ep->probability
5125 = profile_probability::guessed_always ().apply_scale (1, 2);
5126 ep = find_edge (exit1_bb, exit2_bb);
5127 redirect_edge_and_branch (ep, exit3_bb);
5128 }
5129
5130 if (!broken_loop)
5131 {
5132 ep = find_edge (cont_bb, body_bb);
5133 if (ep == NULL)
5134 {
5135 ep = BRANCH_EDGE (cont_bb);
5136 gcc_assert (single_succ (ep->dest) == body_bb);
5137 }
5138 if (gimple_omp_for_combined_p (fd->for_stmt))
5139 {
5140 remove_edge (ep);
5141 ep = NULL;
5142 }
5143 else if (fd->collapse > 1)
5144 {
5145 remove_edge (ep);
5146 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5147 }
5148 else
5149 ep->flags = EDGE_TRUE_VALUE;
5150 find_edge (cont_bb, fin_bb)->flags
5151 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5152 }
5153
5154 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5155 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5156 if (fourth_bb)
5157 {
5158 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5159 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5160 }
5161 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5162
5163 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5164 recompute_dominator (CDI_DOMINATORS, body_bb));
5165 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5166 recompute_dominator (CDI_DOMINATORS, fin_bb));
5167 if (exit1_bb)
5168 {
5169 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5170 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5171 }
5172
5173 class loop *loop = body_bb->loop_father;
5174 if (loop != entry_bb->loop_father)
5175 {
5176 gcc_assert (broken_loop || loop->header == body_bb);
5177 gcc_assert (broken_loop
5178 || loop->latch == region->cont
5179 || single_pred (loop->latch) == region->cont);
5180 return;
5181 }
5182
5183 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5184 {
5185 loop = alloc_loop ();
5186 loop->header = body_bb;
5187 if (collapse_bb == NULL)
5188 loop->latch = cont_bb;
5189 add_loop (loop, body_bb->loop_father);
5190 }
5191}
5192
5193/* Return phi in E->DEST with ARG on edge E. */
5194
5195static gphi *
5196find_phi_with_arg_on_edge (tree arg, edge e)
5197{
5198 basic_block bb = e->dest;
5199
5200 for (gphi_iterator gpi = gsi_start_phis (bb);
5201 !gsi_end_p (gpi);
5202 gsi_next (&gpi))
5203 {
5204 gphi *phi = gpi.phi ();
5205 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5206 return phi;
5207 }
5208
5209 return NULL;
5210}
5211
5212/* A subroutine of expand_omp_for. Generate code for a parallel
5213 loop with static schedule and a specified chunk size. Given
5214 parameters:
5215
5216 for (V = N1; V cond N2; V += STEP) BODY;
5217
5218 where COND is "<" or ">", we generate pseudocode
5219
5220 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5221 if (cond is <)
5222 adj = STEP - 1;
5223 else
5224 adj = STEP + 1;
5225 if ((__typeof (V)) -1 > 0 && cond is >)
5226 n = -(adj + N2 - N1) / -STEP;
5227 else
5228 n = (adj + N2 - N1) / STEP;
5229 trip = 0;
5230 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5231 here so that V is defined
5232 if the loop is not entered
5233 L0:
5234 s0 = (trip * nthreads + threadid) * CHUNK;
5235 e0 = min (s0 + CHUNK, n);
5236 if (s0 < n) goto L1; else goto L4;
5237 L1:
5238 V = s0 * STEP + N1;
5239 e = e0 * STEP + N1;
5240 L2:
5241 BODY;
5242 V += STEP;
5243 if (V cond e) goto L2; else goto L3;
5244 L3:
5245 trip += 1;
5246 goto L0;
5247 L4:
5248*/
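/* For illustration: with nthreads = 4, threadid = 1, CHUNK = 3 and
   n = 20, thread 1 computes s0 = (trip * 4 + 1) * 3 and
   e0 = min (s0 + 3, 20), so it runs chunks [3, 6) on trip 0 and
   [15, 18) on trip 1, while the remaining chunks fall to the other
   threads; every logical iteration executes exactly once. */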
5249
5250static void
5251expand_omp_for_static_chunk (struct omp_region *region,
5252 struct omp_for_data *fd, gimple *inner_stmt)
5253{
5254 tree n, s0, e0, e, t;
5255 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5256 tree type, itype, vmain, vback, vextra;
5257 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5258 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5259 gimple_stmt_iterator gsi, gsip;
5260 edge se;
5261 bool broken_loop = region->cont == NULL;
5262 tree *counts = NULL;
5263 tree n1, n2, step;
5264 tree reductions = NULL_TREE;
5265 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5266
5267 itype = type = TREE_TYPE (fd->loop.v);
5268 if (POINTER_TYPE_P (type))
5269 itype = signed_type_for (type);
5270
5271 entry_bb = region->entry;
5272 se = split_block (entry_bb, last_stmt (entry_bb));
5273 entry_bb = se->src;
5274 iter_part_bb = se->dest;
5275 cont_bb = region->cont;
5276 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5277 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5278 gcc_assert (broken_loop
5279 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5280 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5281 body_bb = single_succ (seq_start_bb);
5282 if (!broken_loop)
5283 {
5284 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5285 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5286 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5287 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5288 }
5289 exit_bb = region->exit;
5290
5291 /* Trip and adjustment setup goes in ENTRY_BB. */
5292 gsi = gsi_last_nondebug_bb (entry_bb);
5293 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5294 gsip = gsi;
5295 gsi_prev (&gsip);
5296
5297 if (fd->collapse > 1)
5298 {
5299 int first_zero_iter = -1, dummy = -1;
5300 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5301
5302 counts = XALLOCAVEC (tree, fd->collapse);
5303 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5304 fin_bb, first_zero_iter,
5305 dummy_bb, dummy, l2_dom_bb);
5306 t = NULL_TREE;
5307 }
5308 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5309 t = integer_one_node;
5310 else
5311 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5312 fold_convert (type, fd->loop.n1),
5313 fold_convert (type, fd->loop.n2));
5314 if (fd->collapse == 1
5315 && TYPE_UNSIGNED (type)
5316 && (t == NULL_TREE || !integer_onep (t)))
5317 {
5318 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5319 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5320 true, GSI_SAME_STMT);
5321 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5322 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5323 true, GSI_SAME_STMT);
5324 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5325 NULL_TREE, NULL_TREE);
5326 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5327 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5328 expand_omp_regimplify_p, NULL, NULL)
5329 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5330 expand_omp_regimplify_p, NULL, NULL))
5331 {
5332 gsi = gsi_for_stmt (cond_stmt);
5333 gimple_regimplify_operands (cond_stmt, &gsi);
5334 }
5335 se = split_block (entry_bb, cond_stmt);
5336 se->flags = EDGE_TRUE_VALUE;
5337 entry_bb = se->dest;
5338 se->probability = profile_probability::very_likely ();
5339 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5340 se->probability = profile_probability::very_unlikely ();
5341 if (gimple_in_ssa_p (cfun))
5342 {
5343 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5344 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5345 !gsi_end_p (gpi); gsi_next (&gpi))
5346 {
5347 gphi *phi = gpi.phi ();
5348 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5349 se, UNKNOWN_LOCATION);
5350 }
5351 }
5352 gsi = gsi_last_bb (entry_bb);
5353 }
5354
5355 if (fd->lastprivate_conditional)
5356 {
5357 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5358 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5359 if (fd->have_pointer_condtemp)
5360 condtemp = OMP_CLAUSE_DECL (c);
5361 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5362 cond_var = OMP_CLAUSE_DECL (c);
5363 }
5364 if (fd->have_reductemp || fd->have_pointer_condtemp)
5365 {
5366 tree t1 = build_int_cst (long_integer_type_node, 0);
5367 tree t2 = build_int_cst (long_integer_type_node, 1);
5368 tree t3 = build_int_cstu (long_integer_type_node,
5369 (HOST_WIDE_INT_1U << 31) + 1);
5370 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5371 gimple_stmt_iterator gsi2 = gsi_none ();
5372 gimple *g = NULL;
5373 tree mem = null_pointer_node, memv = NULL_TREE;
5374 if (fd->have_reductemp)
5375 {
5376 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5377 reductions = OMP_CLAUSE_DECL (c);
5378 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5379 g = SSA_NAME_DEF_STMT (reductions);
5380 reductions = gimple_assign_rhs1 (g);
5381 OMP_CLAUSE_DECL (c) = reductions;
5382 gsi2 = gsi_for_stmt (g);
5383 }
5384 else
5385 {
5386 if (gsi_end_p (gsip))
5387 gsi2 = gsi_after_labels (region->entry);
5388 else
5389 gsi2 = gsip;
5390 reductions = null_pointer_node;
5391 }
5392 if (fd->have_pointer_condtemp)
5393 {
5394 tree type = TREE_TYPE (condtemp);
5395 memv = create_tmp_var (type);
5396 TREE_ADDRESSABLE (memv) = 1;
5397 unsigned HOST_WIDE_INT sz
5398 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5399 sz *= fd->lastprivate_conditional;
5400 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5401 false);
5402 mem = build_fold_addr_expr (memv);
5403 }
5404 tree t
5405 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5406 9, t1, t2, t2, t3, t1, null_pointer_node,
5407 null_pointer_node, reductions, mem);
5408 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5409 true, GSI_SAME_STMT);
5410 if (fd->have_pointer_condtemp)
5411 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5412 if (fd->have_reductemp)
5413 {
5414 gsi_remove (&gsi2, true);
5415 release_ssa_name (gimple_assign_lhs (g));
5416 }
5417 }
5418 switch (gimple_omp_for_kind (fd->for_stmt))
5419 {
5420 case GF_OMP_FOR_KIND_FOR:
5421 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5422 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5423 break;
5424 case GF_OMP_FOR_KIND_DISTRIBUTE:
5425 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5426 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5427 break;
5428 default:
5429 gcc_unreachable ();
5430 }
5431 nthreads = build_call_expr (nthreads, 0);
5432 nthreads = fold_convert (itype, nthreads);
5433 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5434 true, GSI_SAME_STMT);
5435 threadid = build_call_expr (threadid, 0);
5436 threadid = fold_convert (itype, threadid);
5437 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5438 true, GSI_SAME_STMT);
5439
5440 n1 = fd->loop.n1;
5441 n2 = fd->loop.n2;
5442 step = fd->loop.step;
5443 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5444 {
5445 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5446 OMP_CLAUSE__LOOPTEMP_);
5447 gcc_assert (innerc);
5448 n1 = OMP_CLAUSE_DECL (innerc);
5449 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5450 OMP_CLAUSE__LOOPTEMP_);
5451 gcc_assert (innerc);
5452 n2 = OMP_CLAUSE_DECL (innerc);
5453 }
5454 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5455 true, NULL_TREE, true, GSI_SAME_STMT);
5456 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5457 true, NULL_TREE, true, GSI_SAME_STMT);
5458 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5459 true, NULL_TREE, true, GSI_SAME_STMT);
5460 tree chunk_size = fold_convert (itype, fd->chunk_size);
5461 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5462 chunk_size
5463 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5464 GSI_SAME_STMT);
5465
5466 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5467 t = fold_build2 (PLUS_EXPR, itype, step, t);
5468 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5469 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5470 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5471 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5472 fold_build1 (NEGATE_EXPR, itype, t),
5473 fold_build1 (NEGATE_EXPR, itype, step));
5474 else
5475 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5476 t = fold_convert (itype, t);
5477 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5478 true, GSI_SAME_STMT);
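/* Note: the computation above is the usual round-up trip count;
   e.g. for "for (i = 0; i < 10; i += 3)" with cond <, adj = step - 1
   = 2 and n = (2 + 10 - 0) / 3 = 4 iterations. */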
5479
5480 trip_var = create_tmp_reg (itype, ".trip");
5481 if (gimple_in_ssa_p (cfun))
5482 {
5483 trip_init = make_ssa_name (trip_var);
5484 trip_main = make_ssa_name (trip_var);
5485 trip_back = make_ssa_name (trip_var);
5486 }
5487 else
5488 {
5489 trip_init = trip_var;
5490 trip_main = trip_var;
5491 trip_back = trip_var;
5492 }
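/* In SSA form the three names share the one underlying trip
   counter: trip_init flows in from ENTRY_BB, trip_back from
   TRIP_UPDATE_BB, and trip_main becomes the result of the PHI
   node created for ITER_PART_BB near the end of this function. */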
5493
5494 gassign *assign_stmt
5495 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5496 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5497
5498 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5499 t = fold_build2 (MULT_EXPR, itype, t, step);
5500 if (POINTER_TYPE_P (type))
5501 t = fold_build_pointer_plus (n1, t);
5502 else
5503 t = fold_build2 (PLUS_EXPR, type, t, n1);
5504 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5505 true, GSI_SAME_STMT);
5506
5507 /* Remove the GIMPLE_OMP_FOR. */
5508 gsi_remove (&gsi, true);
5509
5510 gimple_stmt_iterator gsif = gsi;
5511
5512 /* Iteration space partitioning goes in ITER_PART_BB. */
5513 gsi = gsi_last_bb (iter_part_bb);
5514
5515 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5516 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5517 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5518 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5519 false, GSI_CONTINUE_LINKING);
5520
5521 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5522 t = fold_build2 (MIN_EXPR, itype, t, n);
5523 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5524 false, GSI_CONTINUE_LINKING);
5525
5526 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5527 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5528
5529 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5530 gsi = gsi_start_bb (seq_start_bb);
5531
5532 tree startvar = fd->loop.v;
5533 tree endvar = NULL_TREE;
5534
5535 if (gimple_omp_for_combined_p (fd->for_stmt))
5536 {
5537 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5538 ? gimple_omp_parallel_clauses (inner_stmt)
5539 : gimple_omp_for_clauses (inner_stmt);
5540 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5541 gcc_assert (innerc);
5542 startvar = OMP_CLAUSE_DECL (innerc);
5543 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5544 OMP_CLAUSE__LOOPTEMP_);
5545 gcc_assert (innerc);
5546 endvar = OMP_CLAUSE_DECL (innerc);
5547 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5548 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5549 {
5550 int i;
5551 for (i = 1; i < fd->collapse; i++)
5552 {
5553 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5554 OMP_CLAUSE__LOOPTEMP_);
5555 gcc_assert (innerc);
5556 }
5557 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5558 OMP_CLAUSE__LOOPTEMP_);
5559 if (innerc)
5560 {
5561 /* If needed (distribute parallel for with lastprivate),
5562 propagate down the total number of iterations. */
5563 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5564 fd->loop.n2);
5565 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5566 GSI_CONTINUE_LINKING);
5567 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5568 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5569 }
5570 }
5571 }
5572
5573 t = fold_convert (itype, s0);
5574 t = fold_build2 (MULT_EXPR, itype, t, step);
5575 if (POINTER_TYPE_P (type))
5576 {
5577 t = fold_build_pointer_plus (n1, t);
5578 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5579 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5580 t = fold_convert (signed_type_for (type), t);
5581 }
5582 else
5583 t = fold_build2 (PLUS_EXPR, type, t, n1);
5584 t = fold_convert (TREE_TYPE (startvar), t);
5585 t = force_gimple_operand_gsi (&gsi, t,
5586 DECL_P (startvar)
5587 && TREE_ADDRESSABLE (startvar),
5588 NULL_TREE, false, GSI_CONTINUE_LINKING);
5589 assign_stmt = gimple_build_assign (startvar, t);
5590 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5591 if (cond_var)
5592 {
5593 tree itype = TREE_TYPE (cond_var);
5594 /* For lastprivate(conditional:) itervar, we need an iteration
5595 counter that starts at a non-zero unsigned value and increases.
5596 Prefer as few IVs as possible, so if we can use startvar
5597 itself, use that, or startvar + constant (those would be
5598 incremented with step), and as a last resort use s0 + 1,
5599 incremented by 1 each iteration. */
5600 if (POINTER_TYPE_P (type)
5601 || TREE_CODE (n1) != INTEGER_CST
5602 || fd->loop.cond_code != LT_EXPR)
5603 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5604 build_int_cst (itype, 1));
5605 else if (tree_int_cst_sgn (n1) == 1)
5606 t = fold_convert (itype, t);
5607 else
5608 {
5609 tree c = fold_convert (itype, n1);
5610 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5611 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5612 }
5613 t = force_gimple_operand_gsi (&gsi, t, false,
5614 NULL_TREE, false, GSI_CONTINUE_LINKING);
5615 assign_stmt = gimple_build_assign (cond_var, t);
5616 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5617 }
5618
5619 t = fold_convert (itype, e0);
5620 t = fold_build2 (MULT_EXPR, itype, t, step);
5621 if (POINTER_TYPE_P (type))
5622 {
5623 t = fold_build_pointer_plus (n1, t);
5624 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5625 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5626 t = fold_convert (signed_type_for (type), t);
5627 }
5628 else
5629 t = fold_build2 (PLUS_EXPR, type, t, n1);
5630 t = fold_convert (TREE_TYPE (startvar), t);
5631 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5632 false, GSI_CONTINUE_LINKING);
5633 if (endvar)
5634 {
5635 assign_stmt = gimple_build_assign (endvar, e);
5636 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5637 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5638 assign_stmt = gimple_build_assign (fd->loop.v, e);
5639 else
5640 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5641 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5642 }
5643 /* Handle linear clause adjustments. */
5644 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5645 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5646 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5647 c; c = OMP_CLAUSE_CHAIN (c))
5648 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5649 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5650 {
5651 tree d = OMP_CLAUSE_DECL (c);
5652 bool is_ref = omp_is_reference (d);
5653 tree t = d, a, dest;
5654 if (is_ref)
5655 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5656 tree type = TREE_TYPE (t);
5657 if (POINTER_TYPE_P (type))
5658 type = sizetype;
5659 dest = unshare_expr (t);
5660 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5661 expand_omp_build_assign (&gsif, v, t);
5662 if (itercnt == NULL_TREE)
5663 {
5664 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5665 {
5666 itercntbias
5667 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5668 fold_convert (itype, fd->loop.n1));
5669 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5670 itercntbias, step);
5671 itercntbias
5672 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5673 NULL_TREE, true,
5674 GSI_SAME_STMT);
5675 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5676 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5677 NULL_TREE, false,
5678 GSI_CONTINUE_LINKING);
5679 }
5680 else
5681 itercnt = s0;
5682 }
5683 a = fold_build2 (MULT_EXPR, type,
5684 fold_convert (type, itercnt),
5685 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5686 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5687 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5688 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5689 false, GSI_CONTINUE_LINKING);
5690 assign_stmt = gimple_build_assign (dest, t);
5691 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5692 }
5693 if (fd->collapse > 1)
5694 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5695
5696 if (!broken_loop)
5697 {
5698 /* The code controlling the sequential loop goes in CONT_BB,
5699 replacing the GIMPLE_OMP_CONTINUE. */
5700 gsi = gsi_last_nondebug_bb (cont_bb);
5701 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5702 vmain = gimple_omp_continue_control_use (cont_stmt);
5703 vback = gimple_omp_continue_control_def (cont_stmt);
5704
5705 if (cond_var)
5706 {
5707 tree itype = TREE_TYPE (cond_var);
5708 tree t2;
5709 if (POINTER_TYPE_P (type)
5710 || TREE_CODE (n1) != INTEGER_CST
5711 || fd->loop.cond_code != LT_EXPR)
5712 t2 = build_int_cst (itype, 1);
5713 else
5714 t2 = fold_convert (itype, step);
5715 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5716 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5717 NULL_TREE, true, GSI_SAME_STMT);
5718 assign_stmt = gimple_build_assign (cond_var, t2);
5719 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5720 }
5721
5722 if (!gimple_omp_for_combined_p (fd->for_stmt))
5723 {
5724 if (POINTER_TYPE_P (type))
5725 t = fold_build_pointer_plus (vmain, step);
5726 else
5727 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5728 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
5729 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5730 true, GSI_SAME_STMT);
5731 assign_stmt = gimple_build_assign (vback, t);
5732 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5733
5734 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
5735 t = build2 (EQ_EXPR, boolean_type_node,
5736 build_int_cst (itype, 0),
5737 build_int_cst (itype, 1));
5738 else
5739 t = build2 (fd->loop.cond_code, boolean_type_node,
5740 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5741 ? t : vback, e);
5742 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
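/* Note: for schedule(static, 1) each chunk holds a single
   iteration, so the branch-back test above is emitted as the
   constant "0 == 1" (always false) and control always falls
   through to the trip count update. */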
5743 }
5744
5745 /* Remove GIMPLE_OMP_CONTINUE. */
5746 gsi_remove (&gsi, true);
5747
5748 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5749 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
5750
5751 /* Trip update code goes into TRIP_UPDATE_BB. */
5752 gsi = gsi_start_bb (trip_update_bb);
5753
5754 t = build_int_cst (itype, 1);
5755 t = build2 (PLUS_EXPR, itype, trip_main, t);
5756 assign_stmt = gimple_build_assign (trip_back, t);
5757 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5758 }
5759
5760 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5761 gsi = gsi_last_nondebug_bb (exit_bb);
5762 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5763 {
5764 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5765 if (fd->have_reductemp || fd->have_pointer_condtemp)
5766 {
5767 tree fn;
5768 if (t)
5769 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5770 else
5771 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5772 gcall *g = gimple_build_call (fn, 0);
5773 if (t)
5774 {
5775 gimple_call_set_lhs (g, t);
5776 if (fd->have_reductemp)
5777 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5778 NOP_EXPR, t),
5779 GSI_SAME_STMT);
5780 }
5781 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5782 }
5783 else
5784 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5785 }
5786 else if (fd->have_pointer_condtemp)
5787 {
5788 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5789 gcall *g = gimple_build_call (fn, 0);
5790 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5791 }
5792 gsi_remove (&gsi, true);
5793
5794 /* Connect the new blocks. */
5795 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
5796 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
5797
5798 if (!broken_loop)
5799 {
5800 se = find_edge (cont_bb, body_bb);
5801 if (se == NULL)
5802 {
5803 se = BRANCH_EDGE (cont_bb);
5804 gcc_assert (single_succ (se->dest) == body_bb);
5805 }
5806 if (gimple_omp_for_combined_p (fd->for_stmt))
5807 {
5808 remove_edge (se);
5809 se = NULL;
5810 }
5811 else if (fd->collapse > 1)
5812 {
5813 remove_edge (se);
5814 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5815 }
5816 else
5817 se->flags = EDGE_TRUE_VALUE;
5818 find_edge (cont_bb, trip_update_bb)->flags
5819 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5820
5821 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5822 iter_part_bb);
5823 }
5824
5825 if (gimple_in_ssa_p (cfun))
5826 {
5827 gphi_iterator psi;
5828 gphi *phi;
5829 edge re, ene;
5830 edge_var_map *vm;
5831 size_t i;
5832
5833 gcc_assert (fd->collapse == 1 && !broken_loop);
5834
5835 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5836 remove arguments of the phi nodes in fin_bb. We need to create
5837 appropriate phi nodes in iter_part_bb instead. */
5838 se = find_edge (iter_part_bb, fin_bb);
5839 re = single_succ_edge (trip_update_bb);
5840 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5841 ene = single_succ_edge (entry_bb);
5842
5843 psi = gsi_start_phis (fin_bb);
5844 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5845 gsi_next (&psi), ++i)
5846 {
5847 gphi *nphi;
5848 location_t locus;
5849
5850 phi = psi.phi ();
5851 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5852 redirect_edge_var_map_def (vm), 0))
5853 continue;
5854
5855 t = gimple_phi_result (phi);
5856 gcc_assert (t == redirect_edge_var_map_result (vm));
5857
5858 if (!single_pred_p (fin_bb))
5859 t = copy_ssa_name (t, phi);
5860
5861 nphi = create_phi_node (t, iter_part_bb);
5862
5863 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5864 locus = gimple_phi_arg_location_from_edge (phi, se);
5865
5866 /* A special case -- fd->loop.v is not yet computed in
5867 iter_part_bb, so we need to use vextra instead. */
5868 if (t == fd->loop.v)
5869 t = vextra;
5870 add_phi_arg (nphi, t, ene, locus);
5871 locus = redirect_edge_var_map_location (vm);
5872 tree back_arg = redirect_edge_var_map_def (vm);
5873 add_phi_arg (nphi, back_arg, re, locus);
5874 edge ce = find_edge (cont_bb, body_bb);
5875 if (ce == NULL)
5876 {
5877 ce = BRANCH_EDGE (cont_bb);
5878 gcc_assert (single_succ (ce->dest) == body_bb);
5879 ce = single_succ_edge (ce->dest);
5880 }
5881 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5882 gcc_assert (inner_loop_phi != NULL);
5883 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5884 find_edge (seq_start_bb, body_bb), locus);
5885
5886 if (!single_pred_p (fin_bb))
5887 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5888 }
5889 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5890 redirect_edge_var_map_clear (re);
5891 if (single_pred_p (fin_bb))
5892 while (1)
5893 {
5894 psi = gsi_start_phis (fin_bb);
5895 if (gsi_end_p (psi))
5896 break;
5897 remove_phi_node (&psi, false);
5898 }
5899
5900 /* Make phi node for trip. */
5901 phi = create_phi_node (trip_main, iter_part_bb);
5902 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5903 UNKNOWN_LOCATION);
5904 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5905 UNKNOWN_LOCATION);
5906 }
5907
5908 if (!broken_loop)
5909 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5910 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5911 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5912 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5913 recompute_dominator (CDI_DOMINATORS, fin_bb));
5914 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5915 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5916 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5917 recompute_dominator (CDI_DOMINATORS, body_bb));
5918
5919 if (!broken_loop)
5920 {
5921 class loop *loop = body_bb->loop_father;
5922 class loop *trip_loop = alloc_loop ();
5923 trip_loop->header = iter_part_bb;
5924 trip_loop->latch = trip_update_bb;
5925 add_loop (trip_loop, iter_part_bb->loop_father);
5926
5927 if (loop != entry_bb->loop_father)
5928 {
5929 gcc_assert (loop->header == body_bb);
5930 gcc_assert (loop->latch == region->cont
5931 || single_pred (loop->latch) == region->cont);
5932 trip_loop->inner = loop;
5933 return;
5934 }
5935
5936 if (!gimple_omp_for_combined_p (fd->for_stmt))
5937 {
5938 loop = alloc_loop ();
5939 loop->header = body_bb;
5940 if (collapse_bb == NULL)
5941 loop->latch = cont_bb;
5942 add_loop (loop, trip_loop);
5943 }
5944 }
5945}
5946
5947/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5948 loop. Given parameters:
5949
5950 for (V = N1; V cond N2; V += STEP) BODY;
5951
5952 where COND is "<" or ">", we generate pseudocode
5953
5954 V = N1;
5955 goto L1;
5956 L0:
5957 BODY;
5958 V += STEP;
5959 L1:
5960 if (V cond N2) goto L0; else goto L2;
5961 L2:
5962
5963 For collapsed loops, given parameters:
5964 collapse(3)
5965 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5966 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5967 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5968 BODY;
5969
5970 we generate pseudocode
5971
5972 if (cond3 is <)
5973 adj = STEP3 - 1;
5974 else
5975 adj = STEP3 + 1;
5976 count3 = (adj + N32 - N31) / STEP3;
5977 if (cond2 is <)
5978 adj = STEP2 - 1;
5979 else
5980 adj = STEP2 + 1;
5981 count2 = (adj + N22 - N21) / STEP2;
5982 if (cond1 is <)
5983 adj = STEP1 - 1;
5984 else
5985 adj = STEP1 + 1;
5986 count1 = (adj + N12 - N11) / STEP1;
5987 count = count1 * count2 * count3;
5988 V = 0;
5989 V1 = N11;
5990 V2 = N21;
5991 V3 = N31;
5992 goto L1;
5993 L0:
5994 BODY;
5995 V += 1;
5996 V3 += STEP3;
5997 V2 += (V3 cond3 N32) ? 0 : STEP2;
5998 V3 = (V3 cond3 N32) ? V3 : N31;
5999 V1 += (V2 cond2 N22) ? 0 : STEP1;
6000 V2 = (V2 cond2 N22) ? V2 : N21;
6001 L1:
6002 if (V < count) goto L0; else goto L2;
6003 L2:
6004
6005 */
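/* For illustration, with collapse(2), counts 2 and 3 and zero-based
   loops, the logical counter V advances 0..5 while (V1, V2) steps
   (0,0) (0,1) (0,2) (1,0) (1,1) (1,2): once V2 reaches N22 the
   "V2 cond2 N22" test fails, so V1 gains STEP1 and V2 wraps back
   to N21. */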
6006
6007static void
6008expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6009{
6010 tree type, t;
6011 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6012 gimple_stmt_iterator gsi;
6013 gimple *stmt;
6014 gcond *cond_stmt;
6015 bool broken_loop = region->cont == NULL;
6016 edge e, ne;
6017 tree *counts = NULL;
6018 int i;
6019 int safelen_int = INT_MAX;
6020 bool dont_vectorize = false;
6021 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6022 OMP_CLAUSE_SAFELEN);
6023 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6024 OMP_CLAUSE__SIMDUID_);
6025 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6026 OMP_CLAUSE_IF);
6027 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6028 OMP_CLAUSE_SIMDLEN);
6029 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6030 OMP_CLAUSE__CONDTEMP_);
6031 tree n1, n2;
6032 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6033
6034 if (safelen)
6035 {
6036 poly_uint64 val;
6037 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6038 if (!poly_int_tree_p (safelen, &val))
6039 safelen_int = 0;
6040 else
6041 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6042 if (safelen_int == 1)
6043 safelen_int = 0;
6044 }
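/* Note: a safelen that is not a (poly-)integer constant degrades
   to 0, i.e. no reordering promise; constant values are clamped
   to INT_MAX, and safelen(1) is treated like no safelen at all,
   since single-lane SIMD buys nothing. */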
6045 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6046 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6047 {
6048 safelen_int = 0;
6049 dont_vectorize = true;
6050 }
6051 type = TREE_TYPE (fd->loop.v);
6052 entry_bb = region->entry;
6053 cont_bb = region->cont;
6054 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6055 gcc_assert (broken_loop
6056 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6057 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6058 if (!broken_loop)
6059 {
6060 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6061 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6062 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6063 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6064 }
6065 else
6066 {
6067 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6068 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6069 l2_bb = single_succ (l1_bb);
6070 }
6071 exit_bb = region->exit;
6072 l2_dom_bb = NULL;
6073
6074 gsi = gsi_last_nondebug_bb (entry_bb);
6075
6076 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6077 /* Not needed in SSA form right now. */
6078 gcc_assert (!gimple_in_ssa_p (cfun));
6079 if (fd->collapse > 1)
6080 {
6081 int first_zero_iter = -1, dummy = -1;
6082 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6083
6084 counts = XALLOCAVEC (tree, fd->collapse);
6085 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6086 zero_iter_bb, first_zero_iter,
6087 dummy_bb, dummy, l2_dom_bb);
6088 }
6089 if (l2_dom_bb == NULL)
6090 l2_dom_bb = l1_bb;
6091
6092 n1 = fd->loop.n1;
6093 n2 = fd->loop.n2;
6094 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6095 {
6096 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6097 OMP_CLAUSE__LOOPTEMP_);
6098 gcc_assert (innerc);
6099 n1 = OMP_CLAUSE_DECL (innerc);
6100 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6101 OMP_CLAUSE__LOOPTEMP_);
6102 gcc_assert (innerc);
6103 n2 = OMP_CLAUSE_DECL (innerc);
6104 }
6105 tree step = fd->loop.step;
6106
6107 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6108 OMP_CLAUSE__SIMT_);
6109 if (is_simt)
6110 {
6111 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6112 is_simt = safelen_int > 1;
6113 }
6114 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6115 if (is_simt)
6116 {
6117 simt_lane = create_tmp_var (unsigned_type_node);
6118 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6119 gimple_call_set_lhs (g, simt_lane);
6120 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6121 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6122 fold_convert (TREE_TYPE (step), simt_lane));
6123 n1 = fold_convert (type, n1);
6124 if (POINTER_TYPE_P (type))
6125 n1 = fold_build_pointer_plus (n1, offset);
6126 else
6127 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6128
6129 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6130 if (fd->collapse > 1)
6131 simt_maxlane = build_one_cst (unsigned_type_node);
6132 else if (safelen_int < omp_max_simt_vf ())
6133 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6134 tree vf
6135 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6136 unsigned_type_node, 0);
6137 if (simt_maxlane)
6138 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6139 vf = fold_convert (TREE_TYPE (step), vf);
6140 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6141 }
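/* Each SIMT lane therefore starts at N1 + simt_lane * STEP and
   strides by STEP * VF, interleaving the lanes over the iteration
   space; the final value of V is fixed up after the loop by the
   "V -= STEP * (SIMT_VF - 1)" adjustment emitted below. */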
6142
6143 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6144 if (fd->collapse > 1)
6145 {
6146 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6147 {
6148 gsi_prev (&gsi);
6149 expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1);
6150 gsi_next (&gsi);
6151 }
6152 else
6153 for (i = 0; i < fd->collapse; i++)
6154 {
6155 tree itype = TREE_TYPE (fd->loops[i].v);
6156 if (POINTER_TYPE_P (itype))
6157 itype = signed_type_for (itype);
6158 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6159 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6160 }
6161 }
6162 if (cond_var)
6163 {
6164 if (POINTER_TYPE_P (type)
6165 || TREE_CODE (n1) != INTEGER_CST
6166 || fd->loop.cond_code != LT_EXPR
6167 || tree_int_cst_sgn (n1) != 1)
6168 expand_omp_build_assign (&gsi, cond_var,
6169 build_one_cst (TREE_TYPE (cond_var)));
6170 else
6171 expand_omp_build_assign (&gsi, cond_var,
6172 fold_convert (TREE_TYPE (cond_var), n1));
6173 }
6174
6175 /* Remove the GIMPLE_OMP_FOR statement. */
6176 gsi_remove (&gsi, true);
6177
6178 if (!broken_loop)
6179 {
6180 /* Code to control the increment goes in the CONT_BB. */
6181 gsi = gsi_last_nondebug_bb (cont_bb);
6182 stmt = gsi_stmt (gsi);
6183 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6184
6185 if (POINTER_TYPE_P (type))
6186 t = fold_build_pointer_plus (fd->loop.v, step);
6187 else
6188 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6189 expand_omp_build_assign (&gsi, fd->loop.v, t);
6190
6191 if (fd->collapse > 1)
6192 {
6193 i = fd->collapse - 1;
6194 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6195 {
6196 t = fold_convert (sizetype, fd->loops[i].step);
6197 t = fold_build_pointer_plus (fd->loops[i].v, t);
6198 }
6199 else
6200 {
6201 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6202 fd->loops[i].step);
6203 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6204 fd->loops[i].v, t);
6205 }
6206 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6207
6208 for (i = fd->collapse - 1; i > 0; i--)
6209 {
6210 tree itype = TREE_TYPE (fd->loops[i].v);
6211 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
6212 if (POINTER_TYPE_P (itype2))
6213 itype2 = signed_type_for (itype2);
6214 t = fold_convert (itype2, fd->loops[i - 1].step);
6215 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6216 GSI_SAME_STMT);
6217 t = build3 (COND_EXPR, itype2,
6218 build2 (fd->loops[i].cond_code, boolean_type_node,
6219 fd->loops[i].v,
6220 fold_convert (itype, fd->loops[i].n2)),
6221 build_int_cst (itype2, 0), t);
6222 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
6223 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
6224 else
6225 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
6226 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
6227
6228 t = fold_convert (itype, fd->loops[i].n1);
6229 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
6230 GSI_SAME_STMT);
6231 t = build3 (COND_EXPR, itype,
6232 build2 (fd->loops[i].cond_code, boolean_type_node,
6233 fd->loops[i].v,
6234 fold_convert (itype, fd->loops[i].n2)),
6235 fd->loops[i].v, t);
6236 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6237 }
6238 }
6239 if (cond_var)
6240 {
6241 if (POINTER_TYPE_P (type)
6242 || TREE_CODE (n1) != INTEGER_CST
6243 || fd->loop.cond_code != LT_EXPR
6244 || tree_int_cst_sgn (n1) != 1)
6245 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6246 build_one_cst (TREE_TYPE (cond_var)));
6247 else
6248 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6249 fold_convert (TREE_TYPE (cond_var), step));
6250 expand_omp_build_assign (&gsi, cond_var, t);
6251 }
6252
6253 /* Remove GIMPLE_OMP_CONTINUE. */
6254 gsi_remove (&gsi, true);
6255 }
6256
6257 /* Emit the condition in L1_BB. */
6258 gsi = gsi_start_bb (l1_bb);
6259
6260 t = fold_convert (type, n2);
6261 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6262 false, GSI_CONTINUE_LINKING);
6263 tree v = fd->loop.v;
6264 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6265 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6266 false, GSI_CONTINUE_LINKING);
6267 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6268 cond_stmt = gimple_build_cond_empty (t);
6269 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6270 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6271 NULL, NULL)
6272 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6273 NULL, NULL))
6274 {
6275 gsi = gsi_for_stmt (cond_stmt);
6276 gimple_regimplify_operands (cond_stmt, &gsi);
6277 }
6278
6279 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6280 if (is_simt)
6281 {
6282 gsi = gsi_start_bb (l2_bb);
6283 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6284 if (POINTER_TYPE_P (type))
6285 t = fold_build_pointer_plus (fd->loop.v, step);
6286 else
6287 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6288 expand_omp_build_assign (&gsi, fd->loop.v, t);
6289 }
6290
6291 /* Remove GIMPLE_OMP_RETURN. */
6292 gsi = gsi_last_nondebug_bb (exit_bb);
6293 gsi_remove (&gsi, true);
6294
6295 /* Connect the new blocks. */
6296 remove_edge (FALLTHRU_EDGE (entry_bb));
6297
6298 if (!broken_loop)
6299 {
6300 remove_edge (BRANCH_EDGE (entry_bb));
6301 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6302
6303 e = BRANCH_EDGE (l1_bb);
6304 ne = FALLTHRU_EDGE (l1_bb);
6305 e->flags = EDGE_TRUE_VALUE;
6306 }
6307 else
6308 {
6309 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6310
6311 ne = single_succ_edge (l1_bb);
6312 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6313
6314 }
6315 ne->flags = EDGE_FALSE_VALUE;
6316 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6317 ne->probability = e->probability.invert ();
6318
6319 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6320 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6321
6322 if (simt_maxlane)
6323 {
6324 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6325 NULL_TREE, NULL_TREE);
6326 gsi = gsi_last_bb (entry_bb);
6327 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6328 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6329 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6330 FALLTHRU_EDGE (entry_bb)->probability
6331 = profile_probability::guessed_always ().apply_scale (7, 8);
6332 BRANCH_EDGE (entry_bb)->probability
6333 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6334 l2_dom_bb = entry_bb;
6335 }
6336 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6337
6338 if (!broken_loop)
6339 {
6340 class loop *loop = alloc_loop ();
6341 loop->header = l1_bb;
6342 loop->latch = cont_bb;
6343 add_loop (loop, l1_bb->loop_father);
6344 loop->safelen = safelen_int;
6345 if (simduid)
6346 {
6347 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6348 cfun->has_simduid_loops = true;
6349 }
6350 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6351 the loop. */
6352 if ((flag_tree_loop_vectorize
6353 || !global_options_set.x_flag_tree_loop_vectorize)
6354 && flag_tree_loop_optimize
6355 && loop->safelen > 1)
6356 {
6357 loop->force_vectorize = true;
6358 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6359 {
6360 unsigned HOST_WIDE_INT v
6361 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6362 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6363 loop->simdlen = v;
6364 }
6365 cfun->has_force_vectorize_loops = true;
6366 }
6367 else if (dont_vectorize)
6368 loop->dont_vectorize = true;
6369 }
6370 else if (simduid)
6371 cfun->has_simduid_loops = true;
6372}
6373
6374 /* The taskloop construct is represented after gimplification as
6375 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6376 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6377 which should just compute all the needed loop temporaries
6378 for the GIMPLE_OMP_TASK. */
6379
6380static void
6381expand_omp_taskloop_for_outer (struct omp_region *region,
6382 struct omp_for_data *fd,
6383 gimple *inner_stmt)
6384{
6385 tree type, bias = NULL_TREE;
6386 basic_block entry_bb, cont_bb, exit_bb;
6387 gimple_stmt_iterator gsi;
6388 gassign *assign_stmt;
6389 tree *counts = NULL;
6390 int i;
6391
6392 gcc_assert (inner_stmt);
6393 gcc_assert (region->cont);
6394 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6395 && gimple_omp_task_taskloop_p (inner_stmt));
6396 type = TREE_TYPE (fd->loop.v);
6397
6398 /* See if we need to bias by LLONG_MIN. */
6399 if (fd->iter_type == long_long_unsigned_type_node
6400 && TREE_CODE (type) == INTEGER_TYPE
6401 && !TYPE_UNSIGNED (type))
6402 {
6403 tree n1, n2;
6404
6405 if (fd->loop.cond_code == LT_EXPR)
6406 {
6407 n1 = fd->loop.n1;
6408 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6409 }
6410 else
6411 {
6412 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6413 n2 = fd->loop.n1;
6414 }
6415 if (TREE_CODE (n1) != INTEGER_CST
6416 || TREE_CODE (n2) != INTEGER_CST
6417 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6418 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6419 }
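/* For example, for signed long long bounds the bias is LLONG_MIN:
   adding 0x8000000000000000 maps the signed range monotonically
   onto the unsigned range (-5 becomes 0x7ffffffffffffffb), so the
   unsigned iteration type of GOMP_taskloop_ull can represent
   ranges that cross zero. */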
6420
6421 entry_bb = region->entry;
6422 cont_bb = region->cont;
6423 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6424 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6425 exit_bb = region->exit;
6426
6427 gsi = gsi_last_nondebug_bb (entry_bb);
6428 gimple *for_stmt = gsi_stmt (gsi);
6429 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
6430 if (fd->collapse > 1)
6431 {
6432 int first_zero_iter = -1, dummy = -1;
6433 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
6434
6435 counts = XALLOCAVEC (tree, fd->collapse);
6436 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6437 zero_iter_bb, first_zero_iter,
6438 dummy_bb, dummy, l2_dom_bb);
6439
6440 if (zero_iter_bb)
6441 {
6442 /* Some counts[i] vars might be uninitialized if
6443 some loop has zero iterations. But the body shouldn't
6444 be executed in that case, so just avoid uninit warnings. */
6445 for (i = first_zero_iter; i < fd->collapse; i++)
6446 if (SSA_VAR_P (counts[i]))
6447 TREE_NO_WARNING (counts[i]) = 1;
6448 gsi_prev (&gsi);
6449 edge e = split_block (entry_bb, gsi_stmt (gsi));
6450 entry_bb = e->dest;
6451 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
6452 gsi = gsi_last_bb (entry_bb);
6453 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
6454 get_immediate_dominator (CDI_DOMINATORS,
6455 zero_iter_bb));
6456 }
6457 }
6458
6459 tree t0, t1;
6460 t1 = fd->loop.n2;
6461 t0 = fd->loop.n1;
6462 if (POINTER_TYPE_P (TREE_TYPE (t0))
6463 && TYPE_PRECISION (TREE_TYPE (t0))
6464 != TYPE_PRECISION (fd->iter_type))
6465 {
6466 /* Avoid casting pointers to integer of a different size. */
6467 tree itype = signed_type_for (type);
6468 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
6469 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
6470 }
6471 else
6472 {
6473 t1 = fold_convert (fd->iter_type, t1);
6474 t0 = fold_convert (fd->iter_type, t0);
6475 }
6476 if (bias)
6477 {
6478 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
6479 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
6480 }
6481
6482 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
6483 OMP_CLAUSE__LOOPTEMP_);
6484 gcc_assert (innerc);
6485 tree startvar = OMP_CLAUSE_DECL (innerc);
6486 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6487 gcc_assert (innerc);
6488 tree endvar = OMP_CLAUSE_DECL (innerc);
6489 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
6490 {
6491 gcc_assert (innerc);
6492 for (i = 1; i < fd->collapse; i++)
6493 {
6494 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6495 OMP_CLAUSE__LOOPTEMP_);
6496 gcc_assert (innerc);
6497 }
6498 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6499 OMP_CLAUSE__LOOPTEMP_);
6500 if (innerc)
6501 {
6502 /* If needed (inner taskloop has lastprivate clause), propagate
6503 down the total number of iterations. */
6504 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
6505 NULL_TREE, false,
6506 GSI_CONTINUE_LINKING);
6507 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
6508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6509 }
6510 }
6511
6512 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
6513 GSI_CONTINUE_LINKING);
6514 assign_stmt = gimple_build_assign (startvar, t0);
6515 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6516
6517 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
6518 GSI_CONTINUE_LINKING);
6519 assign_stmt = gimple_build_assign (endvar, t1);
6520 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6521 if (fd->collapse > 1)
6522 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6523
6524 /* Remove the GIMPLE_OMP_FOR statement. */
6525 gsi = gsi_for_stmt (for_stmt);
6526 gsi_remove (&gsi, true);
6527
6528 gsi = gsi_last_nondebug_bb (cont_bb);
6529 gsi_remove (&gsi, true);
6530
6531 gsi = gsi_last_nondebug_bb (exit_bb);
6532 gsi_remove (&gsi, true);
6533
6534 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6535 remove_edge (BRANCH_EDGE (entry_bb));
6536 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
6537 remove_edge (BRANCH_EDGE (cont_bb));
6538 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
6539 set_immediate_dominator (CDI_DOMINATORS, region->entry,
6540 recompute_dominator (CDI_DOMINATORS, region->entry));
6541}
6542
6543/* Taskloop construct is represented after gimplification with
6544 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6545 in between them. This routine expands the inner GIMPLE_OMP_FOR.
6546 GOMP_taskloop{,_ull} function arranges for each task to be given just
6547 a single range of iterations. */
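/* Concretely (a sketch): each task spawned by the runtime receives
   its own [start, end) subrange through the two _LOOPTEMP_ clause
   variables read as N1 and N2 below, and the loop expanded here
   simply walks that subrange with the original step.  */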
6548
6549static void
6550expand_omp_taskloop_for_inner (struct omp_region *region,
6551 struct omp_for_data *fd,
6552 gimple *inner_stmt)
6553{
6554 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
6555 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
6556 basic_block fin_bb;
6557 gimple_stmt_iterator gsi;
6558 edge ep;
6559 bool broken_loop = region->cont == NULL;
6560 tree *counts = NULL;
6561 tree n1, n2, step;
6562
6563 itype = type = TREE_TYPE (fd->loop.v);
6564 if (POINTER_TYPE_P (type))
6565 itype = signed_type_for (type);
6566
6567 /* See if we need to bias by LLONG_MIN. */
6568 if (fd->iter_type == long_long_unsigned_type_node
6569 && TREE_CODE (type) == INTEGER_TYPE
6570 && !TYPE_UNSIGNED (type))
6571 {
6572 tree n1, n2;
6573
6574 if (fd->loop.cond_code == LT_EXPR)
6575 {
6576 n1 = fd->loop.n1;
6577 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6578 }
6579 else
6580 {
6581 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6582 n2 = fd->loop.n1;
6583 }
6584 if (TREE_CODE (n1) != INTEGER_CST
6585 || TREE_CODE (n2) != INTEGER_CST
6586 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6587 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6588 }
6589
6590 entry_bb = region->entry;
6591 cont_bb = region->cont;
6592 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6593 fin_bb = BRANCH_EDGE (entry_bb)->dest;
6594 gcc_assert (broken_loop
6595 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
6596 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6597 if (!broken_loop)
6598 {
6599 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
6600 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6601 }
6602 exit_bb = region->exit;
6603
6604 /* Iteration space partitioning goes in ENTRY_BB. */
6605 gsi = gsi_last_nondebug_bb (entry_bb);
6606 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6607
6608 if (fd->collapse > 1)
6609 {
6610 int first_zero_iter = -1, dummy = -1;
6611 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
6612
6613 counts = XALLOCAVEC (tree, fd->collapse);
6614 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6615 fin_bb, first_zero_iter,
6616 dummy_bb, dummy, l2_dom_bb);
6617 t = NULL_TREE;
6618 }
6619 else
6620 t = integer_one_node;
6621
6622 step = fd->loop.step;
6623 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6624 OMP_CLAUSE__LOOPTEMP_);
6625 gcc_assert (innerc);
6626 n1 = OMP_CLAUSE_DECL (innerc);
6627 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
6628 gcc_assert (innerc);
6629 n2 = OMP_CLAUSE_DECL (innerc);
6630 if (bias)
6631 {
6632 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
6633 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
6634 }
6635 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
6636 true, NULL_TREE, true, GSI_SAME_STMT);
6637 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
6638 true, NULL_TREE, true, GSI_SAME_STMT);
6639 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
6640 true, NULL_TREE, true, GSI_SAME_STMT);
6641
6642 tree startvar = fd->loop.v;
6643 tree endvar = NULL_TREE;
6644
6645 if (gimple_omp_for_combined_p (fd->for_stmt))
6646 {
6647 tree clauses = gimple_omp_for_clauses (inner_stmt);
6648 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6649 gcc_assert (innerc);
6650 startvar = OMP_CLAUSE_DECL (innerc);
6651 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6652 OMP_CLAUSE__LOOPTEMP_);
6653 gcc_assert (innerc);
6654 endvar = OMP_CLAUSE_DECL (innerc);
6655 }
6656 t = fold_convert (TREE_TYPE (startvar), n1);
6657 t = force_gimple_operand_gsi (&gsi, t,
6658 DECL_P (startvar)
6659 && TREE_ADDRESSABLE (startvar),
6660 NULL_TREE, false, GSI_CONTINUE_LINKING);
6661 gimple *assign_stmt = gimple_build_assign (startvar, t);
6662 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6663
6664 t = fold_convert (TREE_TYPE (startvar), n2);
6665 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6666 false, GSI_CONTINUE_LINKING);
6667 if (endvar)
6668 {
6669 assign_stmt = gimple_build_assign (endvar, e);
6670 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6671 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6672 assign_stmt = gimple_build_assign (fd->loop.v, e);
6673 else
6674 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6675 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6676 }
6677 if (fd->collapse > 1)
6678 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6679
6680 if (!broken_loop)
6681 {
6682 /* The code controlling the sequential loop replaces the
6683 GIMPLE_OMP_CONTINUE. */
6684 gsi = gsi_last_nondebug_bb (cont_bb);
6685 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6686 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
6687 vmain = gimple_omp_continue_control_use (cont_stmt);
6688 vback = gimple_omp_continue_control_def (cont_stmt);
6689
6690 if (!gimple_omp_for_combined_p (fd->for_stmt))
6691 {
6692 if (POINTER_TYPE_P (type))
6693 t = fold_build_pointer_plus (vmain, step);
6694 else
6695 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6696 t = force_gimple_operand_gsi (&gsi, t,
6697 DECL_P (vback)
6698 && TREE_ADDRESSABLE (vback),
6699 NULL_TREE, true, GSI_SAME_STMT);
6700 assign_stmt = gimple_build_assign (vback, t);
6701 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6702
6703 t = build2 (fd->loop.cond_code, boolean_type_node,
6704 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6705 ? t : vback, e);
6706 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6707 }
6708
6709 /* Remove the GIMPLE_OMP_CONTINUE statement. */
6710 gsi_remove (&gsi, true);
6711
6712 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6713 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6714 }
6715
6716 /* Remove the GIMPLE_OMP_FOR statement. */
6717 gsi = gsi_for_stmt (fd->for_stmt);
6718 gsi_remove (&gsi, true);
6719
6720 /* Remove the GIMPLE_OMP_RETURN statement. */
6721 gsi = gsi_last_nondebug_bb (exit_bb);
6722 gsi_remove (&gsi, true);
6723
6724 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
6725 if (!broken_loop)
6726 remove_edge (BRANCH_EDGE (entry_bb));
6727 else
6728 {
6729 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
6730 region->outer->cont = NULL;
6731 }
6732
6733 /* Connect all the blocks. */
6734 if (!broken_loop)
6735 {
6736 ep = find_edge (cont_bb, body_bb);
6737 if (gimple_omp_for_combined_p (fd->for_stmt))
6738 {
6739 remove_edge (ep);
6740 ep = NULL;
6741 }
6742 else if (fd->collapse > 1)
6743 {
6744 remove_edge (ep);
6745 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6746 }
6747 else
6748 ep->flags = EDGE_TRUE_VALUE;
6749 find_edge (cont_bb, fin_bb)->flags
6750 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6751 }
6752
6753 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6754 recompute_dominator (CDI_DOMINATORS, body_bb));
6755 if (!broken_loop)
6756 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6757 recompute_dominator (CDI_DOMINATORS, fin_bb));
6758
6759 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
6760 {
6761 class loop *loop = alloc_loop ();
6762 loop->header = body_bb;
6763 if (collapse_bb == NULL)
6764 loop->latch = cont_bb;
6765 add_loop (loop, body_bb->loop_father);
6766 }
6767}
6768
6769/* A subroutine of expand_omp_for. Generate code for an OpenACC
6770 partitioned loop. The lowering here is abstracted, in that the
6771 loop parameters are passed through internal functions, which are
6772 further lowered by oacc_device_lower, once we get to the target
6773 compiler. The loop is of the form:
6774
6775 for (V = B; V LTGT E; V += S) {BODY}
6776
6777 where LTGT is < or >. We may have a specified chunking size, CHUNKING
6778 (constant 0 for no chunking) and we will have a GWV partitioning
6779 mask, specifying dimensions over which the loop is to be
6780 partitioned (see note below). We generate code that looks like
6781 (this ignores tiling):
6782
6783 <entry_bb> [incoming FALL->body, BRANCH->exit]
6784 typedef signedintify (typeof (V)) T; // underlying signed integral type
6785 T range = E - B;
6786 T chunk_no = 0;
6787 T DIR = LTGT == '<' ? +1 : -1;
6788 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
6789 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
6790
6791 <head_bb> [created by splitting end of entry_bb]
6792 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
6793 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
6794 if (!(offset LTGT bound)) goto bottom_bb;
6795
6796 <body_bb> [incoming]
6797 V = B + offset;
6798 {BODY}
6799
6800 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
6801 offset += step;
6802 if (offset LTGT bound) goto body_bb; [*]
6803
6804 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
6805 chunk_no++;
6806 if (chunk < chunk_max) goto head_bb;
6807
6808 <exit_bb> [incoming]
6809 V = B + ((range -/+ 1) / S +/- 1) * S [*]
6810
6811 [*] Needed if V live at end of loop. */
6812
6813static void
6814expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6815{
6816 tree v = fd->loop.v;
6817 enum tree_code cond_code = fd->loop.cond_code;
6818 enum tree_code plus_code = PLUS_EXPR;
6819
6820 tree chunk_size = integer_minus_one_node;
6821 tree gwv = integer_zero_node;
6822 tree iter_type = TREE_TYPE (v);
6823 tree diff_type = iter_type;
6824 tree plus_type = iter_type;
6825 struct oacc_collapse *counts = NULL;
6826
6827 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6828 == GF_OMP_FOR_KIND_OACC_LOOP);
6829 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6830 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6831
6832 if (POINTER_TYPE_P (iter_type))
6833 {
6834 plus_code = POINTER_PLUS_EXPR;
6835 plus_type = sizetype;
6836 }
6837 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6838 diff_type = signed_type_for (diff_type);
6839 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6840 diff_type = integer_type_node;
6841
6842 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6843 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6844 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6845 basic_block bottom_bb = NULL;
6846
6847 /* entry_bb has two successors; the branch edge is to the exit
6848 block, fallthrough edge to body. */
6849 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6850 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6851
6852 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6853 body_bb, or to a block whose only successor is the body_bb. Its
6854 fallthrough successor is the final block (same as the branch
6855 successor of the entry_bb). */
6856 if (cont_bb)
6857 {
6858 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6859 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6860
6861 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6862 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6863 }
6864 else
6865 gcc_assert (!gimple_in_ssa_p (cfun));
6866
6867 /* The exit block only has entry_bb and cont_bb as predecessors. */
6868 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6869
6870 tree chunk_no;
6871 tree chunk_max = NULL_TREE;
6872 tree bound, offset;
6873 tree step = create_tmp_var (diff_type, ".step");
6874 bool up = cond_code == LT_EXPR;
6875 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6876 bool chunking = !gimple_in_ssa_p (cfun);
6877 bool negating;
6878
6879 /* Tiling vars. */
6880 tree tile_size = NULL_TREE;
6881 tree element_s = NULL_TREE;
6882 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6883 basic_block elem_body_bb = NULL;
6884 basic_block elem_cont_bb = NULL;
6885
6886 /* SSA instances. */
6887 tree offset_incr = NULL_TREE;
6888 tree offset_init = NULL_TREE;
6889
6890 gimple_stmt_iterator gsi;
6891 gassign *ass;
6892 gcall *call;
6893 gimple *stmt;
6894 tree expr;
6895 location_t loc;
6896 edge split, be, fte;
6897
6898 /* Split the end of entry_bb to create head_bb. */
6899 split = split_block (entry_bb, last_stmt (entry_bb));
6900 basic_block head_bb = split->dest;
6901 entry_bb = split->src;
6902
6903 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6904 gsi = gsi_last_nondebug_bb (entry_bb);
6905 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6906 loc = gimple_location (for_stmt);
6907
6908 if (gimple_in_ssa_p (cfun))
6909 {
6910 offset_init = gimple_omp_for_index (for_stmt, 0);
6911 gcc_assert (integer_zerop (fd->loop.n1));
6912 /* The SSA parallelizer does gang parallelism. */
6913 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6914 }
6915
6916 if (fd->collapse > 1 || fd->tiling)
6917 {
6918 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6919 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6920 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6921 TREE_TYPE (fd->loop.n2), loc);
6922
6923 if (SSA_VAR_P (fd->loop.n2))
6924 {
6925 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6926 true, GSI_SAME_STMT);
6927 ass = gimple_build_assign (fd->loop.n2, total);
6928 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6929 }
6930 }
6931
6932 tree b = fd->loop.n1;
6933 tree e = fd->loop.n2;
6934 tree s = fd->loop.step;
6935
6936 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6937 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6938
6939 /* Convert the step, avoiding possible unsigned->signed overflow. */
6940 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6941 if (negating)
6942 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6943 s = fold_convert (diff_type, s);
6944 if (negating)
6945 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6946 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6947
6948 if (!chunking)
6949 chunk_size = integer_zero_node;
6950 expr = fold_convert (diff_type, chunk_size);
6951 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6952 NULL_TREE, true, GSI_SAME_STMT);
6953
6954 if (fd->tiling)
6955 {
6956 /* Determine the tile size and element step,
6957 modify the outer loop step size. */
6958 tile_size = create_tmp_var (diff_type, ".tile_size");
6959 expr = build_int_cst (diff_type, 1);
6960 for (int ix = 0; ix < fd->collapse; ix++)
6961 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6962 expr = force_gimple_operand_gsi (&gsi, expr, true,
6963 NULL_TREE, true, GSI_SAME_STMT);
6964 ass = gimple_build_assign (tile_size, expr);
6965 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6966
6967 element_s = create_tmp_var (diff_type, ".element_s");
6968 ass = gimple_build_assign (element_s, s);
6969 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6970
6971 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6972 s = force_gimple_operand_gsi (&gsi, expr, true,
6973 NULL_TREE, true, GSI_SAME_STMT);
6974 }
6975
6976 /* Determine the range, avoiding possible unsigned->signed overflow. */
6977 negating = !up && TYPE_UNSIGNED (iter_type);
6978 expr = fold_build2 (MINUS_EXPR, plus_type,
6979 fold_convert (plus_type, negating ? b : e),
6980 fold_convert (plus_type, negating ? e : b));
6981 expr = fold_convert (diff_type, expr);
6982 if (negating)
6983 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6984 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6985 NULL_TREE, true, GSI_SAME_STMT);
6986
6987 chunk_no = build_int_cst (diff_type, 0);
6988 if (chunking)
6989 {
6990 gcc_assert (!gimple_in_ssa_p (cfun));
6991
6992 expr = chunk_no;
6993 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6994 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6995
6996 ass = gimple_build_assign (chunk_no, expr);
6997 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6998
6999 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7000 build_int_cst (integer_type_node,
7001 IFN_GOACC_LOOP_CHUNKS),
7002 dir, range, s, chunk_size, gwv);
7003 gimple_call_set_lhs (call, chunk_max);
7004 gimple_set_location (call, loc);
7005 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7006 }
7007 else
7008 chunk_size = chunk_no;
7009
7010 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7011 build_int_cst (integer_type_node,
7012 IFN_GOACC_LOOP_STEP),
7013 dir, range, s, chunk_size, gwv);
7014 gimple_call_set_lhs (call, step);
7015 gimple_set_location (call, loc);
7016 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7017
7018 /* Remove the GIMPLE_OMP_FOR. */
7019 gsi_remove (&gsi, true);
7020
7021 /* Fixup edges from head_bb. */
7022 be = BRANCH_EDGE (head_bb);
7023 fte = FALLTHRU_EDGE (head_bb);
7024 be->flags |= EDGE_FALSE_VALUE;
7025 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7026
7027 basic_block body_bb = fte->dest;
7028
7029 if (gimple_in_ssa_p (cfun))
7030 {
7031 gsi = gsi_last_nondebug_bb (cont_bb);
7032 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7033
7034 offset = gimple_omp_continue_control_use (cont_stmt);
7035 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7036 }
7037 else
7038 {
7039 offset = create_tmp_var (diff_type, ".offset");
7040 offset_init = offset_incr = offset;
7041 }
7042 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7043
7044 /* Loop offset & bound go into head_bb. */
7045 gsi = gsi_start_bb (head_bb);
7046
7047 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7048 build_int_cst (integer_type_node,
7049 IFN_GOACC_LOOP_OFFSET),
7050 dir, range, s,
7051 chunk_size, gwv, chunk_no);
7052 gimple_call_set_lhs (call, offset_init);
7053 gimple_set_location (call, loc);
7054 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7055
7056 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7057 build_int_cst (integer_type_node,
7058 IFN_GOACC_LOOP_BOUND),
7059 dir, range, s,
7060 chunk_size, gwv, offset_init);
7061 gimple_call_set_lhs (call, bound);
7062 gimple_set_location (call, loc);
7063 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7064
7065 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7066 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7067 GSI_CONTINUE_LINKING);
7068
7069 /* V assignment goes into body_bb. */
7070 if (!gimple_in_ssa_p (cfun))
7071 {
7072 gsi = gsi_start_bb (body_bb);
7073
7074 expr = build2 (plus_code, iter_type, b,
7075 fold_convert (plus_type, offset));
7076 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7077 true, GSI_SAME_STMT);
7078 ass = gimple_build_assign (v, expr);
7079 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7080
7081 if (fd->collapse > 1 || fd->tiling)
7082 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7083
7084 if (fd->tiling)
7085 {
7086 /* Determine the range of the element loop -- usually simply
7087 the tile_size, but could be smaller if the final
7088 iteration of the outer loop is a partial tile. */
7089 tree e_range = create_tmp_var (diff_type, ".e_range");
7090
7091 expr = build2 (MIN_EXPR, diff_type,
7092 build2 (MINUS_EXPR, diff_type, bound, offset),
7093 build2 (MULT_EXPR, diff_type, tile_size,
7094 element_s));
7095 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7096 true, GSI_SAME_STMT);
7097 ass = gimple_build_assign (e_range, expr);
7098 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7099
7100 /* Determine bound, offset & step of inner loop. */
7101 e_bound = create_tmp_var (diff_type, ".e_bound");
7102 e_offset = create_tmp_var (diff_type, ".e_offset");
7103 e_step = create_tmp_var (diff_type, ".e_step");
7104
7105 /* Mark these as element loops. */
7106 tree t, e_gwv = integer_minus_one_node;
7107 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7108
7109 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7110 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7111 element_s, chunk, e_gwv, chunk);
7112 gimple_call_set_lhs (call, e_offset);
7113 gimple_set_location (call, loc);
7114 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7115
7116 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7117 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7118 element_s, chunk, e_gwv, e_offset);
7119 gimple_call_set_lhs (call, e_bound);
7120 gimple_set_location (call, loc);
7121 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7122
7123 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7124 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7125 element_s, chunk, e_gwv);
7126 gimple_call_set_lhs (call, e_step);
7127 gimple_set_location (call, loc);
7128 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7129
7130 /* Add test and split block. */
7131 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7132 stmt = gimple_build_cond_empty (expr);
7133 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7134 split = split_block (body_bb, stmt);
7135 elem_body_bb = split->dest;
7136 if (cont_bb == body_bb)
7137 cont_bb = elem_body_bb;
7138 body_bb = split->src;
7139
7140 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7141
7142 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7143 if (cont_bb == NULL)
7144 {
7145 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7146 e->probability = profile_probability::even ();
7147 split->probability = profile_probability::even ();
7148 }
7149
7150 /* Initialize the user's loop vars. */
7151 gsi = gsi_start_bb (elem_body_bb);
7152 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7153 }
7154 }
7155
7156 /* Loop increment goes into cont_bb. If this is not a loop, we
7157 will have spawned threads as if it was, and each one will
7158 execute one iteration. The specification is not explicit about
7159 whether such constructs are ill-formed or not, and they can
7160 occur, especially when noreturn routines are involved. */
7161 if (cont_bb)
7162 {
7163 gsi = gsi_last_nondebug_bb (cont_bb);
7164 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7165 loc = gimple_location (cont_stmt);
7166
7167 if (fd->tiling)
7168 {
7169 /* Insert element loop increment and test. */
7170 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7171 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7172 true, GSI_SAME_STMT);
7173 ass = gimple_build_assign (e_offset, expr);
7174 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7175 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7176
7177 stmt = gimple_build_cond_empty (expr);
7178 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7179 split = split_block (cont_bb, stmt);
7180 elem_cont_bb = split->src;
7181 cont_bb = split->dest;
7182
7183 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7184 split->probability = profile_probability::unlikely ().guessed ();
7185 edge latch_edge
7186 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7187 latch_edge->probability = profile_probability::likely ().guessed ();
7188
7189 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7190 skip_edge->probability = profile_probability::unlikely ().guessed ();
7191 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7192 loop_entry_edge->probability
7193 = profile_probability::likely ().guessed ();
7194
7195 gsi = gsi_for_stmt (cont_stmt);
7196 }
7197
7198 /* Increment offset. */
7199 if (gimple_in_ssa_p (cfun))
7200 expr = build2 (plus_code, iter_type, offset,
7201 fold_convert (plus_type, step));
7202 else
7203 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7204 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7205 true, GSI_SAME_STMT);
7206 ass = gimple_build_assign (offset_incr, expr);
7207 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7208 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7209 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7210
7211 /* Remove the GIMPLE_OMP_CONTINUE. */
7212 gsi_remove (&gsi, true);
7213
7214 /* Fixup edges from cont_bb. */
7215 be = BRANCH_EDGE (cont_bb);
7216 fte = FALLTHRU_EDGE (cont_bb);
7217 be->flags |= EDGE_TRUE_VALUE;
7218 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7219
7220 if (chunking)
7221 {
7222 /* Split the beginning of exit_bb to make bottom_bb. We
7223 need to insert a nop at the start, because splitting is
7224 after a stmt, not before. */
7225 gsi = gsi_start_bb (exit_bb);
7226 stmt = gimple_build_nop ();
7227 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7228 split = split_block (exit_bb, stmt);
7229 bottom_bb = split->src;
7230 exit_bb = split->dest;
7231 gsi = gsi_last_bb (bottom_bb);
7232
7233 /* Chunk increment and test goes into bottom_bb. */
7234 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7235 build_int_cst (diff_type, 1));
7236 ass = gimple_build_assign (chunk_no, expr);
7237 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7238
7239 /* Chunk test at end of bottom_bb. */
7240 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7241 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7242 GSI_CONTINUE_LINKING);
7243
7244 /* Fixup edges from bottom_bb. */
7245 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7246 split->probability = profile_probability::unlikely ().guessed ();
7247 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7248 latch_edge->probability = profile_probability::likely ().guessed ();
7249 }
7250 }
7251
7252 gsi = gsi_last_nondebug_bb (exit_bb);
7253 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7254 loc = gimple_location (gsi_stmt (gsi));
7255
7256 if (!gimple_in_ssa_p (cfun))
7257 {
7258 /* Insert the final value of V, in case it is live. This is the
7259 value for the only thread that survives past the join. */
7260 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7261 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7262 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7263 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7264 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7265 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7266 true, GSI_SAME_STMT);
7267 ass = gimple_build_assign (v, expr);
7268 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7269 }
7270
7271 /* Remove the OMP_RETURN. */
7272 gsi_remove (&gsi, true);
7273
7274 if (cont_bb)
7275 {
7276 /* We now have one, two or three nested loops. Update the loop
7277 structures. */
7278 class loop *parent = entry_bb->loop_father;
7279 class loop *body = body_bb->loop_father;
7280
7281 if (chunking)
7282 {
7283 class loop *chunk_loop = alloc_loop ();
7284 chunk_loop->header = head_bb;
7285 chunk_loop->latch = bottom_bb;
7286 add_loop (chunk_loop, parent);
7287 parent = chunk_loop;
7288 }
7289 else if (parent != body)
7290 {
7291 gcc_assert (body->header == body_bb);
7292 gcc_assert (body->latch == cont_bb
7293 || single_pred (body->latch) == cont_bb);
7294 parent = NULL;
7295 }
7296
7297 if (parent)
7298 {
7299 class loop *body_loop = alloc_loop ();
7300 body_loop->header = body_bb;
7301 body_loop->latch = cont_bb;
7302 add_loop (body_loop, parent);
7303
7304 if (fd->tiling)
7305 {
7306 /* Insert tiling's element loop. */
7307 class loop *inner_loop = alloc_loop ();
7308 inner_loop->header = elem_body_bb;
7309 inner_loop->latch = elem_cont_bb;
7310 add_loop (inner_loop, body_loop);
7311 }
7312 }
7313 }
7314}
7315
7316/* Expand the OMP loop defined by REGION. */
7317
7318static void
7319expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7320{
7321 struct omp_for_data fd;
7322 struct omp_for_data_loop *loops;
7323
7324 loops = XALLOCAVEC (struct omp_for_data_loop,
7325 gimple_omp_for_collapse (last_stmt (region->entry)));
7326 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7327 &fd, loops);
7328 region->sched_kind = fd.sched_kind;
7329 region->sched_modifiers = fd.sched_modifiers;
7330 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7331 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7332 {
7333 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7334 if ((loops[i].m1 || loops[i].m2)
7335 && (loops[i].m1 == NULL_TREE
7336 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7337 && (loops[i].m2 == NULL_TREE
7338 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7339 && TREE_CODE (loops[i].step) == INTEGER_CST
7340 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7341 {
7342 tree t;
7343 tree itype = TREE_TYPE (loops[i].v);
7344 if (loops[i].m1 && loops[i].m2)
7345 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7346 else if (loops[i].m1)
7347 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7348 else
7349 t = loops[i].m2;
7350 t = fold_build2 (MULT_EXPR, itype, t,
7351 fold_convert (itype,
7352 loops[i - loops[i].outer].step));
7353 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7354 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7355 fold_build1 (NEGATE_EXPR, itype, t),
7356 fold_build1 (NEGATE_EXPR, itype,
7357 fold_convert (itype,
7358 loops[i].step)));
7359 else
7360 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7361 fold_convert (itype, loops[i].step));
7362 if (integer_nonzerop (t))
7363 error_at (gimple_location (fd.for_stmt),
7364 "invalid OpenMP non-rectangular loop step; "
7365 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7366 "step %qE",
7367 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7368 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7369 loops[i - loops[i].outer].step, i + 1,
7370 loops[i].step);
7371 }
7372 }
7373
7374 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7375 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7376 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7377 if (region->cont)
7378 {
7379 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7380 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7381 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7382 }
7383 else
7384 /* If there isn't a continue then this is a degenerate case where
7385 the introduction of abnormal edges during lowering will prevent
7386 original loops from being detected. Fix that up. */
7387 loops_state_set (LOOPS_NEED_FIXUP);
7388
7389 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7390 {
7391 if (fd.non_rect)
7392 sorry_at (gimple_location (fd.for_stmt),
7393 "non-rectangular %<simd%> not supported yet");
7394 expand_omp_simd (region, &fd);
7395 }
7396 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7397 {
7398 gcc_assert (!inner_stmt && !fd.non_rect);
7399 expand_oacc_for (region, &fd);
7400 }
7401 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7402 {
7403 if (fd.non_rect)
7404 sorry_at (gimple_location (fd.for_stmt),
7405 "non-rectangular %<taskloop%> not supported yet");
7406 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7407 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7408 else
7409 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
7410 }
7411 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
7412 && !fd.have_ordered)
7413 {
7414 if (fd.non_rect
7415 && (gimple_omp_for_combined_into_p (fd.for_stmt)
7416 || gimple_omp_for_combined_p (fd.for_stmt)))
7417 sorry_at (gimple_location (fd.for_stmt),
7418 "non-rectangular OpenMP loops not supported yet");
7419 if (fd.chunk_size == NULL)
7420 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
7421 else
7422 expand_omp_for_static_chunk (region, &fd, inner_stmt);
7423 }
7424 else
7425 {
7426 int fn_index, start_ix, next_ix;
7427 unsigned HOST_WIDE_INT sched = 0;
7428 tree sched_arg = NULL_TREE;
7429
7430 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
7431 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
7432 if (fd.chunk_size == NULL
7433 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
7434 fd.chunk_size = integer_zero_node;
7435 switch (fd.sched_kind)
7436 {
7437 case OMP_CLAUSE_SCHEDULE_RUNTIME:
7438 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
7439 && fd.lastprivate_conditional == 0)
7440 {
7441 gcc_assert (!fd.have_ordered);
7442 fn_index = 6;
7443 sched = 4;
7444 }
7445 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7446 && !fd.have_ordered
7447 && fd.lastprivate_conditional == 0)
7448 fn_index = 7;
7449 else
7450 {
7451 fn_index = 3;
7452 sched = (HOST_WIDE_INT_1U << 31);
7453 }
7454 break;
7455 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
7456 case OMP_CLAUSE_SCHEDULE_GUIDED:
7457 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7458 && !fd.have_ordered
7459 && fd.lastprivate_conditional == 0)
7460 {
7461 fn_index = 3 + fd.sched_kind;
7462 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7463 break;
7464 }
7465 fn_index = fd.sched_kind;
7466 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
7467 sched += (HOST_WIDE_INT_1U << 31);
7468 break;
7469 case OMP_CLAUSE_SCHEDULE_STATIC:
7470 gcc_assert (fd.have_ordered);
7471 fn_index = 0;
7472 sched = (HOST_WIDE_INT_1U << 31) + 1;
7473 break;
7474 default:
7475 gcc_unreachable ();
7476 }
7477 if (!fd.ordered)
7478 fn_index += fd.have_ordered * 8;
7479 if (fd.ordered)
7480 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
7481 else
7482 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
7483 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
7484 if (fd.have_reductemp || fd.have_pointer_condtemp)
7485 {
7486 if (fd.ordered)
7487 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
7488 else if (fd.have_ordered)
7489 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
7490 else
7491 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
7492 sched_arg = build_int_cstu (long_integer_type_node, sched);
7493 if (!fd.chunk_size)
7494 fd.chunk_size = integer_zero_node;
7495 }
7496 if (fd.iter_type == long_long_unsigned_type_node)
7497 {
7498 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
7499 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
7500 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
7501 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
7502 }
7503 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
7504 (enum built_in_function) next_ix, sched_arg,
7505 inner_stmt);
7506 }
7507
7508 if (gimple_in_ssa_p (cfun))
7509 update_ssa (TODO_update_ssa_only_virtuals);
7510}
7511
7512/* Expand code for an OpenMP sections directive. In pseudo code, we generate
7513
7514 v = GOMP_sections_start (n);
7515 L0:
7516 switch (v)
7517 {
7518 case 0:
7519 goto L2;
7520 case 1:
7521 section 1;
7522 goto L1;
7523 case 2:
7524 ...
7525 case n:
7526 ...
7527 default:
7528 abort ();
7529 }
7530 L1:
7531 v = GOMP_sections_next ();
7532 goto L0;
7533 L2:
7534 reduction;
7535
7536 If this is a combined parallel sections, replace the call to
7537 GOMP_sections_start with call to GOMP_sections_next. */
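/* For example (illustrative source, not literal output):

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   yields the scheme above with n = 2, foo () as case 1 and bar ()
   as case 2; case 0 jumps to L2 once no section is left.  */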
7538
7539static void
7540expand_omp_sections (struct omp_region *region)
7541{
7542 tree t, u, vin = NULL, vmain, vnext, l2;
7543 unsigned len;
7544 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
7545 gimple_stmt_iterator si, switch_si;
7546 gomp_sections *sections_stmt;
7547 gimple *stmt;
7548 gomp_continue *cont;
7549 edge_iterator ei;
7550 edge e;
7551 struct omp_region *inner;
7552 unsigned i, casei;
7553 bool exit_reachable = region->cont != NULL;
7554
7555 gcc_assert (region->exit != NULL);
7556 entry_bb = region->entry;
7557 l0_bb = single_succ (entry_bb);
7558 l1_bb = region->cont;
7559 l2_bb = region->exit;
7560 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
7561 l2 = gimple_block_label (l2_bb);
7562 else
7563 {
7564 /* This can happen if there are reductions. */
7565 len = EDGE_COUNT (l0_bb->succs);
7566 gcc_assert (len > 0);
7567 e = EDGE_SUCC (l0_bb, len - 1);
7568 si = gsi_last_nondebug_bb (e->dest);
7569 l2 = NULL_TREE;
7570 if (gsi_end_p (si)
7571 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7572 l2 = gimple_block_label (e->dest);
7573 else
7574 FOR_EACH_EDGE (e, ei, l0_bb->succs)
7575 {
7576 si = gsi_last_nondebug_bb (e->dest);
7577 if (gsi_end_p (si)
7578 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
7579 {
7580 l2 = gimple_block_label (e->dest);
7581 break;
7582 }
7583 }
7584 }
7585 if (exit_reachable)
7586 default_bb = create_empty_bb (l1_bb->prev_bb);
7587 else
7588 default_bb = create_empty_bb (l0_bb);
7589
7590 /* We will build a switch() with enough cases for all the
7591 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
7592 and a default case to abort if something goes wrong. */
7593 len = EDGE_COUNT (l0_bb->succs);
7594
7595 /* Use vec::quick_push on label_vec throughout, since we know the size
7596 in advance. */
7597 auto_vec<tree> label_vec (len);
7598
7599 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
7600 GIMPLE_OMP_SECTIONS statement. */
7601 si = gsi_last_nondebug_bb (entry_bb);
7602 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
7603 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
7604 vin = gimple_omp_sections_control (sections_stmt);
7605 tree clauses = gimple_omp_sections_clauses (sections_stmt);
7606 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
7607 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
7608 tree cond_var = NULL_TREE;
7609 if (reductmp || condtmp)
7610 {
7611 tree reductions = null_pointer_node, mem = null_pointer_node;
7612 tree memv = NULL_TREE, condtemp = NULL_TREE;
7613 gimple_stmt_iterator gsi = gsi_none ();
7614 gimple *g = NULL;
7615 if (reductmp)
7616 {
7617 reductions = OMP_CLAUSE_DECL (reductmp);
7618 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
7619 g = SSA_NAME_DEF_STMT (reductions);
7620 reductions = gimple_assign_rhs1 (g);
7621 OMP_CLAUSE_DECL (reductmp) = reductions;
7622 gsi = gsi_for_stmt (g);
7623 }
7624 else
7625 gsi = si;
7626 if (condtmp)
7627 {
7628 condtemp = OMP_CLAUSE_DECL (condtmp);
7629 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
7630 OMP_CLAUSE__CONDTEMP_);
7631 cond_var = OMP_CLAUSE_DECL (c);
7632 tree type = TREE_TYPE (condtemp);
7633 memv = create_tmp_var (type);
7634 TREE_ADDRESSABLE (memv) = 1;
7635 unsigned cnt = 0;
7636 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
7637 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
7638 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
7639 ++cnt;
7640 unsigned HOST_WIDE_INT sz
7641 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
7642 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
7643 false);
7644 mem = build_fold_addr_expr (memv);
7645 }
7646 t = build_int_cst (unsigned_type_node, len - 1);
7647 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
7648 stmt = gimple_build_call (u, 3, t, reductions, mem);
7649 gimple_call_set_lhs (stmt, vin);
7650 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7651 if (condtmp)
7652 {
7653 expand_omp_build_assign (&gsi, condtemp, memv, false);
7654 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
7655 vin, build_one_cst (TREE_TYPE (cond_var)));
7656 expand_omp_build_assign (&gsi, cond_var, t, false);
7657 }
7658 if (reductmp)
7659 {
7660 gsi_remove (&gsi, true);
7661 release_ssa_name (gimple_assign_lhs (g));
7662 }
7663 }
7664 else if (!is_combined_parallel (region))
7665 {
7666 /* If we are not inside a combined parallel+sections region,
7667 call GOMP_sections_start. */
7668 t = build_int_cst (unsigned_type_node, len - 1);
7669 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
7670 stmt = gimple_build_call (u, 1, t);
7671 }
7672 else
7673 {
7674 /* Otherwise, call GOMP_sections_next. */
7675 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
7676 stmt = gimple_build_call (u, 0);
7677 }
7678 if (!reductmp && !condtmp)
7679 {
7680 gimple_call_set_lhs (stmt, vin);
7681 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
7682 }
7683 gsi_remove (&si, true);
7684
7685 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
7686 L0_BB. */
7687 switch_si = gsi_last_nondebug_bb (l0_bb);
7688 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
7689 if (exit_reachable)
7690 {
7691 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
7692 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
7693 vmain = gimple_omp_continue_control_use (cont);
7694 vnext = gimple_omp_continue_control_def (cont);
7695 }
7696 else
7697 {
7698 vmain = vin;
7699 vnext = NULL_TREE;
7700 }
7701
7702 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
7703 label_vec.quick_push (t);
7704 i = 1;
7705
7706 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
7707 for (inner = region->inner, casei = 1;
7708 inner;
7709 inner = inner->next, i++, casei++)
7710 {
7711 basic_block s_entry_bb, s_exit_bb;
7712
7713 /* Skip optional reduction region. */
7714 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
7715 {
7716 --i;
7717 --casei;
7718 continue;
7719 }
7720
7721 s_entry_bb = inner->entry;
7722 s_exit_bb = inner->exit;
7723
7724 t = gimple_block_label (s_entry_bb);
7725 u = build_int_cst (unsigned_type_node, casei);
7726 u = build_case_label (u, NULL, t);
7727 label_vec.quick_push (u);
7728
7729 si = gsi_last_nondebug_bb (s_entry_bb);
7730 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
7731 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
7732 gsi_remove (&si, true);
7733 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
7734
7735 if (s_exit_bb == NULL)
7736 continue;
7737
7738 si = gsi_last_nondebug_bb (s_exit_bb);
7739 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7740 gsi_remove (&si, true);
7741
7742 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
7743 }
7744
7745 /* Error handling code goes in DEFAULT_BB. */
7746 t = gimple_block_label (default_bb);
7747 u = build_case_label (NULL, NULL, t);
7748 make_edge (l0_bb, default_bb, 0);
7749 add_bb_to_loop (default_bb, current_loops->tree_root);
7750
7751 stmt = gimple_build_switch (vmain, u, label_vec);
7752 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
7753 gsi_remove (&switch_si, true);
7754
7755 si = gsi_start_bb (default_bb);
7756 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
7757 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
7758
7759 if (exit_reachable)
7760 {
7761 tree bfn_decl;
7762
7763 /* Code to get the next section goes in L1_BB. */
7764 si = gsi_last_nondebug_bb (l1_bb);
7765 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
7766
7767 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
7768 stmt = gimple_build_call (bfn_decl, 0);
7769 gimple_call_set_lhs (stmt, vnext);
7770 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7771 if (cond_var)
7772 {
7773 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
7774 vnext, build_one_cst (TREE_TYPE (cond_var)));
7775 expand_omp_build_assign (&si, cond_var, t, false);
7776 }
7777 gsi_remove (&si, true);
7778
7779 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
7780 }
7781
7782 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
7783 si = gsi_last_nondebug_bb (l2_bb);
7784 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
7785 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
7786 else if (gimple_omp_return_lhs (gsi_stmt (si)))
7787 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
7788 else
7789 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
7790 stmt = gimple_build_call (t, 0);
7791 if (gimple_omp_return_lhs (gsi_stmt (si)))
7792 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
7793 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
7794 gsi_remove (&si, true);
7795
7796 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
7797}
7798
7799/* Expand code for an OpenMP single directive. We've already expanded
7800 much of the code, here we simply place the GOMP_barrier call. */
7801
7802static void
7803expand_omp_single (struct omp_region *region)
7804{
7805 basic_block entry_bb, exit_bb;
7806 gimple_stmt_iterator si;
7807
7808 entry_bb = region->entry;
7809 exit_bb = region->exit;
7810
7811 si = gsi_last_nondebug_bb (entry_bb);
7812 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
7813 gsi_remove (&si, true);
7814 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7815
7816 si = gsi_last_nondebug_bb (exit_bb);
7817 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
7818 {
7819 tree t = gimple_omp_return_lhs (gsi_stmt (si));
7820 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
7821 }
7822 gsi_remove (&si, true);
7823 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7824}
7825
7826/* Generic expansion for OpenMP synchronization directives: master,
7827 ordered and critical. All we need to do here is remove the entry
7828 and exit markers for REGION. */
7829
7830static void
7831expand_omp_synch (struct omp_region *region)
7832{
7833 basic_block entry_bb, exit_bb;
7834 gimple_stmt_iterator si;
7835
7836 entry_bb = region->entry;
7837 exit_bb = region->exit;
7838
7839 si = gsi_last_nondebug_bb (entry_bb);
7840 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
7841 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
7842 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
7843 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
7844 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
7845 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7846 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
7847 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
7848 {
7849 expand_omp_taskreg (region);
7850 return;
7851 }
7852 gsi_remove (&si, true);
7853 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7854
7855 if (exit_bb)
7856 {
7857 si = gsi_last_nondebug_bb (exit_bb);
7858 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
7859 gsi_remove (&si, true);
7860 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
7861 }
7862}
7863
7864/* Translate enum omp_memory_order to enum memmodel. The two enums
7865 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
7866 is 0. */
7867
7868static enum memmodel
7869omp_memory_order_to_memmodel (enum omp_memory_order mo)
7870{
7871 switch (mo)
7872 {
7873 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7874 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7875 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7876 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7877 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7878 default: gcc_unreachable ();
7879 }
7880}
7881
7882/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7883 operation as a normal volatile load. */
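/* For example (a sketch): for a 4-byte int, INDEX is 2 and

     #pragma omp atomic read
     v = *addr;

   becomes in effect

     v = __atomic_load_4 (addr, mo);

   with MO derived from the directive's memory-order clause.  */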
7884
7885static bool
7886expand_omp_atomic_load (basic_block load_bb, tree addr,
7887 tree loaded_val, int index)
7888{
7889 enum built_in_function tmpbase;
7890 gimple_stmt_iterator gsi;
7891 basic_block store_bb;
7892 location_t loc;
7893 gimple *stmt;
7894 tree decl, call, type, itype;
7895
7896 gsi = gsi_last_nondebug_bb (load_bb);
7897 stmt = gsi_stmt (gsi);
7898 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7899 loc = gimple_location (stmt);
7900
7901 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7902 is smaller than word size, then expand_atomic_load assumes that the load
7903 is atomic. We could avoid the builtin entirely in this case. */
7904
7905 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7906 decl = builtin_decl_explicit (tmpbase);
7907 if (decl == NULL_TREE)
7908 return false;
7909
7910 type = TREE_TYPE (loaded_val);
7911 itype = TREE_TYPE (TREE_TYPE (decl));
7912
7913 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7914 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7915 call = build_call_expr_loc (loc, decl, 2, addr, mo);
7916 if (!useless_type_conversion_p (type, itype))
7917 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7918 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7919
7920 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7921 gsi_remove (&gsi, true);
7922
7923 store_bb = single_succ (load_bb);
7924 gsi = gsi_last_nondebug_bb (store_bb);
7925 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7926 gsi_remove (&gsi, true);
7927
7928 if (gimple_in_ssa_p (cfun))
7929 update_ssa (TODO_update_ssa_no_phi);
7930
7931 return true;
7932}
7933
7934/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7935 operation as a normal volatile store. */
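/* For example (a sketch): for a 4-byte int,

     #pragma omp atomic write
     *addr = x;

   becomes in effect __atomic_store_4 (addr, x, mo); if the old value
   is also needed, the exchange form __atomic_exchange_4 is used
   instead.  */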
7936
7937static bool
7938expand_omp_atomic_store (basic_block load_bb, tree addr,
7939 tree loaded_val, tree stored_val, int index)
7940{
7941 enum built_in_function tmpbase;
7942 gimple_stmt_iterator gsi;
7943 basic_block store_bb = single_succ (load_bb);
7944 location_t loc;
7945 gimple *stmt;
7946 tree decl, call, type, itype;
7947 machine_mode imode;
7948 bool exchange;
7949
7950 gsi = gsi_last_nondebug_bb (load_bb);
7951 stmt = gsi_stmt (gsi);
7952 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7953
7954 /* If the load value is needed, then this isn't a store but an exchange. */
7955 exchange = gimple_omp_atomic_need_value_p (stmt);
7956
7957 gsi = gsi_last_nondebug_bb (store_bb);
7958 stmt = gsi_stmt (gsi);
7959 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7960 loc = gimple_location (stmt);
7961
7962 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7963 is smaller than word size, then expand_atomic_store assumes that the store
7964 is atomic. We could avoid the builtin entirely in this case. */
7965
7966 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7967 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7968 decl = builtin_decl_explicit (tmpbase);
7969 if (decl == NULL_TREE)
7970 return false;
7971
7972 type = TREE_TYPE (stored_val);
7973
7974 /* Dig out the type of the function's second argument. */
7975 itype = TREE_TYPE (decl);
7976 itype = TYPE_ARG_TYPES (itype);
7977 itype = TREE_CHAIN (itype);
7978 itype = TREE_VALUE (itype);
7979 imode = TYPE_MODE (itype);
7980
7981 if (exchange && !can_atomic_exchange_p (imode, true))
7982 return false;
7983
7984 if (!useless_type_conversion_p (itype, type))
7985 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7986 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7987 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7988 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7989 if (exchange)
7990 {
7991 if (!useless_type_conversion_p (type, itype))
7992 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7993 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7994 }
7995
7996 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7997 gsi_remove (&gsi, true);
7998
7999 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8000 gsi = gsi_last_nondebug_bb (load_bb);
8001 gsi_remove (&gsi, true);
8002
8003 if (gimple_in_ssa_p (cfun))
8004 update_ssa (TODO_update_ssa_no_phi);
8005
8006 return true;
8007}
8008
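/* For illustration only (not part of GCC): given "int x, v;", the
   expansion above turns

       #pragma omp atomic write
       x = expr;

   into roughly

       __atomic_store_4 (&x, expr, __ATOMIC_RELAXED);

   while a capture of the old value such as

       #pragma omp atomic capture
       { v = x; x = expr; }

   becomes the EXCHANGE variant

       v = __atomic_exchange_4 (&x, expr, __ATOMIC_RELAXED);  */
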
8009/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8010 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8011 size of the data type, and thus usable to find the index of the builtin
8012 decl. Returns false if the expression is not of the proper form. */
8013
8014static bool
8015expand_omp_atomic_fetch_op (basic_block load_bb,
8016 tree addr, tree loaded_val,
8017 tree stored_val, int index)
8018{
8019 enum built_in_function oldbase, newbase, tmpbase;
8020 tree decl, itype, call;
8021 tree lhs, rhs;
8022 basic_block store_bb = single_succ (load_bb);
8023 gimple_stmt_iterator gsi;
8024 gimple *stmt;
8025 location_t loc;
8026 enum tree_code code;
8027 bool need_old, need_new;
8028 machine_mode imode;
8029
8030 /* We expect to find the following sequences:
8031
8032 load_bb:
8033 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8034
8035 store_bb:
8036 val = tmp OP something; (or: something OP tmp)
8037 GIMPLE_OMP_STORE (val)
8038
8039 ???FIXME: Allow a more flexible sequence.
8040 Perhaps use data flow to pick the statements.
8041
8042 */
8043
8044 gsi = gsi_after_labels (store_bb);
8045 stmt = gsi_stmt (gsi);
8046 if (is_gimple_debug (stmt))
8047 {
8048 gsi_next_nondebug (&gsi);
8049 if (gsi_end_p (gsi))
8050 return false;
8051 stmt = gsi_stmt (gsi);
8052 }
8053 loc = gimple_location (stmt);
8054 if (!is_gimple_assign (stmt))
8055 return false;
8056 gsi_next_nondebug (&gsi);
8057 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8058 return false;
8059 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8060 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8061 enum omp_memory_order omo
8062 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8063 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8064 gcc_checking_assert (!need_old || !need_new);
8065
8066 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8067 return false;
8068
8069 /* Check for one of the supported fetch-op operations. */
8070 code = gimple_assign_rhs_code (stmt);
8071 switch (code)
8072 {
8073 case PLUS_EXPR:
8074 case POINTER_PLUS_EXPR:
8075 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8076 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8077 break;
8078 case MINUS_EXPR:
8079 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8080 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8081 break;
8082 case BIT_AND_EXPR:
8083 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8084 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8085 break;
8086 case BIT_IOR_EXPR:
8087 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8088 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8089 break;
8090 case BIT_XOR_EXPR:
8091 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8092 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8093 break;
8094 default:
8095 return false;
8096 }
8097
8098 /* Make sure the expression is of the proper form. */
8099 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8100 rhs = gimple_assign_rhs2 (stmt);
8101 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8102 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8103 rhs = gimple_assign_rhs1 (stmt);
8104 else
8105 return false;
8106
8107 tmpbase = ((enum built_in_function)
8108 ((need_new ? newbase : oldbase) + index + 1));
8109 decl = builtin_decl_explicit (tmpbase);
8110 if (decl == NULL_TREE)
8111 return false;
8112 itype = TREE_TYPE (TREE_TYPE (decl));
8113 imode = TYPE_MODE (itype);
8114
8115 /* We could test all of the various optabs involved, but the fact of the
8116 matter is that (with the exception of i486 vs i586 and xadd) all targets
8117 that support any atomic operation optab also implement compare-and-swap.
8118 Let optabs.c take care of expanding any compare-and-swap loop. */
8119 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8120 return false;
8121
8122 gsi = gsi_last_nondebug_bb (load_bb);
8123 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8124
8125 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8126 It only requires that the operation happen atomically. Thus we can
8127 use the RELAXED memory model. */
8128 call = build_call_expr_loc (loc, decl, 3, addr,
8129 fold_convert_loc (loc, itype, rhs),
8130 build_int_cst (NULL, mo));
8131
8132 if (need_old || need_new)
8133 {
8134 lhs = need_old ? loaded_val : stored_val;
8135 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8136 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8137 }
8138 else
8139 call = fold_convert_loc (loc, void_type_node, call);
8140 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8141 gsi_remove (&gsi, true);
8142
8143 gsi = gsi_last_nondebug_bb (store_bb);
8144 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8145 gsi_remove (&gsi, true);
8146 gsi = gsi_last_nondebug_bb (store_bb);
8147 stmt = gsi_stmt (gsi);
8148 gsi_remove (&gsi, true);
8149
8150 if (gimple_in_ssa_p (cfun))
8151 {
8152 release_defs (stmt);
8153 update_ssa (TODO_update_ssa_no_phi);
8154 }
8155
8156 return true;
8157}
8158
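/* For illustration only (not part of GCC): the typical input matched by
   expand_omp_atomic_fetch_op is

       #pragma omp atomic
       x += n;

   which for a 4-byte "x" becomes roughly

       __atomic_fetch_add_4 (&x, n, __ATOMIC_RELAXED);

   and, when the new value is captured (NEED_NEW above),

       v = __atomic_add_fetch_4 (&x, n, __ATOMIC_RELAXED);  */
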
8159/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8160
8161 oldval = *addr;
8162 repeat:
8163 newval = rhs; // with oldval replacing *addr in rhs
8164 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8165 if (oldval != newval)
8166 goto repeat;
8167
8168 INDEX is log2 of the size of the data type, and thus usable to find the
8169 index of the builtin decl. */
8170
8171static bool
8172expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8173 tree addr, tree loaded_val, tree stored_val,
8174 int index)
8175{
8176 tree loadedi, storedi, initial, new_storedi, old_vali;
8177 tree type, itype, cmpxchg, iaddr, atype;
8178 gimple_stmt_iterator si;
8179 basic_block loop_header = single_succ (load_bb);
8180 gimple *phi, *stmt;
8181 edge e;
8182 enum built_in_function fncode;
8183
8184 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8185 order to use the RELAXED memory model effectively. */
8186 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8187 + index + 1);
8188 cmpxchg = builtin_decl_explicit (fncode);
8189 if (cmpxchg == NULL_TREE)
8190 return false;
8191 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8192 atype = type;
8193 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8194
8195 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8196 || !can_atomic_load_p (TYPE_MODE (itype)))
8197 return false;
8198
8199 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8200 si = gsi_last_nondebug_bb (load_bb);
8201 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8202
8203 /* For floating-point values, we'll need to view-convert them to integers
8204 so that we can perform the atomic compare and swap. Simplify the
8205 following code by always setting up the "i"ntegral variables. */
8206 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8207 {
8208 tree iaddr_val;
8209
8210 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8211 true));
8212 atype = itype;
8213 iaddr_val
8214 = force_gimple_operand_gsi (&si,
8215 fold_convert (TREE_TYPE (iaddr), addr),
8216 false, NULL_TREE, true, GSI_SAME_STMT);
8217 stmt = gimple_build_assign (iaddr, iaddr_val);
8218 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8219 loadedi = create_tmp_var (itype);
8220 if (gimple_in_ssa_p (cfun))
8221 loadedi = make_ssa_name (loadedi);
8222 }
8223 else
8224 {
8225 iaddr = addr;
8226 loadedi = loaded_val;
8227 }
8228
8229 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8230 tree loaddecl = builtin_decl_explicit (fncode);
8231 if (loaddecl)
8232 initial
8233 = fold_convert (atype,
8234 build_call_expr (loaddecl, 2, iaddr,
8235 build_int_cst (NULL_TREE,
8236 MEMMODEL_RELAXED)));
8237 else
8238 {
8239 tree off
8240 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8241 true), 0);
8242 initial = build2 (MEM_REF, atype, iaddr, off);
8243 }
8244
8245 initial
8246 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8247 GSI_SAME_STMT);
8248
8249 /* Move the value to the LOADEDI temporary. */
8250 if (gimple_in_ssa_p (cfun))
8251 {
8252 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8253 phi = create_phi_node (loadedi, loop_header);
8254 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8255 initial);
8256 }
8257 else
8258 gsi_insert_before (&si,
8259 gimple_build_assign (loadedi, initial),
8260 GSI_SAME_STMT);
8261 if (loadedi != loaded_val)
8262 {
8263 gimple_stmt_iterator gsi2;
8264 tree x;
8265
8266 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8267 gsi2 = gsi_start_bb (loop_header);
8268 if (gimple_in_ssa_p (cfun))
8269 {
8270 gassign *stmt;
8271 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8272 true, GSI_SAME_STMT);
8273 stmt = gimple_build_assign (loaded_val, x);
8274 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8275 }
8276 else
8277 {
8278 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8279 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8280 true, GSI_SAME_STMT);
8281 }
8282 }
8283 gsi_remove (&si, true);
8284
8285 si = gsi_last_nondebug_bb (store_bb);
8286 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8287
8288 if (iaddr == addr)
8289 storedi = stored_val;
8290 else
8291 storedi
8292 = force_gimple_operand_gsi (&si,
8293 build1 (VIEW_CONVERT_EXPR, itype,
8294 stored_val), true, NULL_TREE, true,
8295 GSI_SAME_STMT);
8296
8297 /* Build the compare&swap statement. */
8298 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8299 new_storedi = force_gimple_operand_gsi (&si,
8300 fold_convert (TREE_TYPE (loadedi),
8301 new_storedi),
8302 true, NULL_TREE,
8303 true, GSI_SAME_STMT);
8304
8305 if (gimple_in_ssa_p (cfun))
8306 old_vali = loadedi;
8307 else
8308 {
8309 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8310 stmt = gimple_build_assign (old_vali, loadedi);
8311 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8312
8313 stmt = gimple_build_assign (loadedi, new_storedi);
8314 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8315 }
8316
8317 /* Note that we always perform the comparison as an integer, even for
8318 floating point. This allows the atomic operation to properly
8319 succeed even with NaNs and -0.0. */
8320 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8321 stmt = gimple_build_cond_empty (ne);
8322 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8323
8324 /* Update cfg. */
8325 e = single_succ_edge (store_bb);
8326 e->flags &= ~EDGE_FALLTHRU;
8327 e->flags |= EDGE_FALSE_VALUE;
8328 /* Expect no looping. */
8329 e->probability = profile_probability::guessed_always ();
8330
8331 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8332 e->probability = profile_probability::guessed_never ();
8333
8334 /* Copy the new value to loadedi (we already did that before the condition
8335 if we are not in SSA). */
8336 if (gimple_in_ssa_p (cfun))
8337 {
8338 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8339 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8340 }
8341
8342 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8343 gsi_remove (&si, true);
8344
8345 class loop *loop = alloc_loop ();
8346 loop->header = loop_header;
8347 loop->latch = store_bb;
8348 add_loop (loop, loop_header->loop_father);
8349
8350 if (gimple_in_ssa_p (cfun))
8351 update_ssa (TODO_update_ssa_no_phi);
8352
8353 return true;
8354}
8355
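/* For illustration only (not part of GCC): for a "float *f", the
   compare-and-swap loop built above behaves like this hand-written
   sketch (assuming 32-bit float and unsigned int):

       void
       atomic_add_float (float *f, float x)
       {
	 unsigned int old, cur, xchg;
	 float tmp;

	 __builtin_memcpy (&old, f, sizeof old);	// integral view
	 for (;;)
	   {
	     __builtin_memcpy (&tmp, &old, sizeof tmp);
	     tmp += x;					// apply the rhs
	     __builtin_memcpy (&xchg, &tmp, sizeof xchg);
	     cur = __sync_val_compare_and_swap ((unsigned int *) f,
						old, xchg);
	     if (cur == old)				// integral compare
	       break;
	     old = cur;
	   }
       }

   Comparing the integral views rather than the float values is what
   makes the loop terminate in the presence of NaNs and -0.0.  */
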
8356/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8357
8358 GOMP_atomic_start ();
8359 *addr = rhs;
8360 GOMP_atomic_end ();
8361
8362 The result is not globally atomic, but works so long as all parallel
8363 references are within #pragma omp atomic directives. According to
8364 responses received from omp@openmp.org, this appears to be within spec.
8365 That makes sense, since that's how several other compilers handle
8366 this situation as well.
8367 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8368 expanding. STORED_VAL is the operand of the matching
8369 GIMPLE_OMP_ATOMIC_STORE.
8370
8371 We replace
8372 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8373 loaded_val = *addr;
8374
8375 and replace
8376 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8377 *addr = stored_val;
8378*/
8379
8380static bool
8381expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8382 tree addr, tree loaded_val, tree stored_val)
8383{
8384 gimple_stmt_iterator si;
8385 gassign *stmt;
8386 tree t;
8387
8388 si = gsi_last_nondebug_bb (load_bb);
8389 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8390
8391 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8392 t = build_call_expr (t, 0);
8393 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8394
8395 tree mem = build_simple_mem_ref (addr);
8396 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8397 TREE_OPERAND (mem, 1)
8398 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8399 true),
8400 TREE_OPERAND (mem, 1));
8401 stmt = gimple_build_assign (loaded_val, mem);
8402 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8403 gsi_remove (&si, true);
8404
8405 si = gsi_last_nondebug_bb (store_bb);
8406 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8407
8408 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8409 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8410
8411 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8412 t = build_call_expr (t, 0);
8413 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8414 gsi_remove (&si, true);
8415
8416 if (gimple_in_ssa_p (cfun))
8417 update_ssa (TODO_update_ssa_no_phi);
8418 return true;
8419}
8420
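/* For illustration only (not part of GCC): GOMP_atomic_start and
   GOMP_atomic_end acquire and release a single process-wide lock in
   libgomp, so this fallback serializes every mutex-expanded atomic in
   the program, not just those touching the same address; hence the
   caveat in the comment above about all parallel references going
   through #pragma omp atomic.  */
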
8421 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
8422 using expand_omp_atomic_fetch_op. If that fails, we try to
8423 call expand_omp_atomic_pipeline, and if that fails too, the
8424 ultimate fallback is wrapping the operation in a mutex
8425 (expand_omp_atomic_mutex). REGION is the atomic region built
8426 by build_omp_regions_1(). */
8427
8428static void
8429expand_omp_atomic (struct omp_region *region)
8430{
8431 basic_block load_bb = region->entry, store_bb = region->exit;
8432 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
8433 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
8434 tree loaded_val = gimple_omp_atomic_load_lhs (load);
8435 tree addr = gimple_omp_atomic_load_rhs (load);
8436 tree stored_val = gimple_omp_atomic_store_val (store);
8437 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8438 HOST_WIDE_INT index;
8439
8440 /* Make sure the type is one of the supported sizes. */
8441 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
8442 index = exact_log2 (index);
8443 if (index >= 0 && index <= 4)
8444 {
8445 unsigned int align = TYPE_ALIGN_UNIT (type);
8446
8447 /* __sync builtins require strict data alignment. */
8448 if (exact_log2 (align) >= index)
8449 {
8450 /* Atomic load. */
8451 scalar_mode smode;
8452 if (loaded_val == stored_val
8453 && (is_int_mode (TYPE_MODE (type), &smode)
8454 || is_float_mode (TYPE_MODE (type), &smode))
8455 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8456 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
8457 return;
8458
8459 /* Atomic store. */
8460 if ((is_int_mode (TYPE_MODE (type), &smode)
8461 || is_float_mode (TYPE_MODE (type), &smode))
8462 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8463 && store_bb == single_succ (load_bb)
8464 && first_stmt (store_bb) == store
8465 && expand_omp_atomic_store (load_bb, addr, loaded_val,
8466 stored_val, index))
8467 return;
8468
8469 /* When possible, use specialized atomic update functions. */
8470 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
8471 && store_bb == single_succ (load_bb)
8472 && expand_omp_atomic_fetch_op (load_bb, addr,
8473 loaded_val, stored_val, index))
8474 return;
8475
8476 /* If we don't have specialized __sync builtins, try and implement
8477 as a compare and swap loop. */
8478 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
8479 loaded_val, stored_val, index))
8480 return;
8481 }
8482 }
8483
8484 /* The ultimate fallback is wrapping the operation in a mutex. */
8485 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
8486}
8487
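/* For illustration only (not part of GCC): INDEX above is log2 of the
   type size in bytes, so the supported sizes map to builtin variants as

       size  1 -> index 0 -> __atomic_*_1
       size  2 -> index 1 -> __atomic_*_2
       size  4 -> index 2 -> __atomic_*_4
       size  8 -> index 3 -> __atomic_*_8
       size 16 -> index 4 -> __atomic_*_16

   (the "+ index + 1" in the helpers skips the generic _N entry).  Other
   sizes, or insufficiently aligned objects, take the mutex path.  */
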
8488/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
8489 at REGION_EXIT. */
8490
8491static void
8492mark_loops_in_oacc_kernels_region (basic_block region_entry,
8493 basic_block region_exit)
8494{
8495 class loop *outer = region_entry->loop_father;
8496 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
8497
8498 /* Don't parallelize the kernels region if it contains more than one outer
8499 loop. */
8500 unsigned int nr_outer_loops = 0;
8501 class loop *single_outer = NULL;
8502 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
8503 {
8504 gcc_assert (loop_outer (loop) == outer);
8505
8506 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
8507 continue;
8508
8509 if (region_exit != NULL
8510 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
8511 continue;
8512
8513 nr_outer_loops++;
8514 single_outer = loop;
8515 }
8516 if (nr_outer_loops != 1)
8517 return;
8518
8519 for (class loop *loop = single_outer->inner;
8520 loop != NULL;
8521 loop = loop->inner)
8522 if (loop->next)
8523 return;
8524
8525 /* Mark the loops in the region. */
8526 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
8527 loop->in_oacc_kernels_region = true;
8528}
8529
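/* For illustration only (not part of GCC): the marking above only
   happens for a kernels region whose loops form one non-branching nest,
   e.g.

       #pragma acc kernels
       for (int i = 0; i < n; i++)	// exactly one outermost loop
	 for (int j = 0; j < m; j++)	// at most one loop per level
	   ...

   Two sibling loops at any level leave the region unmarked.  */
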
8530 /* Types used to pass grid and workgroup sizes to kernel invocation. */
8531
8532struct GTY(()) grid_launch_attributes_trees
8533{
8534 tree kernel_dim_array_type;
8535 tree kernel_lattrs_dimnum_decl;
8536 tree kernel_lattrs_grid_decl;
8537 tree kernel_lattrs_group_decl;
8538 tree kernel_launch_attributes_type;
8539};
8540
8541static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
8542
8543/* Create types used to pass kernel launch attributes to target. */
8544
8545static void
8546grid_create_kernel_launch_attr_types (void)
8547{
8548 if (grid_attr_trees)
8549 return;
8550 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
8551
8552 tree dim_arr_index_type
8553 = build_index_type (build_int_cst (integer_type_node, 2));
8554 grid_attr_trees->kernel_dim_array_type
8555 = build_array_type (uint32_type_node, dim_arr_index_type);
8556
8557 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
8558 grid_attr_trees->kernel_lattrs_dimnum_decl
8559 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
8560 uint32_type_node);
8561 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
8562
8563 grid_attr_trees->kernel_lattrs_grid_decl
8564 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
8565 grid_attr_trees->kernel_dim_array_type);
8566 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
8567 = grid_attr_trees->kernel_lattrs_dimnum_decl;
8568 grid_attr_trees->kernel_lattrs_group_decl
8569 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
8570 grid_attr_trees->kernel_dim_array_type);
8571 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
8572 = grid_attr_trees->kernel_lattrs_grid_decl;
8573 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
8574 "__gomp_kernel_launch_attributes",
8575 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
8576}
8577
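/* For illustration only (not part of GCC): the record type built below
   corresponds to this C declaration:

       struct __gomp_kernel_launch_attributes
       {
	 uint32_t ndim;		 /* Number of dimensions in use (1-3).  */
	 uint32_t grid_size[3];	 /* Grid size per dimension.  */
	 uint32_t group_size[3]; /* Work-group size per dimension.  */
       };
*/
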
8578/* Insert before the current statement in GSI a store of VALUE to INDEX of
8579 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
8580 of type uint32_type_node. */
8581
8582static void
8583grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
8584 tree fld_decl, int index, tree value)
8585{
8586 tree ref = build4 (ARRAY_REF, uint32_type_node,
8587 build3 (COMPONENT_REF,
8588 grid_attr_trees->kernel_dim_array_type,
8589 range_var, fld_decl, NULL_TREE),
8590 build_int_cst (integer_type_node, index),
8591 NULL_TREE, NULL_TREE);
8592 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
8593}
8594
8595/* Return a tree representation of a pointer to a structure with grid and
8596 work-group size information. Statements filling that information will be
8597 inserted before GSI; TGT_STMT is the target statement which has the
8598 necessary information in it. */
8599
8600static tree
8601grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
8602 gomp_target *tgt_stmt)
8603{
8604 grid_create_kernel_launch_attr_types ();
8605 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
8606 "__kernel_launch_attrs");
8607
8608 unsigned max_dim = 0;
8609 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
8610 clause;
8611 clause = OMP_CLAUSE_CHAIN (clause))
8612 {
8613 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
8614 continue;
8615
8616 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
8617 max_dim = MAX (dim, max_dim);
8618
8619 grid_insert_store_range_dim (gsi, lattrs,
8620 grid_attr_trees->kernel_lattrs_grid_decl,
8621 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
8622 grid_insert_store_range_dim (gsi, lattrs,
8623 grid_attr_trees->kernel_lattrs_group_decl,
8624 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
8625 }
8626
8627 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
8628 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
8629 gcc_checking_assert (max_dim <= 2);
8630 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
8631 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
8632 GSI_SAME_STMT);
8633 TREE_ADDRESSABLE (lattrs) = 1;
8634 return build_fold_addr_expr (lattrs);
8635}
8636
8637/* Build target argument identifier from the DEVICE identifier, value
8638 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
8639
8640static tree
8641 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
8642{
8643 tree t = build_int_cst (integer_type_node, device);
8644 if (subsequent_param)
8645 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8646 build_int_cst (integer_type_node,
8647 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
8648 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8649 build_int_cst (integer_type_node, id));
8650 return t;
8651}
8652
8653/* Like above but return it in type that can be directly stored as an element
8654 of the argument array. */
8655
8656static tree
8657 get_target_argument_identifier (int device, bool subsequent_param, int id)
8658{
8659 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
8660 return fold_convert (ptr_type_node, t);
8661}
8662
8663/* Return a target argument consisting of DEVICE identifier, value identifier
8664 ID, and the actual VALUE. */
8665
8666static tree
8667get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
8668 tree value)
8669{
8670 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
8671 fold_convert (integer_type_node, value),
8672 build_int_cst (unsigned_type_node,
8673 GOMP_TARGET_ARG_VALUE_SHIFT));
8674 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
8675 get_target_argument_identifier_1 (device, false, id));
8676 t = fold_convert (ptr_type_node, t);
8677 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
8678}
8679
8680/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
8681 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
8682 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
8683 arguments. */
8684
8685static void
8686push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
8687 int id, tree value, vec <tree> *args)
8688{
8689 if (tree_fits_shwi_p (value)
8690 && tree_to_shwi (value) > -(1 << 15)
8691 && tree_to_shwi (value) < (1 << 15))
8692 args->quick_push (get_target_argument_value (gsi, device, id, value));
8693 else
8694 {
8695 args->quick_push (get_target_argument_identifier (device, true, id));
8696 value = fold_convert (ptr_type_node, value);
8697 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
8698 GSI_SAME_STMT);
8699 args->quick_push (value);
8700 }
8701}
8702
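/* For illustration only (not part of GCC): using the GOMP_TARGET_ARG_*
   constants from gomp-constants.h, a value that fits into a signed
   16-bit quantity is packed into a single array element as

       (value << GOMP_TARGET_ARG_VALUE_SHIFT) | device | id

   cast to a pointer; anything larger is passed as two elements, an
   identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM or'ed in, followed
   by the value itself converted to a pointer.  */
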
8703 /* Create an array of arguments that is then passed to GOMP_target. */
8704
8705static tree
8706get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
8707{
8708 auto_vec <tree, 6> args;
8709 tree clauses = gimple_omp_target_clauses (tgt_stmt);
8710 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
8711 if (c)
8712 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
8713 else
8714 t = integer_minus_one_node;
8715 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8716 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
8717
8718 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
8719 if (c)
8720 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
8721 else
8722 t = integer_minus_one_node;
8723 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
8724 GOMP_TARGET_ARG_THREAD_LIMIT, t,
8725 &args);
8726
8727 /* Add HSA-specific grid sizes, if available. */
8728 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8729 OMP_CLAUSE__GRIDDIM_))
8730 {
8731 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
8732 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
8733 args.quick_push (t);
8734 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
8735 }
8736
8737 /* Produce more, perhaps device specific, arguments here. */
8738
8739 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
8740 args.length () + 1),
8741 ".omp_target_args");
8742 for (unsigned i = 0; i < args.length (); i++)
8743 {
8744 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8745 build_int_cst (integer_type_node, i),
8746 NULL_TREE, NULL_TREE);
8747 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
8748 GSI_SAME_STMT);
8749 }
8750 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
8751 build_int_cst (integer_type_node, args.length ()),
8752 NULL_TREE, NULL_TREE);
8753 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
8754 GSI_SAME_STMT);
8755 TREE_ADDRESSABLE (argarray) = 1;
8756 return build_fold_addr_expr (argarray);
8757}
8758
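/* For illustration only (not part of GCC): for

       #pragma omp target teams num_teams(4) thread_limit(64)

   the array built above contains an encoded NUM_TEAMS = 4 element, an
   encoded THREAD_LIMIT = 64 element, and a terminating NULL pointer;
   its address becomes the trailing arguments operand of the GOMP_target
   call emitted in expand_omp_target.  */
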
8759/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
8760
8761static void
8762expand_omp_target (struct omp_region *region)
8763{
8764 basic_block entry_bb, exit_bb, new_bb;
8765 struct function *child_cfun;
8766 tree child_fn, block, t;
8767 gimple_stmt_iterator gsi;
8768 gomp_target *entry_stmt;
8769 gimple *stmt;
8770 edge e;
8771 bool offloaded, data_region;
8772 int target_kind;
8773
8774 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
8775 target_kind = gimple_omp_target_kind (entry_stmt);
8776 new_bb = region->entry;
8777
8778 offloaded = is_gimple_omp_offloaded (entry_stmt);
8779 switch (target_kind)
8780 {
8781 case GF_OMP_TARGET_KIND_REGION:
8782 case GF_OMP_TARGET_KIND_UPDATE:
8783 case GF_OMP_TARGET_KIND_ENTER_DATA:
8784 case GF_OMP_TARGET_KIND_EXIT_DATA:
8785 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8786 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8787 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8788 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8789 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8790 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8791 data_region = false;
8792 break;
8793 case GF_OMP_TARGET_KIND_DATA:
8794 case GF_OMP_TARGET_KIND_OACC_DATA:
8795 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8796 data_region = true;
8797 break;
8798 default:
8799 gcc_unreachable ();
8800 }
8801
8802 child_fn = NULL_TREE;
8803 child_cfun = NULL;
8804 if (offloaded)
8805 {
8806 child_fn = gimple_omp_target_child_fn (entry_stmt);
8807 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
8808 }
8809
8810 /* Supported by expand_omp_taskreg, but not here. */
8811 if (child_cfun != NULL)
8812 gcc_checking_assert (!child_cfun->cfg);
8813 gcc_checking_assert (!gimple_in_ssa_p (cfun));
8814
8815 entry_bb = region->entry;
8816 exit_bb = region->exit;
8817
8818 switch (target_kind)
8819 {
8820 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8821 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
8822
8823 /* Further down, all OpenACC compute constructs will be mapped to
8824 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8825 is an "oacc kernels" attribute set for OpenACC kernels. */
8826 DECL_ATTRIBUTES (child_fn)
8827 = tree_cons (get_identifier ("oacc kernels"),
8828 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8829 break;
8830 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8831 /* Further down, all OpenACC compute constructs will be mapped to
8832 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
8833 is an "oacc serial" attribute set for OpenACC serial. */
8834 DECL_ATTRIBUTES (child_fn)
8835 = tree_cons (get_identifier ("oacc serial"),
8836 NULL_TREE, DECL_ATTRIBUTES (child_fn));
8837 break;
8838 default:
8839 break;
8840 }
8841
8842 if (offloaded)
8843 {
8844 unsigned srcidx, dstidx, num;
8845
8846 /* If the offloading region needs data sent from the parent
8847 function, then the very first statement (except possible
8848 tree profile counter updates) of the offloading body
8849 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
8850 &.OMP_DATA_O is passed as an argument to the child function,
8851 we need to replace it with the argument as seen by the child
8852 function.
8853
8854 In most cases, this will end up being the identity assignment
8855 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
8856 a function call that has been inlined, the original PARM_DECL
8857 .OMP_DATA_I may have been converted into a different local
8858 variable, in which case we need to keep the assignment. */
8859 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8860 if (data_arg)
8861 {
8862 basic_block entry_succ_bb = single_succ (entry_bb);
8863 gimple_stmt_iterator gsi;
8864 tree arg;
8865 gimple *tgtcopy_stmt = NULL;
8866 tree sender = TREE_VEC_ELT (data_arg, 0);
8867
8868 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8869 {
8870 gcc_assert (!gsi_end_p (gsi));
8871 stmt = gsi_stmt (gsi);
8872 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8873 continue;
8874
8875 if (gimple_num_ops (stmt) == 2)
8876 {
8877 tree arg = gimple_assign_rhs1 (stmt);
8878
8879 /* We're ignoring the subcode because we're
8880 effectively doing a STRIP_NOPS. */
8881
8882 if (TREE_CODE (arg) == ADDR_EXPR
8883 && TREE_OPERAND (arg, 0) == sender)
8884 {
8885 tgtcopy_stmt = stmt;
8886 break;
8887 }
8888 }
8889 }
8890
8891 gcc_assert (tgtcopy_stmt != NULL);
8892 arg = DECL_ARGUMENTS (child_fn);
8893
8894 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8895 gsi_remove (&gsi, true);
8896 }
8897
8898 /* Declare local variables needed in CHILD_CFUN. */
8899 block = DECL_INITIAL (child_fn);
8900 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8901 /* The gimplifier could record temporaries in the offloading block
8902 rather than in containing function's local_decls chain,
8903 which would mean cgraph missed finalizing them. Do it now. */
8904 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8905 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8906 varpool_node::finalize_decl (t);
8907 DECL_SAVED_TREE (child_fn) = NULL;
8908 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8909 gimple_set_body (child_fn, NULL);
8910 TREE_USED (block) = 1;
8911
8912 /* Reset DECL_CONTEXT on function arguments. */
8913 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8914 DECL_CONTEXT (t) = child_fn;
8915
8916 /* Split ENTRY_BB at GIMPLE_*,
8917 so that it can be moved to the child function. */
8918 gsi = gsi_last_nondebug_bb (entry_bb);
8919 stmt = gsi_stmt (gsi);
8920 gcc_assert (stmt
8921 && gimple_code (stmt) == gimple_code (entry_stmt));
8922 e = split_block (entry_bb, stmt);
8923 gsi_remove (&gsi, true);
8924 entry_bb = e->dest;
8925 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8926
8927 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8928 if (exit_bb)
8929 {
8930 gsi = gsi_last_nondebug_bb (exit_bb);
8931 gcc_assert (!gsi_end_p (gsi)
8932 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8933 stmt = gimple_build_return (NULL);
8934 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8935 gsi_remove (&gsi, true);
8936 }
8937
8938 /* Move the offloading region into CHILD_CFUN. */
8939
8940 block = gimple_block (entry_stmt);
8941
8942 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8943 if (exit_bb)
8944 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8945 /* When the OMP expansion process cannot guarantee an up-to-date
8946 loop tree arrange for the child function to fixup loops. */
8947 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8948 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8949
8950 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8951 num = vec_safe_length (child_cfun->local_decls);
8952 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8953 {
8954 t = (*child_cfun->local_decls)[srcidx];
8955 if (DECL_CONTEXT (t) == cfun->decl)
8956 continue;
8957 if (srcidx != dstidx)
8958 (*child_cfun->local_decls)[dstidx] = t;
8959 dstidx++;
8960 }
8961 if (dstidx != num)
8962 vec_safe_truncate (child_cfun->local_decls, dstidx);
8963
8964 /* Inform the callgraph about the new function. */
8965 child_cfun->curr_properties = cfun->curr_properties;
8966 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8967 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8968 cgraph_node *node = cgraph_node::get_create (child_fn);
8969 node->parallelized_function = 1;
8970 cgraph_node::add_new_function (child_fn, true);
8971
8972 /* Add the new function to the offload table. */
8973 if (ENABLE_OFFLOADING)
8974 {
8975 if (in_lto_p)
8976 DECL_PRESERVE_P (child_fn) = 1;
8977 vec_safe_push (offload_funcs, child_fn);
8978 }
8979
8980 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8981 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8982
8983 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8984 fixed in a following pass. */
8985 push_cfun (child_cfun);
8986 if (need_asm)
8987 assign_assembler_name_if_needed (child_fn);
8988 cgraph_edge::rebuild_edges ();
8989
8990 /* Some EH regions might become dead, see PR34608. If
8991 pass_cleanup_cfg isn't the first pass to happen with the
8992 new child, these dead EH edges might cause problems.
8993 Clean them up now. */
8994 if (flag_exceptions)
8995 {
8996 basic_block bb;
8997 bool changed = false;
8998
8999 FOR_EACH_BB_FN (bb, cfun)
9000 changed |= gimple_purge_dead_eh_edges (bb);
9001 if (changed)
9002 cleanup_tree_cfg ();
9003 }
9004 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9005 verify_loop_structure ();
9006 pop_cfun ();
9007
9008 if (dump_file && !gimple_in_ssa_p (cfun))
9009 {
9010 omp_any_child_fn_dumped = true;
9011 dump_function_header (dump_file, child_fn, dump_flags);
9012 dump_function_to_file (child_fn, dump_file, dump_flags);
9013 }
9014
9015 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9016 }
9017
9018 /* Emit a library call to launch the offloading region, or do data
9019 transfers. */
9020 tree t1, t2, t3, t4, depend, c, clauses;
9021 enum built_in_function start_ix;
9022 unsigned int flags_i = 0;
9023
9024 switch (gimple_omp_target_kind (entry_stmt))
9025 {
9026 case GF_OMP_TARGET_KIND_REGION:
9027 start_ix = BUILT_IN_GOMP_TARGET;
9028 break;
9029 case GF_OMP_TARGET_KIND_DATA:
9030 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9031 break;
9032 case GF_OMP_TARGET_KIND_UPDATE:
9033 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9034 break;
9035 case GF_OMP_TARGET_KIND_ENTER_DATA:
9036 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9037 break;
9038 case GF_OMP_TARGET_KIND_EXIT_DATA:
9039 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9040 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9041 break;
9042 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9043 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9044 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9045 start_ix = BUILT_IN_GOACC_PARALLEL;
9046 break;
9047 case GF_OMP_TARGET_KIND_OACC_DATA:
9048 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9049 start_ix = BUILT_IN_GOACC_DATA_START;
9050 break;
9051 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9052 start_ix = BUILT_IN_GOACC_UPDATE;
9053 break;
9054 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9055 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9056 break;
9057 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9058 start_ix = BUILT_IN_GOACC_DECLARE;
9059 break;
9060 default:
9061 gcc_unreachable ();
9062 }
9063
9064 clauses = gimple_omp_target_clauses (entry_stmt);
9065
9066 tree device = NULL_TREE;
9067 location_t device_loc = UNKNOWN_LOCATION;
9068 tree goacc_flags = NULL_TREE;
9069 if (is_gimple_omp_oacc (entry_stmt))
9070 {
9071 /* By default, no GOACC_FLAGs are set. */
9072 goacc_flags = integer_zero_node;
9073 }
9074 else
9075 {
9076 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9077 if (c)
9078 {
9079 device = OMP_CLAUSE_DEVICE_ID (c);
9080 device_loc = OMP_CLAUSE_LOCATION (c);
9081 }
9082 else
9083 {
9084 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9085 library choose). */
9086 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9087 device_loc = gimple_location (entry_stmt);
9088 }
9089
9090 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9091 if (c)
9092 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9093 }
9094
9095 /* By default, there is no conditional. */
9096 tree cond = NULL_TREE;
9097 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9098 if (c)
9099 cond = OMP_CLAUSE_IF_EXPR (c);
9100 /* If we found the clause 'if (cond)', build:
9101 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
9102 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9103 if (cond)
9104 {
9105 tree *tp;
9106 if (is_gimple_omp_oacc (entry_stmt))
9107 tp = &goacc_flags;
9108 else
9109 {
9110 /* Ensure 'device' is of the correct type. */
9111 device = fold_convert_loc (device_loc, integer_type_node, device);
9112
9113 tp = &device;
9114 }
9115
9116 cond = gimple_boolify (cond);
9117
9118 basic_block cond_bb, then_bb, else_bb;
9119 edge e;
9120 tree tmp_var;
9121
9122 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9123 if (offloaded)
9124 e = split_block_after_labels (new_bb);
9125 else
9126 {
9127 gsi = gsi_last_nondebug_bb (new_bb);
9128 gsi_prev (&gsi);
9129 e = split_block (new_bb, gsi_stmt (gsi));
9130 }
9131 cond_bb = e->src;
9132 new_bb = e->dest;
9133 remove_edge (e);
9134
9135 then_bb = create_empty_bb (cond_bb);
9136 else_bb = create_empty_bb (then_bb);
9137 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9138 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9139
9140 stmt = gimple_build_cond_empty (cond);
9141 gsi = gsi_last_bb (cond_bb);
9142 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9143
9144 gsi = gsi_start_bb (then_bb);
9145 stmt = gimple_build_assign (tmp_var, *tp);
9146 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9147
9148 gsi = gsi_start_bb (else_bb);
9149 if (is_gimple_omp_oacc (entry_stmt))
9150 stmt = gimple_build_assign (tmp_var,
9151 BIT_IOR_EXPR,
9152 *tp,
9153 build_int_cst (integer_type_node,
9154 GOACC_FLAG_HOST_FALLBACK));
9155 else
9156 stmt = gimple_build_assign (tmp_var,
9157 build_int_cst (integer_type_node,
9158 GOMP_DEVICE_HOST_FALLBACK));
9159 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9160
9161 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9162 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9163 add_bb_to_loop (then_bb, cond_bb->loop_father);
9164 add_bb_to_loop (else_bb, cond_bb->loop_father);
9165 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9166 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9167
9168 *tp = tmp_var;
9169
9170 gsi = gsi_last_nondebug_bb (new_bb);
9171 }
9172 else
9173 {
9174 gsi = gsi_last_nondebug_bb (new_bb);
9175
9176 if (device != NULL_TREE)
9177 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9178 true, GSI_SAME_STMT);
9179 }
9180
9181 t = gimple_omp_target_data_arg (entry_stmt);
9182 if (t == NULL)
9183 {
9184 t1 = size_zero_node;
9185 t2 = build_zero_cst (ptr_type_node);
9186 t3 = t2;
9187 t4 = t2;
9188 }
9189 else
9190 {
9191 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9192 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9193 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9194 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9195 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9196 }
9197
9198 gimple *g;
9199 bool tagging = false;
9200 /* The maximum number used by any start_ix, without varargs. */
9201 auto_vec<tree, 11> args;
9202 if (is_gimple_omp_oacc (entry_stmt))
9203 {
9204 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9205 TREE_TYPE (goacc_flags), goacc_flags);
9206 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9207 NULL_TREE, true,
9208 GSI_SAME_STMT);
9209 args.quick_push (goacc_flags_m);
9210 }
9211 else
9212 args.quick_push (device);
9213 if (offloaded)
9214 args.quick_push (build_fold_addr_expr (child_fn));
9215 args.quick_push (t1);
9216 args.quick_push (t2);
9217 args.quick_push (t3);
9218 args.quick_push (t4);
9219 switch (start_ix)
9220 {
9221 case BUILT_IN_GOACC_DATA_START:
9222 case BUILT_IN_GOACC_DECLARE:
9223 case BUILT_IN_GOMP_TARGET_DATA:
9224 break;
9225 case BUILT_IN_GOMP_TARGET:
9226 case BUILT_IN_GOMP_TARGET_UPDATE:
9227 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9228 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9229 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9230 if (c)
9231 depend = OMP_CLAUSE_DECL (c);
9232 else
9233 depend = build_int_cst (ptr_type_node, 0);
9234 args.quick_push (depend);
9235 if (start_ix == BUILT_IN_GOMP_TARGET)
9236 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9237 break;
9238 case BUILT_IN_GOACC_PARALLEL:
9239 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9240 {
9241 tree dims = NULL_TREE;
9242 unsigned int ix;
9243
9244 /* For serial constructs we set all dimensions to 1. */
9245 for (ix = GOMP_DIM_MAX; ix--;)
9246 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9247 oacc_replace_fn_attrib (child_fn, dims);
9248 }
9249 else
9250 oacc_set_fn_attrib (child_fn, clauses, &args);
9251 tagging = true;
9252 /* FALLTHRU */
9253 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9254 case BUILT_IN_GOACC_UPDATE:
9255 {
9256 tree t_async = NULL_TREE;
9257
9258 /* If present, use the value specified by the respective
9259 clause, making sure that is of the correct type. */
9260 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9261 if (c)
9262 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9263 integer_type_node,
9264 OMP_CLAUSE_ASYNC_EXPR (c));
9265 else if (!tagging)
9266 /* Default values for t_async. */
9267 t_async = fold_convert_loc (gimple_location (entry_stmt),
9268 integer_type_node,
9269 build_int_cst (integer_type_node,
9270 GOMP_ASYNC_SYNC));
9271 if (tagging && t_async)
9272 {
9273 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9274
9275 if (TREE_CODE (t_async) == INTEGER_CST)
9276 {
9277 /* See if we can pack the async arg in to the tag's
9278 operand. */
9279 i_async = TREE_INT_CST_LOW (t_async);
9280 if (i_async < GOMP_LAUNCH_OP_MAX)
9281 t_async = NULL_TREE;
9282 else
9283 i_async = GOMP_LAUNCH_OP_MAX;
9284 }
9285 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9286 i_async));
9287 }
9288 if (t_async)
9289 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9290 NULL_TREE, true,
9291 GSI_SAME_STMT));
9292
9293 /* Save the argument index, and ... */
9294 unsigned t_wait_idx = args.length ();
9295 unsigned num_waits = 0;
9296 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9297 if (!tagging || c)
9298 /* ... push a placeholder. */
9299 args.safe_push (integer_zero_node);
9300
9301 for (; c; c = OMP_CLAUSE_CHAIN (c))
9302 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9303 {
9304 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9305 integer_type_node,
9306 OMP_CLAUSE_WAIT_EXPR (c));
9307 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9308 GSI_SAME_STMT);
9309 args.safe_push (arg);
9310 num_waits++;
9311 }
9312
9313 if (!tagging || num_waits)
9314 {
9315 tree len;
9316
9317 /* Now that we know the number, update the placeholder. */
9318 if (tagging)
9319 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9320 else
9321 len = build_int_cst (integer_type_node, num_waits);
9322 len = fold_convert_loc (gimple_location (entry_stmt),
9323 unsigned_type_node, len);
9324 args[t_wait_idx] = len;
9325 }
9326 }
9327 break;
9328 default:
9329 gcc_unreachable ();
9330 }
9331 if (tagging)
9332 /* Push terminal marker - zero. */
9333 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9334
9335 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9336 gimple_set_location (g, gimple_location (entry_stmt));
9337 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9338 if (!offloaded)
9339 {
9340 g = gsi_stmt (gsi);
9341 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9342 gsi_remove (&gsi, true);
9343 }
9344 if (data_region && region->exit)
9345 {
9346 gsi = gsi_last_nondebug_bb (region->exit);
9347 g = gsi_stmt (gsi);
9348 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9349 gsi_remove (&gsi, true);
9350 }
9351}
9352
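/* For illustration only (not part of GCC): for a plain OpenMP

       #pragma omp target
       { ... }

   the call emitted above goes to the libgomp entry point behind
   BUILT_IN_GOMP_TARGET (GOMP_target_ext in current libgomp), roughly

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes,
			kinds, flags, depend, args);

   with T1..T4 above supplying the map count and the three map arrays,
   and DEVICE resolved by the device()/if() clause logic earlier in the
   function.  OpenACC compute constructs use BUILT_IN_GOACC_PARALLEL
   instead, with launch dimensions packed by oacc_set_fn_attrib.  */
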
9353 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
9354 iteration variable derived from the thread number. INTRA_GROUP means this
9355 is an expansion of a loop iterating over work-items within a separate
9356 iteration over groups. */
9357
9358static void
9359grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
9360{
9361 gimple_stmt_iterator gsi;
9362 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
9363 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
9364 == GF_OMP_FOR_KIND_GRID_LOOP);
9365 size_t collapse = gimple_omp_for_collapse (for_stmt);
9366 struct omp_for_data_loop *loops
9367 = XALLOCAVEC (struct omp_for_data_loop,
9368 gimple_omp_for_collapse (for_stmt));
9369 struct omp_for_data fd;
9370
9371 remove_edge (BRANCH_EDGE (kfor->entry));
9372 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
9373
9374 gcc_assert (kfor->cont);
9375 omp_extract_for_data (for_stmt, &fd, loops);
9376
9377 gsi = gsi_start_bb (body_bb);
9378
9379 for (size_t dim = 0; dim < collapse; dim++)
9380 {
9381 tree type, itype;
9382 itype = type = TREE_TYPE (fd.loops[dim].v);
9383 if (POINTER_TYPE_P (type))
9384 itype = signed_type_for (type);
9385
9386 tree n1 = fd.loops[dim].n1;
9387 tree step = fd.loops[dim].step;
9388 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9389 true, NULL_TREE, true, GSI_SAME_STMT);
9390 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9391 true, NULL_TREE, true, GSI_SAME_STMT);
9392 tree threadid;
9393 if (gimple_omp_for_grid_group_iter (for_stmt))
9394 {
9395 gcc_checking_assert (!intra_group);
9396 threadid = build_call_expr (builtin_decl_explicit
9397 (BUILT_IN_HSA_WORKGROUPID), 1,
9398 build_int_cstu (unsigned_type_node, dim));
9399 }
9400 else if (intra_group)
9401 threadid = build_call_expr (builtin_decl_explicit
9402 (BUILT_IN_HSA_WORKITEMID), 1,
9403 build_int_cstu (unsigned_type_node, dim));
9404 else
9405 threadid = build_call_expr (builtin_decl_explicit
9406 (BUILT_IN_HSA_WORKITEMABSID), 1,
9407 build_int_cstu (unsigned_type_node, dim));
9408 threadid = fold_convert (itype, threadid);
9409 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9410 true, GSI_SAME_STMT);
9411
9412 tree startvar = fd.loops[dim].v;
9413 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
9414 if (POINTER_TYPE_P (type))
9415 t = fold_build_pointer_plus (n1, t);
9416 else
9417 t = fold_build2 (PLUS_EXPR, type, t, n1);
9418 t = fold_convert (type, t);
9419 t = force_gimple_operand_gsi (&gsi, t,
9420 DECL_P (startvar)
9421 && TREE_ADDRESSABLE (startvar),
9422 NULL_TREE, true, GSI_SAME_STMT);
9423 gassign *assign_stmt = gimple_build_assign (startvar, t);
9424 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9425 }
9426 /* Remove the omp for statement. */
9427 gsi = gsi_last_nondebug_bb (kfor->entry);
9428 gsi_remove (&gsi, true);
9429
9430 /* Remove the GIMPLE_OMP_CONTINUE statement. */
9431 gsi = gsi_last_nondebug_bb (kfor->cont);
9432 gcc_assert (!gsi_end_p (gsi)
9433 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
9434 gsi_remove (&gsi, true);
9435
9436 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
9437 gsi = gsi_last_nondebug_bb (kfor->exit);
9438 gcc_assert (!gsi_end_p (gsi)
9439 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9440 if (intra_group)
9441 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
9442 gsi_remove (&gsi, true);
9443
9444 /* Fixup the much simpler CFG. */
9445 remove_edge (find_edge (kfor->cont, body_bb));
9446
9447 if (kfor->cont != body_bb)
9448 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
9449 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
9450}
9451
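/* For illustration only (not part of GCC): after the rewrite above, the
   body of a gridified loop no longer iterates; for each dimension DIM
   it simply starts with

       V = N1 + (TYPE) THREADID * STEP;

   where THREADID is the result of BUILT_IN_HSA_WORKGROUPID,
   BUILT_IN_HSA_WORKITEMID or BUILT_IN_HSA_WORKITEMABSID for DIM, chosen
   as in the loop above.  */
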
9452/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
9453 argument_decls. */
9454
9455struct grid_arg_decl_map
9456{
9457 tree old_arg;
9458 tree new_arg;
9459};
9460
9461/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
9462 pertaining to kernel function. */
9463
9464static tree
9465grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
9466{
9467 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
9468 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
9469 tree t = *tp;
9470
9471 if (t == adm->old_arg)
9472 *tp = adm->new_arg;
9473 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
9474 return NULL_TREE;
9475}
9476
9477 /* If TARGET region contains a gridified kernel body "for" loop, remove its
9478 region from the TARGET and expand it in HSA gridified kernel fashion. */
9479
9480static void
9481grid_expand_target_grid_body (struct omp_region *target)
9482{
9483 if (!hsa_gen_requested_p ())
9484 return;
9485
9486 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
9487 struct omp_region **pp;
9488
9489 for (pp = &target->inner; *pp; pp = &(*pp)->next)
9490 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
9491 break;
9492
9493 struct omp_region *gpukernel = *pp;
9494
9495 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
9496 if (!gpukernel)
9497 {
9498 /* HSA cannot handle OACC stuff. */
9499 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
9500 return;
9501 gcc_checking_assert (orig_child_fndecl);
9502 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
9503 OMP_CLAUSE__GRIDDIM_));
9504 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
9505
9506 hsa_register_kernel (n);
9507 return;
9508 }
9509
9510 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
9511 OMP_CLAUSE__GRIDDIM_));
9512 tree inside_block
9513 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
9514 *pp = gpukernel->next;
9515 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
9516 if ((*pp)->type == GIMPLE_OMP_FOR)
9517 break;
9518
9519 struct omp_region *kfor = *pp;
9520 gcc_assert (kfor);
9521 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
9522 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
9523 *pp = kfor->next;
9524 if (kfor->inner)
9525 {
9526 if (gimple_omp_for_grid_group_iter (for_stmt))
9527 {
9528 struct omp_region **next_pp;
9529 for (pp = &kfor->inner; *pp; pp = next_pp)
9530 {
9531 next_pp = &(*pp)->next;
9532 if ((*pp)->type != GIMPLE_OMP_FOR)
9533 continue;
9534 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
9535 gcc_assert (gimple_omp_for_kind (inner)
9536 == GF_OMP_FOR_KIND_GRID_LOOP);
9537 grid_expand_omp_for_loop (*pp, true);
9538 *pp = (*pp)->next;
9539 next_pp = pp;
9540 }
9541 }
9542 expand_omp (kfor->inner);
9543 }
9544 if (gpukernel->inner)
9545 expand_omp (gpukernel->inner);
9546
9547 tree kern_fndecl = copy_node (orig_child_fndecl);
9548 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
9549 "kernel");
9550 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
9551 tree tgtblock = gimple_block (tgt_stmt);
9552 tree fniniblock = make_node (BLOCK);
9553 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
9554 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
9555 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
9556 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
9557 DECL_INITIAL (kern_fndecl) = fniniblock;
9558 push_struct_function (kern_fndecl);
9559 cfun->function_end_locus = gimple_location (tgt_stmt);
9560 init_tree_ssa (cfun);
9561 pop_cfun ();
9562
9563 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
9564 gcc_assert (!DECL_CHAIN (old_parm_decl));
9565 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
9566 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
9567 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
9568 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
9569 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
9570 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
9571 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
9572 kern_cfun->curr_properties = cfun->curr_properties;
9573
9574 grid_expand_omp_for_loop (kfor, false);
9575
9576 /* Remove the omp for statement. */
9577 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
9578 gsi_remove (&gsi, true);
9579 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
9580 return. */
9581 gsi = gsi_last_nondebug_bb (gpukernel->exit);
9582 gcc_assert (!gsi_end_p (gsi)
9583 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9584 gimple *ret_stmt = gimple_build_return (NULL);
9585 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
9586 gsi_remove (&gsi, true);
9587
  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
        break;
      if (sender
          && is_gimple_assign (stmt)
          && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
          && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
        continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
                          gpukernel->exit, inside_block);

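  /* Register the new kernel with the callgraph and with the HSA machinery,
     forcing it to be output even though nothing in this translation unit
     references it directly.  */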
  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          struct walk_stmt_info wi;
          memset (&wi, 0, sizeof (wi));
          wi.info = &adm;
          walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
        }
    }
  pop_cfun ();

  return;
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded with no
   unexpanded OMP directives left in their body.  */
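
/* For example (an illustrative sketch), given

       #pragma omp parallel
         #pragma omp for
         ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL one, so outlining the parallel body into its child
   function never encounters an unexpanded OMP directive.  */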

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
         region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
        grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
          && gimple_omp_for_combined_p (last_stmt (region->entry)))
        inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
        expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
        input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
        {
        case GIMPLE_OMP_PARALLEL:
        case GIMPLE_OMP_TASK:
          expand_omp_taskreg (region);
          break;

        case GIMPLE_OMP_FOR:
          expand_omp_for (region, inner_stmt);
          break;

        case GIMPLE_OMP_SECTIONS:
          expand_omp_sections (region);
          break;

        case GIMPLE_OMP_SECTION:
          /* Individual omp sections are handled together with their
             parent GIMPLE_OMP_SECTIONS region.  */
          break;

        case GIMPLE_OMP_SINGLE:
          expand_omp_single (region);
          break;

        case GIMPLE_OMP_ORDERED:
          {
            gomp_ordered *ord_stmt
              = as_a <gomp_ordered *> (last_stmt (region->entry));
            if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
                                 OMP_CLAUSE_DEPEND))
              {
                /* We'll expand these when expanding corresponding
                   worksharing region with ordered(n) clause.  */
                gcc_assert (region->outer
                            && region->outer->type == GIMPLE_OMP_FOR);
                region->ord_stmt = ord_stmt;
                break;
              }
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        case GIMPLE_OMP_TARGET:
          expand_omp_target (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
        dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a whole
   forest of OMP constructs may be built).  */
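
/* Note that scanning in dominator order guarantees each OMP directive is
   visited before any of the blocks it dominates, so PARENT always reflects
   the current nesting when a new region is opened.  */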

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
                     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
        {
          /* STMT is the return point out of region PARENT.  Mark it
             as the exit point and make PARENT the immediately
             enclosing region.  */
          gcc_assert (parent);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
        {
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to
             GIMPLE_OMP_RETURN, but matches with
             GIMPLE_OMP_ATOMIC_LOAD.  */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          region = parent;
          region->exit = bb;
          parent = parent->outer;
        }
      else if (code == GIMPLE_OMP_CONTINUE)
        {
          gcc_assert (parent);
          parent->cont = bb;
        }
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
        {
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of
             GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
        }
      else
        {
          region = new_omp_region (bb, code, parent);
          /* Otherwise...  */
          if (code == GIMPLE_OMP_TARGET)
            {
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
                case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_SERIAL:
                case GF_OMP_TARGET_KIND_OACC_DATA:
                case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
                case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives...  */
                  region = NULL;
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          else if (code == GIMPLE_OMP_ORDERED
                   && omp_find_clause (gimple_omp_ordered_clauses
                                         (as_a <gomp_ordered *> (stmt)),
                                       OMP_CLAUSE_DEPEND))
            /* #pragma omp ordered depend is also just a stand-alone
               directive.  */
            region = NULL;
          else if (code == GIMPLE_OMP_TASK
                   && gimple_omp_task_taskwait_p (stmt))
            /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
            region = NULL;
          /* ..., this directive becomes the parent for a new region.  */
          if (region)
            parent = region;
        }
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it in
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands the OMP construct (and its subconstructs) starting at HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing it in
   root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */
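
/* A second variant, pass_expand_omp_ssa below, performs the same expansion
   for functions that are already in SSA form but still lack
   PROP_gimple_eomp, for instance when OMP regions are created late, such as
   by loop parallelization.  */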

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
                    || flag_openmp_simd != 0)
                   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
         But often, there is nothing to do.  */
      if (!gate)
        return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */
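
/* Returns true iff the caller should add a fallthru edge from BB to the
   following basic block.  */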

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
                       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

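    /* A taskwait with depend clauses is represented as a GIMPLE_OMP_TASK
       with gimple_omp_task_taskwait_p set; it is a stand-alone directive
       (see build_omp_regions_1 above), so pop straight back out of its
       region.  */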
    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
                             (as_a <gomp_ordered *> (last)),
                           OMP_CLAUSE_DEPEND))
        cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
        case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_SERIAL:
        case GF_OMP_TARGET_KIND_OACC_DATA:
        case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
        case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
         somewhere other than the next block.  This will be
         created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
        /* Add an edge corresponding to not scheduling the task
           immediately.  */
        make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
        {
        case GIMPLE_OMP_FOR:
          /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
             succs edges as abnormal to prevent splitting
             them.  */
          single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
          /* Make the loopback edge.  */
          make_edge (bb, single_succ (cur_region->entry),
                     EDGE_ABNORMAL);

          /* Create an edge from GIMPLE_OMP_FOR to exit, which
             corresponds to the case that the body of the loop
             is not executed at all.  */
          make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
          make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
          fallthru = false;
          break;

        case GIMPLE_OMP_SECTIONS:
          /* Wire up the edges into and out of the nested sections.  */
          {
            basic_block switch_bb = single_succ (cur_region->entry);

            struct omp_region *i;
            for (i = cur_region->inner; i ; i = i->next)
              {
                gcc_assert (i->type == GIMPLE_OMP_SECTION);
                make_edge (switch_bb, i->entry, 0);
                make_edge (i->exit, bb, EDGE_FALLTHRU);
              }

            /* Make the loopback edge to the block with
               GIMPLE_OMP_SECTIONS_SWITCH.  */
            make_edge (bb, switch_bb, 0);

            /* Make the edge from the switch to exit.  */
            make_edge (switch_bb, bb->next_bb, 0);
            fallthru = false;
          }
          break;

        case GIMPLE_OMP_TASK:
          fallthru = true;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
        *region_idx = cur_region->entry->index;
      else
        *region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"