/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

        #pragma omp parallel for schedule (guided, i * 4)
        for (j ...)

   is lowered into:

        # BLOCK 2 (PAR_ENTRY_BB)
        .omp_data_o.i = i;
        #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

        # BLOCK 3 (WS_ENTRY_BB)
        .omp_data_i = &.omp_data_o;
        D.1667 = .omp_data_i->i;
        D.1598 = D.1667 * 4;
        #pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}
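
/* For instance, with a vectorization factor VF of 8 and a simd schedule
   with CHUNK_SIZE 10, the computation above yields (10 + 7) & -8 = 16:
   the chunk size is rounded up to the next multiple of VF so that a
   chunk never splits a set of SIMD lanes.  (Illustrative arithmetic
   only.)  */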

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
        {
          tree innerc
            = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
                               OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n1 = OMP_CLAUSE_DECL (innerc);
          innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                    OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n2 = OMP_CLAUSE_DECL (innerc);
        }

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
        {
          t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
          t = omp_adjust_chunk_size (t, fd.simd_schedule);
          ws_args->quick_push (t);
        }

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
         GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
         the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
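
/* As an illustrative sketch, for a combined region such as

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the vector built above holds the loop bounds, step and chunk size
   converted to long, roughly { 0, n, 1, 4 }, ready to be spliced into
   the argument list of the combined GOMP_parallel_loop_* call.  */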

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
          && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while this is
     implementable, adding another big set of APIs or slowing down
     the normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
          || (last_and_only_stmt (ws_entry_bb)
              && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
        {
          /* If this is a combined parallel loop, we need to determine
             whether or not to use the combined library calls.  There
             are two cases where we do not apply the transformation:
             static loops and any kind of ordered loop.  In the first
             case, we already open code the loop so there is no need
             to do anything else.  In the latter case, the combined
             parallel loop call would still need extra synchronization
             to implement ordered semantics, so there would not be any
             gain in using the combined call.  */
          tree clauses = gimple_omp_for_clauses (ws_stmt);
          tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
          if (c == NULL
              || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
                  == OMP_CLAUSE_SCHEDULE_STATIC)
              || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
              || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
              || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
                  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
            return;
        }
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
               && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                    OMP_CLAUSE__REDUCTEMP_)
                   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                       OMP_CLAUSE__CONDTEMP_)))
        return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
           gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
               region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
             region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
                struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
         regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
         regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
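
/* E.g. for COND a < b this builds the statement
     if (a < b) goto <...>; else goto <...>;
   with both branch destinations still NULL; callers are expected to
   create the outgoing true/false edges themselves.  (Illustrative
   only.)  */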

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
        indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
        {
          gomp_target *tgt_stmt
            = as_a <gomp_target *> (last_stmt (region->entry));

          if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                               OMP_CLAUSE__GRIDDIM_))
            return indirect;
          else
            return true;
        }
    }

  if (lookup_attribute ("omp declare target",
                        DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
                          tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
        entry_stmt = last_stmt (region->entry);
        parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
        break;
      case GIMPLE_OMP_TARGET:
        entry_stmt = last_stmt (region->entry);
        parent_fndecl
          = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
        break;
      default:
        break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
        {
          DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
          BLOCK_VARS (b) = child_fndecl;
        }
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
                      gomp_parallel *entry_stmt,
                      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
        {
        case GIMPLE_OMP_FOR:
          gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
          switch (region->inner->sched_kind)
            {
            case OMP_CLAUSE_SCHEDULE_RUNTIME:
              /* For lastprivate(conditional:), our implementation
                 requires monotonic behavior.  */
              if (region->inner->has_lastprivate_conditional != 0)
                start_ix2 = 3;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
                start_ix2 = 6;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
                start_ix2 = 7;
              else
                start_ix2 = 3;
              break;
            case OMP_CLAUSE_SCHEDULE_DYNAMIC:
            case OMP_CLAUSE_SCHEDULE_GUIDED:
              if ((region->inner->sched_modifiers
                   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
                  && !region->inner->has_lastprivate_conditional)
                {
                  start_ix2 = 3 + region->inner->sched_kind;
                  break;
                }
              /* FALLTHRU */
            default:
              start_ix2 = region->inner->sched_kind;
              break;
            }
          start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
          start_ix = (enum built_in_function) start_ix2;
          break;
        case GIMPLE_OMP_SECTIONS:
          start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
        val = fold_build2_loc (clause_loc,
                               EQ_EXPR, unsigned_type_node, cond,
                               build_int_cst (TREE_TYPE (cond), 0));
      else
        {
          basic_block cond_bb, then_bb, else_bb;
          edge e, e_then, e_else;
          tree tmp_then, tmp_else, tmp_join, tmp_var;

          tmp_var = create_tmp_var (TREE_TYPE (val));
          if (gimple_in_ssa_p (cfun))
            {
              tmp_then = make_ssa_name (tmp_var);
              tmp_else = make_ssa_name (tmp_var);
              tmp_join = make_ssa_name (tmp_var);
            }
          else
            {
              tmp_then = tmp_var;
              tmp_else = tmp_var;
              tmp_join = tmp_var;
            }

          e = split_block_after_labels (bb);
          cond_bb = e->src;
          bb = e->dest;
          remove_edge (e);

          then_bb = create_empty_bb (cond_bb);
          else_bb = create_empty_bb (then_bb);
          set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
          set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

          stmt = gimple_build_cond_empty (cond);
          gsi = gsi_start_bb (cond_bb);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

          gsi = gsi_start_bb (then_bb);
          expand_omp_build_assign (&gsi, tmp_then, val, true);

          gsi = gsi_start_bb (else_bb);
          expand_omp_build_assign (&gsi, tmp_else,
                                   build_int_cst (unsigned_type_node, 1),
                                   true);

          make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
          make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
          add_bb_to_loop (then_bb, cond_bb->loop_father);
          add_bb_to_loop (else_bb, cond_bb->loop_father);
          e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
          e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

          if (gimple_in_ssa_p (cfun))
            {
              gphi *phi = create_phi_node (tmp_join, bb);
              add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
              add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
            }

          val = tmp_join;
        }

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
                  fold_convert (type,
                                fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}
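
/* For a plain #pragma omp parallel with no clauses, the code emitted
   above amounts to a single library call along the lines of

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   i.e. child function, shared-data block, num_threads (0 = let the
   runtime choose) and flags.  (Sketch only; the exact temporaries
   vary.)  */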

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
                  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
                  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
                                OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
        iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
        num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
        {
          num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
          if (num_tasks)
            {
              iflags |= GOMP_TASK_FLAG_GRAINSIZE;
              num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
            }
          else
            num_tasks = integer_zero_node;
        }
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
        iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
        iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
        iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
        {
          tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
          t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                               build_int_cst (unsigned_type_node,
                                              GOMP_TASK_FLAG_IF),
                               build_int_cst (unsigned_type_node, 0));
          flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
                                   flags, t);
        }
      else
        cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                           build_int_cst (unsigned_type_node,
                                          GOMP_TASK_FLAG_FINAL),
                           build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
                             OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
                         ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
                         : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
                         11, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), flags,
                         num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
                         9, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), cond, flags,
                         depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
                       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
                               args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
        continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
          && !gimple_omp_return_nowait_p (stmt))
        {
          /* OpenMP 3.0 tasks unfortunately prevent this optimization
             in many cases.  If there could be tasks queued, the barrier
             might be needed to let the tasks run before some local
             variable of the parallel that the task uses as shared
             runs out of scope.  The task can be spawned either
             from within current function (this would be easy to check)
             or from some function it calls and gets passed an address
             of such a variable.  */
          if (any_addressable_vars < 0)
            {
              gomp_parallel *parallel_stmt
                = as_a <gomp_parallel *> (last_stmt (region->entry));
              tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
              tree local_decls, block, decl;
              unsigned ix;

              any_addressable_vars = 0;
              FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
                if (TREE_ADDRESSABLE (decl))
                  {
                    any_addressable_vars = 1;
                    break;
                  }
              for (block = gimple_block (stmt);
                   !any_addressable_vars
                   && block
                   && TREE_CODE (block) == BLOCK;
                   block = BLOCK_SUPERCONTEXT (block))
                {
                  for (local_decls = BLOCK_VARS (block);
                       local_decls;
                       local_decls = DECL_CHAIN (local_decls))
                    if (TREE_ADDRESSABLE (local_decls))
                      {
                        any_addressable_vars = 1;
                        break;
                      }
                  if (block == gimple_block (parallel_stmt))
                    break;
                }
            }
          if (!any_addressable_vars)
            gimple_omp_return_set_nowait (stmt);
        }
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
        {
          region = region->next;
          remove_exit_barriers (region);
        }
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
                      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
                                          OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *call = gsi_stmt (gsi);
        tree decl;

        if (is_gimple_call (call)
            && (decl = gimple_call_fndecl (call))
            && DECL_EXTERNAL (decl)
            && TREE_PUBLIC (decl)
            && DECL_INITIAL (decl) == NULL)
          {
            tree built_in;

            if (DECL_NAME (decl) == thr_num_id)
              {
                /* In #pragma omp task untied omp_get_thread_num () can change
                   during the execution of the task region.  */
                if (untied_task)
                  continue;
                built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
              }
            else if (DECL_NAME (decl) == num_thr_id)
              built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
            else
              continue;

            if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
                || gimple_call_num_args (call) != 0)
              continue;

            if (flag_exceptions && !TREE_NOTHROW (decl))
              continue;

            if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
                || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
                                        TREE_TYPE (TREE_TYPE (built_in))))
              continue;

            gimple_call_set_fndecl (call, built_in);
          }
      }
}
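
/* E.g. a parallel body containing

     n = omp_get_num_threads ();

   is redirected to __builtin_omp_get_num_threads (), whose const
   attribute then lets later passes CSE repeated calls within the
   body.  (Illustrative example.)  */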

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
                         bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
                                   !after, after ? GSI_CONTINUE_LINKING
                                                 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
         the region, in which case all we need to do is make the
         sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
        {
          exit_succ_e = single_succ_edge (exit_bb);
          make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
        }
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the parallel body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
        {
          basic_block entry_succ_bb
            = single_succ_p (entry_bb) ? single_succ (entry_bb)
                                       : FALLTHRU_EDGE (entry_bb)->dest;
          tree arg;
          gimple *parcopy_stmt = NULL;

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gimple *stmt;

              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We're ignoring the subcode because we're
                     effectively doing a STRIP_NOPS.  */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && (TREE_OPERAND (arg, 0)
                          == gimple_omp_taskreg_data_arg (entry_stmt)))
                    {
                      parcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (parcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          if (!gimple_in_ssa_p (cfun))
            {
              if (gimple_assign_lhs (parcopy_stmt) == arg)
                gsi_remove (&gsi, true);
              else
                {
                  /* ?? Is setting the subcode really necessary ??  */
                  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
                  gimple_assign_set_rhs1 (parcopy_stmt, arg);
                }
            }
          else
            {
              tree lhs = gimple_assign_lhs (parcopy_stmt);
              gcc_assert (SSA_NAME_VAR (lhs) == arg);
              /* We'd like to set the rhs to the default def in the child_fn,
                 but it's too early to create ssa names in the child_fn.
                 Instead, we set the rhs to the parm.  In
                 move_sese_region_to_fn, we introduce a default def for the
                 parm, map the parm to its default def, and once we encounter
                 this stmt, replace the parm with the default def.  */
              gimple_assign_set_rhs1 (parcopy_stmt, arg);
              update_stmt (parcopy_stmt);
            }
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
         rather than in containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
         so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
                           || gimple_code (stmt) == GIMPLE_OMP_TASK
                           || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
        single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
        {
          e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
          gcc_assert (e2->dest == region->exit);
          remove_edge (BRANCH_EDGE (entry_bb));
          set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
          gsi = gsi_last_nondebug_bb (region->exit);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          gsi_remove (&gsi, true);
        }

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_nondebug_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && (gimple_code (gsi_stmt (gsi))
                          == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
        {
          init_tree_ssa (child_cfun);
          init_ssa_operands (child_cfun);
          child_cfun->gimple_df->in_ssa_p = true;
          block = NULL_TREE;
        }
      else
        block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
        {
          basic_block dest_bb = e2->dest;
          if (!exit_bb)
            make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
          remove_edge (e2);
          set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
        }
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_needed (child_fn);

      if (optimize)
        optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (gimple_in_ssa_p (cfun))
        update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
                          as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
                           gimple_stmt_iterator *gsi,
                           oacc_collapse *counts, tree bound_type,
                           location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
        plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
        diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
        diff_type = integer_type_node;

      if (tiling)
        {
          tree num = build_int_cst (integer_type_node, fd->collapse);
          tree loop_no = build_int_cst (integer_type_node, ix);
          tree tile = TREE_VALUE (tiling);
          gcall *call
            = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
                                          tile,
                                          /* gwv-outer=*/integer_zero_node,
                                          /* gwv-inner=*/integer_zero_node);

          counts[ix].outer = create_tmp_var (iter_type, ".outer");
          counts[ix].tile = create_tmp_var (diff_type, ".tile");
          gimple_call_set_lhs (call, counts[ix].tile);
          gimple_set_location (call, loc);
          gsi_insert_before (gsi, call, GSI_SAME_STMT);

          tiling = TREE_CHAIN (tiling);
        }
      else
        {
          counts[ix].tile = NULL;
          counts[ix].outer = loop->v;
        }

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
        s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
        s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
                          fold_convert (plus_type, negating ? b : e),
                          fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
        expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
        (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
                           fold_convert (bound_type, iters));
    }

  return total;
}
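
/* Sanity-checking the iteration count above on a small example: for a
   loop with n1 = 0, n2 = 10, step = 3 and cond_code LT_EXPR, range is
   10 and iters = (10 - 1 + 3) / 3 = 4, matching the iteration values
   0, 3, 6 and 9.  (Illustrative arithmetic only.)  */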
1657
02889d23
CLT
1658/* Emit initializers for collapsed loop members. INNER is true if
1659 this is for the element loop of a TILE. IVAR is the outer
629b3d75
MJ
1660 loop iteration variable, from which collapsed loop iteration values
1661 are calculated. COUNTS array has been initialized by
1662 expand_oacc_collapse_inits. */
1663
1664static void
02889d23 1665expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
629b3d75
MJ
1666 gimple_stmt_iterator *gsi,
1667 const oacc_collapse *counts, tree ivar)
1668{
1669 tree ivar_type = TREE_TYPE (ivar);
1670
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1674 {
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
02889d23
CLT
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
629b3d75
MJ
1679 tree diff_type = TREE_TYPE (collapse->step);
1680 tree plus_type = iter_type;
1681 enum tree_code plus_code = PLUS_EXPR;
1682 tree expr;
1683
1684 if (POINTER_TYPE_P (iter_type))
1685 {
1686 plus_code = POINTER_PLUS_EXPR;
1687 plus_type = sizetype;
1688 }
1689
02889d23
CLT
1690 expr = ivar;
1691 if (ix)
1692 {
1693 tree mod = fold_convert (ivar_type, collapse->iters);
1694 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1695 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1696 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 }
1699
1700 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1701 collapse->step);
1702 expr = fold_build2 (plus_code, iter_type,
1703 inner ? collapse->outer : collapse->base,
1704 fold_convert (plus_type, expr));
1705 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1706 true, GSI_SAME_STMT);
1707 gassign *ass = gimple_build_assign (v, expr);
1708 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1709 }
1710}
1711
1712/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1713 of the combined collapse > 1 loop constructs, generate code like:
1714 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1715 if (cond3 is <)
1716 adj = STEP3 - 1;
1717 else
1718 adj = STEP3 + 1;
1719 count3 = (adj + N32 - N31) / STEP3;
1720 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1721 if (cond2 is <)
1722 adj = STEP2 - 1;
1723 else
1724 adj = STEP2 + 1;
1725 count2 = (adj + N22 - N21) / STEP2;
1726 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1727 if (cond1 is <)
1728 adj = STEP1 - 1;
1729 else
1730 adj = STEP1 + 1;
1731 count1 = (adj + N12 - N11) / STEP1;
1732 count = count1 * count2 * count3;
1733 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1734 count = 0;
1735 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1736 of the combined loop constructs, just initialize COUNTS array
1737 from the _looptemp_ clauses. */
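/* E.g. for (V1 = 0; V1 < 10; V1 += 3): cond1 is <, so
 adj = STEP1 - 1 = 2 and count1 = (2 + 10 - 0) / 3 = 4, matching
 the iterations 0, 3, 6 and 9. */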
1738
1739/* NOTE: It *could* be better to moosh all of the BBs together,
1740 creating one larger BB with all the computation and the unexpected
1741 jump at the end. I.e.
1742
1743 bool zero3, zero2, zero1, zero;
1744
1745 zero3 = N32 c3 N31;
1746 count3 = (N32 - N31) /[cl] STEP3;
1747 zero2 = N22 c2 N21;
1748 count2 = (N22 - N21) /[cl] STEP2;
1749 zero1 = N12 c1 N11;
1750 count1 = (N12 - N11) /[cl] STEP1;
1751 zero = zero3 || zero2 || zero1;
1752 count = count1 * count2 * count3;
1753 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1754
1755 After all, we expect zero to be false, and thus we expect to have to
1756 evaluate all of the comparison expressions, so short-circuiting
1757 oughtn't be a win. Since the condition isn't protecting a
1758 denominator, we're not concerned about divide-by-zero, so we can
1759 fully evaluate count even if a numerator turned out to be wrong.
1760
1761 It seems like putting this all together would create much better
1762 scheduling opportunities, and less pressure on the chip's branch
1763 predictor. */
1764
1765static void
1766expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1767 basic_block &entry_bb, tree *counts,
1768 basic_block &zero_iter1_bb, int &first_zero_iter1,
1769 basic_block &zero_iter2_bb, int &first_zero_iter2,
1770 basic_block &l2_dom_bb)
1771{
1772 tree t, type = TREE_TYPE (fd->loop.v);
1773 edge e, ne;
1774 int i;
1775
1776 /* Collapsed loops need work for expansion into SSA form. */
1777 gcc_assert (!gimple_in_ssa_p (cfun));
1778
1779 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1780 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1781 {
1782 gcc_assert (fd->ordered == 0);
1783 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1784 isn't supposed to be handled, as the inner loop doesn't
1785 use it. */
1786 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1787 OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1790 {
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 counts[i] = OMP_CLAUSE_DECL (innerc);
1796 else
1797 counts[0] = NULL_TREE;
1798 }
1799 return;
1800 }
1801
1802 for (i = fd->collapse; i < fd->ordered; i++)
1803 {
1804 tree itype = TREE_TYPE (fd->loops[i].v);
1805 counts[i] = NULL_TREE;
1806 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1807 fold_convert (itype, fd->loops[i].n1),
1808 fold_convert (itype, fd->loops[i].n2));
1809 if (t && integer_zerop (t))
1810 {
1811 for (i = fd->collapse; i < fd->ordered; i++)
1812 counts[i] = build_int_cst (type, 0);
1813 break;
1814 }
1815 }
1816 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1817 {
1818 tree itype = TREE_TYPE (fd->loops[i].v);
1819
1820 if (i >= fd->collapse && counts[i])
1821 continue;
1822 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1823 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1824 fold_convert (itype, fd->loops[i].n1),
1825 fold_convert (itype, fd->loops[i].n2)))
1826 == NULL_TREE || !integer_onep (t)))
1827 {
1828 gcond *cond_stmt;
1829 tree n1, n2;
1830 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1831 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1832 true, GSI_SAME_STMT);
1833 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1834 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1835 true, GSI_SAME_STMT);
1836 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1837 NULL_TREE, NULL_TREE);
1838 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1839 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1840 expand_omp_regimplify_p, NULL, NULL)
1841 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1842 expand_omp_regimplify_p, NULL, NULL))
1843 {
1844 *gsi = gsi_for_stmt (cond_stmt);
1845 gimple_regimplify_operands (cond_stmt, gsi);
1846 }
1847 e = split_block (entry_bb, cond_stmt);
1848 basic_block &zero_iter_bb
1849 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1850 int &first_zero_iter
1851 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1852 if (zero_iter_bb == NULL)
1853 {
1854 gassign *assign_stmt;
1855 first_zero_iter = i;
1856 zero_iter_bb = create_empty_bb (entry_bb);
1857 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1858 *gsi = gsi_after_labels (zero_iter_bb);
1859 if (i < fd->collapse)
1860 assign_stmt = gimple_build_assign (fd->loop.n2,
1861 build_zero_cst (type));
1862 else
1863 {
1864 counts[i] = create_tmp_reg (type, ".count");
1865 assign_stmt
1866 = gimple_build_assign (counts[i], build_zero_cst (type));
1867 }
1868 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1869 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1870 entry_bb);
1871 }
1872 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1873 ne->probability = profile_probability::very_unlikely ();
1874 e->flags = EDGE_TRUE_VALUE;
1875 e->probability = ne->probability.invert ();
1876 if (l2_dom_bb == NULL)
1877 l2_dom_bb = entry_bb;
1878 entry_bb = e->dest;
1879 *gsi = gsi_last_nondebug_bb (entry_bb);
1880 }
1881
1882 if (POINTER_TYPE_P (itype))
1883 itype = signed_type_for (itype);
1884 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1885 ? -1 : 1));
1886 t = fold_build2 (PLUS_EXPR, itype,
1887 fold_convert (itype, fd->loops[i].step), t);
1888 t = fold_build2 (PLUS_EXPR, itype, t,
1889 fold_convert (itype, fd->loops[i].n2));
1890 t = fold_build2 (MINUS_EXPR, itype, t,
1891 fold_convert (itype, fd->loops[i].n1));
1892 /* ?? We could probably use CEIL_DIV_EXPR instead of
1893 TRUNC_DIV_EXPR and adjust by hand. Except that we can't
1894 generate the same code in the end because generically we
1895 don't know that the values involved must be negative for
1896 GT?? */
1897 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1898 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1899 fold_build1 (NEGATE_EXPR, itype, t),
1900 fold_build1 (NEGATE_EXPR, itype,
1901 fold_convert (itype,
1902 fd->loops[i].step)));
1903 else
1904 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1905 fold_convert (itype, fd->loops[i].step));
1906 t = fold_convert (type, t);
1907 if (TREE_CODE (t) == INTEGER_CST)
1908 counts[i] = t;
1909 else
1910 {
1911 if (i < fd->collapse || i != first_zero_iter2)
1912 counts[i] = create_tmp_reg (type, ".count");
1913 expand_omp_build_assign (gsi, counts[i], t);
1914 }
1915 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1916 {
1917 if (i == 0)
1918 t = counts[0];
1919 else
1920 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1921 expand_omp_build_assign (gsi, fd->loop.n2, t);
1922 }
1923 }
1924}
1925
1926/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1927 T = V;
1928 V3 = N31 + (T % count3) * STEP3;
1929 T = T / count3;
1930 V2 = N21 + (T % count2) * STEP2;
1931 T = T / count2;
1932 V1 = N11 + T * STEP1;
1933 if this loop doesn't have an inner loop construct combined with it.
1934 If it does have an inner loop construct combined with it and the
1935 iteration count isn't known constant, store values from counts array
1936 into its _looptemp_ temporaries instead. */
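/* E.g. with count3 = 5 and count2 = 4, T = V = 17 gives
 V3 = N31 + (17 % 5) * STEP3, T = 17 / 5 = 3,
 V2 = N21 + (3 % 4) * STEP2, T = 3 / 4 = 0,
 V1 = N11 + 0 * STEP1, i.e. element (0, 3, 2) of the collapsed
 iteration space. */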
1937
1938static void
1939expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1940 tree *counts, gimple *inner_stmt, tree startvar)
1941{
1942 int i;
1943 if (gimple_omp_for_combined_p (fd->for_stmt))
1944 {
1945 /* If fd->loop.n2 is constant, then no propagation of the counts
1946 is needed, they are constant. */
1947 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1948 return;
1949
1950 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1951 ? gimple_omp_taskreg_clauses (inner_stmt)
1952 : gimple_omp_for_clauses (inner_stmt);
1953 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1954 isn't supposed to be handled, as the inner loop doesn't
1955 use it. */
1956 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1957 gcc_assert (innerc);
1958 for (i = 0; i < fd->collapse; i++)
1959 {
1960 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1961 OMP_CLAUSE__LOOPTEMP_);
1962 gcc_assert (innerc);
1963 if (i)
1964 {
1965 tree tem = OMP_CLAUSE_DECL (innerc);
1966 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1967 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1968 false, GSI_CONTINUE_LINKING);
1969 gassign *stmt = gimple_build_assign (tem, t);
1970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1971 }
1972 }
1973 return;
1974 }
1975
1976 tree type = TREE_TYPE (fd->loop.v);
1977 tree tem = create_tmp_reg (type, ".tem");
1978 gassign *stmt = gimple_build_assign (tem, startvar);
1979 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1980
1981 for (i = fd->collapse - 1; i >= 0; i--)
1982 {
1983 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1984 itype = vtype;
1985 if (POINTER_TYPE_P (vtype))
1986 itype = signed_type_for (vtype);
1987 if (i != 0)
1988 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1989 else
1990 t = tem;
1991 t = fold_convert (itype, t);
1992 t = fold_build2 (MULT_EXPR, itype, t,
1993 fold_convert (itype, fd->loops[i].step));
1994 if (POINTER_TYPE_P (vtype))
1995 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1996 else
1997 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1998 t = force_gimple_operand_gsi (gsi, t,
1999 DECL_P (fd->loops[i].v)
2000 && TREE_ADDRESSABLE (fd->loops[i].v),
2001 NULL_TREE, false,
2002 GSI_CONTINUE_LINKING);
2003 stmt = gimple_build_assign (fd->loops[i].v, t);
2004 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2005 if (i != 0)
2006 {
2007 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2008 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2009 false, GSI_CONTINUE_LINKING);
2010 stmt = gimple_build_assign (tem, t);
2011 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2012 }
2013 }
2014}
2015
2016/* Helper function for expand_omp_for_*. Generate code like:
2017 L10:
2018 V3 += STEP3;
2019 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2020 L11:
2021 V3 = N31;
2022 V2 += STEP2;
2023 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2024 L12:
2025 V2 = N21;
2026 V1 += STEP1;
2027 goto BODY_BB; */
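/* This is an odometer-style increment: the innermost V3 is stepped
 first, and only when it runs past its bound is it reset to N31 and
 the next outer variable stepped, and so on outwards. */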
2028
2029static basic_block
2030extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2031 basic_block body_bb)
2032{
2033 basic_block last_bb, bb, collapse_bb = NULL;
2034 int i;
2035 gimple_stmt_iterator gsi;
2036 edge e;
2037 tree t;
2038 gimple *stmt;
2039
2040 last_bb = cont_bb;
2041 for (i = fd->collapse - 1; i >= 0; i--)
2042 {
2043 tree vtype = TREE_TYPE (fd->loops[i].v);
2044
2045 bb = create_empty_bb (last_bb);
2046 add_bb_to_loop (bb, last_bb->loop_father);
2047 gsi = gsi_start_bb (bb);
2048
2049 if (i < fd->collapse - 1)
2050 {
2051 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2052 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2053
2054 t = fd->loops[i + 1].n1;
2055 t = force_gimple_operand_gsi (&gsi, t,
2056 DECL_P (fd->loops[i + 1].v)
2057 && TREE_ADDRESSABLE (fd->loops[i
2058 + 1].v),
2059 NULL_TREE, false,
2060 GSI_CONTINUE_LINKING);
2061 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2062 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2063 }
2064 else
2065 collapse_bb = bb;
2066
2067 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2068
2069 if (POINTER_TYPE_P (vtype))
2070 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2071 else
2072 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2073 t = force_gimple_operand_gsi (&gsi, t,
2074 DECL_P (fd->loops[i].v)
2075 && TREE_ADDRESSABLE (fd->loops[i].v),
2076 NULL_TREE, false, GSI_CONTINUE_LINKING);
2077 stmt = gimple_build_assign (fd->loops[i].v, t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079
2080 if (i > 0)
2081 {
2082 t = fd->loops[i].n2;
2083 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2084 false, GSI_CONTINUE_LINKING);
2085 tree v = fd->loops[i].v;
2086 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2087 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2088 false, GSI_CONTINUE_LINKING);
2089 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2090 stmt = gimple_build_cond_empty (t);
2091 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2092 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2093 expand_omp_regimplify_p, NULL, NULL)
2094 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2095 expand_omp_regimplify_p, NULL, NULL))
2096 gimple_regimplify_operands (stmt, &gsi);
2097 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2098 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2099 }
2100 else
2101 make_edge (bb, body_bb, EDGE_FALLTHRU);
2102 last_bb = bb;
2103 }
2104
2105 return collapse_bb;
2106}
2107
2108/* Expand #pragma omp ordered depend(source). */
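/* For a loop nest with ordered(N) this emits a call to
 GOMP_doacross_post (or the _ull_ variant for unsigned long long
 iterators), passing the address of the .orditera array holding the
 current iteration vector, so that depend(sink:) waits on this
 iteration in other threads can be satisfied. */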
2109
2110static void
2111expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2112 tree *counts, location_t loc)
2113{
2114 enum built_in_function source_ix
2115 = fd->iter_type == long_integer_type_node
2116 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2117 gimple *g
2118 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2119 build_fold_addr_expr (counts[fd->ordered]));
2120 gimple_set_location (g, loc);
2121 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2122}
2123
2124/* Expand a single depend from #pragma omp ordered depend(sink:...). */
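/* E.g. #pragma omp ordered depend(sink: i - 2) becomes, roughly,
 "if (i - 2 >= lower bound of i) GOMP_doacross_wait (...)", where
 the call's arguments encode the position of the sink iteration
 within the logical (flattened, zero-based) iteration space. */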
2125
2126static void
2127expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2128 tree *counts, tree c, location_t loc)
2129{
2130 auto_vec<tree, 10> args;
2131 enum built_in_function sink_ix
2132 = fd->iter_type == long_integer_type_node
2133 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2134 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2135 int i;
2136 gimple_stmt_iterator gsi2 = *gsi;
2137 bool warned_step = false;
2138
2139 for (i = 0; i < fd->ordered; i++)
2140 {
2141 tree step = NULL_TREE;
2142 off = TREE_PURPOSE (deps);
2143 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2144 {
2145 step = TREE_OPERAND (off, 1);
2146 off = TREE_OPERAND (off, 0);
2147 }
2148 if (!integer_zerop (off))
2149 {
2150 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2151 || fd->loops[i].cond_code == GT_EXPR);
2152 bool forward = fd->loops[i].cond_code == LT_EXPR;
2153 if (step)
2154 {
2155 /* Non-simple Fortran DO loops. If step is variable,
2156 we don't know even the direction at compile time, so we
2157 cannot warn. */
2158 if (TREE_CODE (step) != INTEGER_CST)
2159 break;
2160 forward = tree_int_cst_sgn (step) != -1;
2161 }
2162 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2163 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2164 "waiting for lexically later iteration");
2165 break;
2166 }
2167 deps = TREE_CHAIN (deps);
2168 }
2169 /* If all offsets corresponding to the collapsed loops are zero,
2170 this depend clause can be ignored. FIXME: but there is still a
2171 flush needed. We need to emit one __sync_synchronize () for it
2172 though (perhaps conditionally)? Solve this together with the
2173 conservative dependence folding optimization.
2174 if (i >= fd->collapse)
2175 return; */
2176
2177 deps = OMP_CLAUSE_DECL (c);
2178 gsi_prev (&gsi2);
2179 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2180 edge e2 = split_block_after_labels (e1->dest);
2181
2182 gsi2 = gsi_after_labels (e1->dest);
2183 *gsi = gsi_last_bb (e1->src);
2184 for (i = 0; i < fd->ordered; i++)
2185 {
2186 tree itype = TREE_TYPE (fd->loops[i].v);
2187 tree step = NULL_TREE;
2188 tree orig_off = NULL_TREE;
2189 if (POINTER_TYPE_P (itype))
2190 itype = sizetype;
2191 if (i)
2192 deps = TREE_CHAIN (deps);
2193 off = TREE_PURPOSE (deps);
2194 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2195 {
2196 step = TREE_OPERAND (off, 1);
2197 off = TREE_OPERAND (off, 0);
2198 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2199 && integer_onep (fd->loops[i].step)
2200 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2201 }
2202 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2203 if (step)
2204 {
2205 off = fold_convert_loc (loc, itype, off);
2206 orig_off = off;
2207 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2208 }
2209
2210 if (integer_zerop (off))
2211 t = boolean_true_node;
2212 else
2213 {
2214 tree a;
2215 tree co = fold_convert_loc (loc, itype, off);
2216 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2217 {
2218 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2219 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2220 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2221 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2222 co);
2223 }
2224 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2225 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2226 fd->loops[i].v, co);
2227 else
2228 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2229 fd->loops[i].v, co);
2230 if (step)
2231 {
2232 tree t1, t2;
2233 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2234 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2235 fd->loops[i].n1);
2236 else
2237 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2238 fd->loops[i].n2);
2239 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2240 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2241 fd->loops[i].n2);
2242 else
2243 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2244 fd->loops[i].n1);
2245 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2246 step, build_int_cst (TREE_TYPE (step), 0));
2247 if (TREE_CODE (step) != INTEGER_CST)
2248 {
2249 t1 = unshare_expr (t1);
2250 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2251 false, GSI_CONTINUE_LINKING);
2252 t2 = unshare_expr (t2);
2253 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2254 false, GSI_CONTINUE_LINKING);
2255 }
2256 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2257 t, t2, t1);
2258 }
2259 else if (fd->loops[i].cond_code == LT_EXPR)
2260 {
2261 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2262 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2263 fd->loops[i].n1);
2264 else
2265 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2266 fd->loops[i].n2);
2267 }
2268 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2269 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2270 fd->loops[i].n2);
2271 else
2272 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2273 fd->loops[i].n1);
2274 }
2275 if (cond)
2276 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2277 else
2278 cond = t;
2279
2280 off = fold_convert_loc (loc, itype, off);
2281
2282 if (step
2283 || (fd->loops[i].cond_code == LT_EXPR
2284 ? !integer_onep (fd->loops[i].step)
2285 : !integer_minus_onep (fd->loops[i].step)))
2286 {
2287 if (step == NULL_TREE
2288 && TYPE_UNSIGNED (itype)
2289 && fd->loops[i].cond_code == GT_EXPR)
2290 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2291 fold_build1_loc (loc, NEGATE_EXPR, itype,
2292 s));
2293 else
2294 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2295 orig_off ? orig_off : off, s);
2296 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2297 build_int_cst (itype, 0));
2298 if (integer_zerop (t) && !warned_step)
2299 {
2300 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2301 "refers to iteration never in the iteration "
2302 "space");
2303 warned_step = true;
2304 }
2305 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2306 cond, t);
2307 }
2308
2309 if (i <= fd->collapse - 1 && fd->collapse > 1)
2310 t = fd->loop.v;
2311 else if (counts[i])
2312 t = counts[i];
2313 else
2314 {
2315 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2316 fd->loops[i].v, fd->loops[i].n1);
2317 t = fold_convert_loc (loc, fd->iter_type, t);
2318 }
2319 if (step)
2320 /* OFF has already been divided by STEP earlier. */;
2321 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2322 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2323 fold_build1_loc (loc, NEGATE_EXPR, itype,
2324 s));
2325 else
2326 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2327 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2328 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2329 off = fold_convert_loc (loc, fd->iter_type, off);
2330 if (i <= fd->collapse - 1 && fd->collapse > 1)
2331 {
2332 if (i)
2333 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2334 off);
2335 if (i < fd->collapse - 1)
2336 {
2337 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2338 counts[i]);
2339 continue;
2340 }
2341 }
2342 off = unshare_expr (off);
2343 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2344 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2345 true, GSI_SAME_STMT);
2346 args.safe_push (t);
2347 }
2348 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2349 gimple_set_location (g, loc);
2350 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2351
2352 cond = unshare_expr (cond);
2353 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2354 GSI_CONTINUE_LINKING);
2355 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2356 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2357 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2358 e1->probability = e3->probability.invert ();
2359 e1->flags = EDGE_TRUE_VALUE;
2360 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2361
2362 *gsi = gsi_after_labels (e2->dest);
2363}
2364
2365/* Expand all #pragma omp ordered depend(source) and
2366 #pragma omp ordered depend(sink:...) constructs in the current
2367 #pragma omp for ordered(n) region. */
2368
2369static void
2370expand_omp_ordered_source_sink (struct omp_region *region,
2371 struct omp_for_data *fd, tree *counts,
2372 basic_block cont_bb)
2373{
2374 struct omp_region *inner;
2375 int i;
2376 for (i = fd->collapse - 1; i < fd->ordered; i++)
2377 if (i == fd->collapse - 1 && fd->collapse > 1)
2378 counts[i] = NULL_TREE;
2379 else if (i >= fd->collapse && !cont_bb)
2380 counts[i] = build_zero_cst (fd->iter_type);
2381 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2382 && integer_onep (fd->loops[i].step))
2383 counts[i] = NULL_TREE;
2384 else
2385 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2386 tree atype
2387 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2388 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2389 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2390
2391 for (inner = region->inner; inner; inner = inner->next)
2392 if (inner->type == GIMPLE_OMP_ORDERED)
2393 {
2394 gomp_ordered *ord_stmt = inner->ord_stmt;
2395 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2396 location_t loc = gimple_location (ord_stmt);
2397 tree c;
2398 for (c = gimple_omp_ordered_clauses (ord_stmt);
2399 c; c = OMP_CLAUSE_CHAIN (c))
2400 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2401 break;
2402 if (c)
2403 expand_omp_ordered_source (&gsi, fd, counts, loc);
2404 for (c = gimple_omp_ordered_clauses (ord_stmt);
2405 c; c = OMP_CLAUSE_CHAIN (c))
2406 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2407 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2408 gsi_remove (&gsi, true);
2409 }
2410}
2411
2412/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2413 collapsed. */
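/* E.g. for ordered(2) without collapse, the worksharing loop only
 distributes the outermost dimension; the body is wrapped here in
 one explicit sequential loop over the second dimension, with the
 .orditera array kept up to date for the doacross runtime calls. */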
2414
2415static basic_block
2416expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2417 basic_block cont_bb, basic_block body_bb,
2418 bool ordered_lastprivate)
2419{
2420 if (fd->ordered == fd->collapse)
2421 return cont_bb;
2422
2423 if (!cont_bb)
2424 {
2425 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2426 for (int i = fd->collapse; i < fd->ordered; i++)
2427 {
2428 tree type = TREE_TYPE (fd->loops[i].v);
2429 tree n1 = fold_convert (type, fd->loops[i].n1);
2430 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2431 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2432 size_int (i - fd->collapse + 1),
2433 NULL_TREE, NULL_TREE);
2434 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2435 }
2436 return NULL;
2437 }
2438
2439 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2440 {
2441 tree t, type = TREE_TYPE (fd->loops[i].v);
2442 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2443 expand_omp_build_assign (&gsi, fd->loops[i].v,
2444 fold_convert (type, fd->loops[i].n1));
2445 if (counts[i])
2446 expand_omp_build_assign (&gsi, counts[i],
2447 build_zero_cst (fd->iter_type));
2448 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2449 size_int (i - fd->collapse + 1),
2450 NULL_TREE, NULL_TREE);
2451 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2452 if (!gsi_end_p (gsi))
2453 gsi_prev (&gsi);
2454 else
2455 gsi = gsi_last_bb (body_bb);
2456 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2457 basic_block new_body = e1->dest;
2458 if (body_bb == cont_bb)
2459 cont_bb = new_body;
2460 edge e2 = NULL;
2461 basic_block new_header;
2462 if (EDGE_COUNT (cont_bb->preds) > 0)
2463 {
2464 gsi = gsi_last_bb (cont_bb);
2465 if (POINTER_TYPE_P (type))
2466 t = fold_build_pointer_plus (fd->loops[i].v,
2467 fold_convert (sizetype,
2468 fd->loops[i].step));
2469 else
2470 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2471 fold_convert (type, fd->loops[i].step));
2472 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2473 if (counts[i])
2474 {
2475 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2476 build_int_cst (fd->iter_type, 1));
2477 expand_omp_build_assign (&gsi, counts[i], t);
2478 t = counts[i];
2479 }
2480 else
2481 {
2482 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2483 fd->loops[i].v, fd->loops[i].n1);
2484 t = fold_convert (fd->iter_type, t);
2485 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2486 true, GSI_SAME_STMT);
2487 }
2488 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2489 size_int (i - fd->collapse + 1),
2490 NULL_TREE, NULL_TREE);
2491 expand_omp_build_assign (&gsi, aref, t);
2492 gsi_prev (&gsi);
2493 e2 = split_block (cont_bb, gsi_stmt (gsi));
2494 new_header = e2->dest;
2495 }
2496 else
2497 new_header = cont_bb;
2498 gsi = gsi_after_labels (new_header);
2499 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2500 true, GSI_SAME_STMT);
2501 tree n2
2502 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2503 true, NULL_TREE, true, GSI_SAME_STMT);
2504 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2505 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2506 edge e3 = split_block (new_header, gsi_stmt (gsi));
2507 cont_bb = e3->dest;
2508 remove_edge (e1);
2509 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2510 e3->flags = EDGE_FALSE_VALUE;
2511 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2512 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2513 e1->probability = e3->probability.invert ();
2514
2515 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2516 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2517
2518 if (e2)
2519 {
2520 class loop *loop = alloc_loop ();
2521 loop->header = new_header;
2522 loop->latch = e2->src;
2523 add_loop (loop, body_bb->loop_father);
2524 }
2525 }
2526
2527 /* If there are any lastprivate clauses and it is possible some loops
2528 might have zero iterations, ensure all the decls are initialized,
2529 otherwise we could crash evaluating C++ class iterators with lastprivate
2530 clauses. */
2531 bool need_inits = false;
2532 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2533 if (need_inits)
2534 {
2535 tree type = TREE_TYPE (fd->loops[i].v);
2536 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2537 expand_omp_build_assign (&gsi, fd->loops[i].v,
2538 fold_convert (type, fd->loops[i].n1));
2539 }
2540 else
2541 {
2542 tree type = TREE_TYPE (fd->loops[i].v);
2543 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2544 boolean_type_node,
2545 fold_convert (type, fd->loops[i].n1),
2546 fold_convert (type, fd->loops[i].n2));
2547 if (!integer_onep (this_cond))
2548 need_inits = true;
2549 }
2550
2551 return cont_bb;
2552}
2553
2554/* A subroutine of expand_omp_for. Generate code for a parallel
2555 loop with any schedule. Given parameters:
2556
2557 for (V = N1; V cond N2; V += STEP) BODY;
2558
2559 where COND is "<" or ">", we generate pseudocode
2560
2561 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2562 if (more) goto L0; else goto L3;
2563 L0:
2564 V = istart0;
2565 iend = iend0;
2566 L1:
2567 BODY;
2568 V += STEP;
2569 if (V cond iend) goto L1; else goto L2;
2570 L2:
2571 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2572 L3:
2573
2574 If this is a combined omp parallel loop, instead of the call to
2575 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2576 If this is gimple_omp_for_combined_p loop, then instead of assigning
2577 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2578 inner GIMPLE_OMP_FOR and V += STEP; and
2579 if (V cond iend) goto L1; else goto L2; are removed.
2580
2581 For collapsed loops, given parameters:
2582 collapse(3)
2583 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2584 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2585 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2586 BODY;
2587
2588 we generate pseudocode
2589
2590 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2591 if (cond3 is <)
2592 adj = STEP3 - 1;
2593 else
2594 adj = STEP3 + 1;
2595 count3 = (adj + N32 - N31) / STEP3;
2596 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2597 if (cond2 is <)
2598 adj = STEP2 - 1;
2599 else
2600 adj = STEP2 + 1;
2601 count2 = (adj + N22 - N21) / STEP2;
2602 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2603 if (cond1 is <)
2604 adj = STEP1 - 1;
2605 else
2606 adj = STEP1 + 1;
2607 count1 = (adj + N12 - N11) / STEP1;
2608 count = count1 * count2 * count3;
2609 goto Z1;
2610 Z0:
2611 count = 0;
2612 Z1:
2613 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2614 if (more) goto L0; else goto L3;
2615 L0:
2616 V = istart0;
2617 T = V;
2618 V3 = N31 + (T % count3) * STEP3;
2619 T = T / count3;
2620 V2 = N21 + (T % count2) * STEP2;
2621 T = T / count2;
2622 V1 = N11 + T * STEP1;
2623 iend = iend0;
2624 L1:
2625 BODY;
2626 V += 1;
2627 if (V < iend) goto L10; else goto L2;
2628 L10:
2629 V3 += STEP3;
2630 if (V3 cond3 N32) goto L1; else goto L11;
2631 L11:
2632 V3 = N31;
2633 V2 += STEP2;
2634 if (V2 cond2 N22) goto L1; else goto L12;
2635 L12:
2636 V2 = N21;
2637 V1 += STEP1;
2638 goto L1;
2639 L2:
2640 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2641 L3:
2642
2643 */
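/* E.g. for #pragma omp for schedule(dynamic, 4) the caller passes
 start_fn = BUILT_IN_GOMP_LOOP_DYNAMIC_START and next_fn =
 BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, so each thread repeatedly grabs a
 chunk of up to four iterations as a half-open range
 [istart0, iend0) until the start/next call returns false. */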
2644
2645static void
2646expand_omp_for_generic (struct omp_region *region,
2647 struct omp_for_data *fd,
2648 enum built_in_function start_fn,
2649 enum built_in_function next_fn,
2650 tree sched_arg,
2651 gimple *inner_stmt)
2652{
2653 tree type, istart0, iend0, iend;
2654 tree t, vmain, vback, bias = NULL_TREE;
2655 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2656 basic_block l2_bb = NULL, l3_bb = NULL;
2657 gimple_stmt_iterator gsi;
2658 gassign *assign_stmt;
2659 bool in_combined_parallel = is_combined_parallel (region);
2660 bool broken_loop = region->cont == NULL;
2661 edge e, ne;
2662 tree *counts = NULL;
2663 int i;
2664 bool ordered_lastprivate = false;
2665
2666 gcc_assert (!broken_loop || !in_combined_parallel);
2667 gcc_assert (fd->iter_type == long_integer_type_node
2668 || !in_combined_parallel);
2669
2670 entry_bb = region->entry;
2671 cont_bb = region->cont;
2672 collapse_bb = NULL;
2673 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2674 gcc_assert (broken_loop
2675 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2676 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2677 l1_bb = single_succ (l0_bb);
2678 if (!broken_loop)
2679 {
2680 l2_bb = create_empty_bb (cont_bb);
2681 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2682 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2683 == l1_bb));
2684 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2685 }
2686 else
2687 l2_bb = NULL;
2688 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2689 exit_bb = region->exit;
2690
2691 gsi = gsi_last_nondebug_bb (entry_bb);
2692
2693 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2694 if (fd->ordered
2695 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2696 OMP_CLAUSE_LASTPRIVATE))
2697 ordered_lastprivate = true;
2698 tree reductions = NULL_TREE;
2699 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2700 tree memv = NULL_TREE;
2701 if (fd->lastprivate_conditional)
2702 {
2703 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2704 OMP_CLAUSE__CONDTEMP_);
2705 if (fd->have_pointer_condtemp)
2706 condtemp = OMP_CLAUSE_DECL (c);
2707 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2708 cond_var = OMP_CLAUSE_DECL (c);
2709 }
2710 if (sched_arg)
2711 {
2712 if (fd->have_reductemp)
2713 {
2714 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2715 OMP_CLAUSE__REDUCTEMP_);
2716 reductions = OMP_CLAUSE_DECL (c);
2717 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2718 gimple *g = SSA_NAME_DEF_STMT (reductions);
2719 reductions = gimple_assign_rhs1 (g);
2720 OMP_CLAUSE_DECL (c) = reductions;
2721 entry_bb = gimple_bb (g);
2722 edge e = split_block (entry_bb, g);
2723 if (region->entry == entry_bb)
2724 region->entry = e->dest;
2725 gsi = gsi_last_bb (entry_bb);
2726 }
2727 else
2728 reductions = null_pointer_node;
2729 if (fd->have_pointer_condtemp)
2730 {
2731 tree type = TREE_TYPE (condtemp);
2732 memv = create_tmp_var (type);
2733 TREE_ADDRESSABLE (memv) = 1;
2734 unsigned HOST_WIDE_INT sz
2735 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2736 sz *= fd->lastprivate_conditional;
2737 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2738 false);
2739 mem = build_fold_addr_expr (memv);
2740 }
2741 else
2742 mem = null_pointer_node;
2743 }
2744 if (fd->collapse > 1 || fd->ordered)
2745 {
2746 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2747 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2748
2749 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2750 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2751 zero_iter1_bb, first_zero_iter1,
2752 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2753
2754 if (zero_iter1_bb)
2755 {
2756 /* Some counts[i] vars might be uninitialized if
2757 some loop has zero iterations. But the body shouldn't
2758 be executed in that case, so just avoid uninit warnings. */
2759 for (i = first_zero_iter1;
2760 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2761 if (SSA_VAR_P (counts[i]))
2762 TREE_NO_WARNING (counts[i]) = 1;
2763 gsi_prev (&gsi);
2764 e = split_block (entry_bb, gsi_stmt (gsi));
2765 entry_bb = e->dest;
2766 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2767 gsi = gsi_last_nondebug_bb (entry_bb);
2768 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2769 get_immediate_dominator (CDI_DOMINATORS,
2770 zero_iter1_bb));
2771 }
2772 if (zero_iter2_bb)
2773 {
2774 /* Some counts[i] vars might be uninitialized if
2775 some loop has zero iterations. But the body shouldn't
2776 be executed in that case, so just avoid uninit warnings. */
2777 for (i = first_zero_iter2; i < fd->ordered; i++)
2778 if (SSA_VAR_P (counts[i]))
2779 TREE_NO_WARNING (counts[i]) = 1;
2780 if (zero_iter1_bb)
2781 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2782 else
2783 {
2784 gsi_prev (&gsi);
2785 e = split_block (entry_bb, gsi_stmt (gsi));
2786 entry_bb = e->dest;
2787 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2788 gsi = gsi_last_nondebug_bb (entry_bb);
2789 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2790 get_immediate_dominator
2791 (CDI_DOMINATORS, zero_iter2_bb));
2792 }
2793 }
2794 if (fd->collapse == 1)
2795 {
2796 counts[0] = fd->loop.n2;
2797 fd->loop = fd->loops[0];
2798 }
2799 }
2800
2801 type = TREE_TYPE (fd->loop.v);
2802 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2803 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2804 TREE_ADDRESSABLE (istart0) = 1;
2805 TREE_ADDRESSABLE (iend0) = 1;
2806
2807 /* See if we need to bias by LLONG_MIN. */
2808 if (fd->iter_type == long_long_unsigned_type_node
2809 && TREE_CODE (type) == INTEGER_TYPE
2810 && !TYPE_UNSIGNED (type)
2811 && fd->ordered == 0)
2812 {
2813 tree n1, n2;
2814
2815 if (fd->loop.cond_code == LT_EXPR)
2816 {
2817 n1 = fd->loop.n1;
2818 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2819 }
2820 else
2821 {
2822 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2823 n2 = fd->loop.n1;
2824 }
2825 if (TREE_CODE (n1) != INTEGER_CST
2826 || TREE_CODE (n2) != INTEGER_CST
2827 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2828 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2829 }
2830
2831 gimple_stmt_iterator gsif = gsi;
2832 gsi_prev (&gsif);
2833
2834 tree arr = NULL_TREE;
2835 if (in_combined_parallel)
2836 {
2837 gcc_assert (fd->ordered == 0);
2838 /* In a combined parallel loop, emit a call to
2839 GOMP_loop_foo_next. */
2840 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2841 build_fold_addr_expr (istart0),
2842 build_fold_addr_expr (iend0));
2843 }
2844 else
2845 {
2846 tree t0, t1, t2, t3, t4;
2847 /* If this is not a combined parallel loop, emit a call to
2848 GOMP_loop_foo_start in ENTRY_BB. */
2849 t4 = build_fold_addr_expr (iend0);
2850 t3 = build_fold_addr_expr (istart0);
2851 if (fd->ordered)
2852 {
2853 t0 = build_int_cst (unsigned_type_node,
2854 fd->ordered - fd->collapse + 1);
2855 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2856 fd->ordered
2857 - fd->collapse + 1),
2858 ".omp_counts");
2859 DECL_NAMELESS (arr) = 1;
2860 TREE_ADDRESSABLE (arr) = 1;
2861 TREE_STATIC (arr) = 1;
2862 vec<constructor_elt, va_gc> *v;
2863 vec_alloc (v, fd->ordered - fd->collapse + 1);
2864 int idx;
2865
2866 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2867 {
2868 tree c;
2869 if (idx == 0 && fd->collapse > 1)
2870 c = fd->loop.n2;
2871 else
2872 c = counts[idx + fd->collapse - 1];
2873 tree purpose = size_int (idx);
2874 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2875 if (TREE_CODE (c) != INTEGER_CST)
2876 TREE_STATIC (arr) = 0;
2877 }
2878
2879 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2880 if (!TREE_STATIC (arr))
2881 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2882 void_type_node, arr),
2883 true, NULL_TREE, true, GSI_SAME_STMT);
2884 t1 = build_fold_addr_expr (arr);
2885 t2 = NULL_TREE;
2886 }
2887 else
2888 {
2889 t2 = fold_convert (fd->iter_type, fd->loop.step);
2890 t1 = fd->loop.n2;
2891 t0 = fd->loop.n1;
2892 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2893 {
2894 tree innerc
2895 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2896 OMP_CLAUSE__LOOPTEMP_);
2897 gcc_assert (innerc);
2898 t0 = OMP_CLAUSE_DECL (innerc);
2899 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2900 OMP_CLAUSE__LOOPTEMP_);
2901 gcc_assert (innerc);
2902 t1 = OMP_CLAUSE_DECL (innerc);
2903 }
2904 if (POINTER_TYPE_P (TREE_TYPE (t0))
2905 && TYPE_PRECISION (TREE_TYPE (t0))
2906 != TYPE_PRECISION (fd->iter_type))
2907 {
2908 /* Avoid casting pointers to integer of a different size. */
2909 tree itype = signed_type_for (type);
2910 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2911 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2912 }
2913 else
2914 {
2915 t1 = fold_convert (fd->iter_type, t1);
2916 t0 = fold_convert (fd->iter_type, t0);
2917 }
2918 if (bias)
2919 {
2920 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2921 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2922 }
2923 }
2924 if (fd->iter_type == long_integer_type_node || fd->ordered)
2925 {
2926 if (fd->chunk_size)
2927 {
2928 t = fold_convert (fd->iter_type, fd->chunk_size);
2929 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2930 if (sched_arg)
2931 {
2932 if (fd->ordered)
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 8, t0, t1, sched_arg, t, t3, t4,
2935 reductions, mem);
2936 else
2937 t = build_call_expr (builtin_decl_explicit (start_fn),
2938 9, t0, t1, t2, sched_arg, t, t3, t4,
2939 reductions, mem);
2940 }
2941 else if (fd->ordered)
2942 t = build_call_expr (builtin_decl_explicit (start_fn),
2943 5, t0, t1, t, t3, t4);
2944 else
2945 t = build_call_expr (builtin_decl_explicit (start_fn),
2946 6, t0, t1, t2, t, t3, t4);
2947 }
2948 else if (fd->ordered)
2949 t = build_call_expr (builtin_decl_explicit (start_fn),
2950 4, t0, t1, t3, t4);
2951 else
2952 t = build_call_expr (builtin_decl_explicit (start_fn),
2953 5, t0, t1, t2, t3, t4);
2954 }
2955 else
2956 {
2957 tree t5;
2958 tree c_bool_type;
2959 tree bfn_decl;
2960
2961 /* The GOMP_loop_ull_*start functions have additional boolean
2962 argument, true for < loops and false for > loops.
2963 In Fortran, the C bool type can be different from
2964 boolean_type_node. */
2965 bfn_decl = builtin_decl_explicit (start_fn);
2966 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2967 t5 = build_int_cst (c_bool_type,
2968 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2969 if (fd->chunk_size)
2970 {
2971 tree bfn_decl = builtin_decl_explicit (start_fn);
2972 t = fold_convert (fd->iter_type, fd->chunk_size);
2973 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2974 if (sched_arg)
2975 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2976 t, t3, t4, reductions, mem);
2977 else
2978 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2979 }
2980 else
2981 t = build_call_expr (builtin_decl_explicit (start_fn),
2982 6, t5, t0, t1, t2, t3, t4);
2983 }
2984 }
2985 if (TREE_TYPE (t) != boolean_type_node)
2986 t = fold_build2 (NE_EXPR, boolean_type_node,
2987 t, build_int_cst (TREE_TYPE (t), 0));
2988 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2989 true, GSI_SAME_STMT);
2990 if (arr && !TREE_STATIC (arr))
2991 {
2992 tree clobber = build_clobber (TREE_TYPE (arr));
2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2994 GSI_SAME_STMT);
2995 }
2996 if (fd->have_pointer_condtemp)
2997 expand_omp_build_assign (&gsi, condtemp, memv, false);
2998 if (fd->have_reductemp)
2999 {
3000 gimple *g = gsi_stmt (gsi);
3001 gsi_remove (&gsi, true);
3002 release_ssa_name (gimple_assign_lhs (g));
3003
3004 entry_bb = region->entry;
3005 gsi = gsi_last_nondebug_bb (entry_bb);
3006
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3008 }
3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3010
3011 /* Remove the GIMPLE_OMP_FOR statement. */
3012 gsi_remove (&gsi, true);
3013
3014 if (gsi_end_p (gsif))
3015 gsif = gsi_after_labels (gsi_bb (gsif));
3016 gsi_next (&gsif);
3017
3018 /* Iteration setup for sequential loop goes in L0_BB. */
3019 tree startvar = fd->loop.v;
3020 tree endvar = NULL_TREE;
3021
3022 if (gimple_omp_for_combined_p (fd->for_stmt))
3023 {
3024 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3025 && gimple_omp_for_kind (inner_stmt)
3026 == GF_OMP_FOR_KIND_SIMD);
3027 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3028 OMP_CLAUSE__LOOPTEMP_);
3029 gcc_assert (innerc);
3030 startvar = OMP_CLAUSE_DECL (innerc);
3031 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3032 OMP_CLAUSE__LOOPTEMP_);
3033 gcc_assert (innerc);
3034 endvar = OMP_CLAUSE_DECL (innerc);
3035 }
3036
3037 gsi = gsi_start_bb (l0_bb);
3038 t = istart0;
3039 if (fd->ordered && fd->collapse == 1)
3040 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3041 fold_convert (fd->iter_type, fd->loop.step));
3042 else if (bias)
3043 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3044 if (fd->ordered && fd->collapse == 1)
3045 {
3046 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3047 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, fold_convert (sizetype, t));
3049 else
3050 {
3051 t = fold_convert (TREE_TYPE (startvar), t);
3052 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3053 fd->loop.n1, t);
3054 }
3055 }
3056 else
3057 {
3058 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3059 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3060 t = fold_convert (TREE_TYPE (startvar), t);
3061 }
3062 t = force_gimple_operand_gsi (&gsi, t,
3063 DECL_P (startvar)
3064 && TREE_ADDRESSABLE (startvar),
3065 NULL_TREE, false, GSI_CONTINUE_LINKING);
3066 assign_stmt = gimple_build_assign (startvar, t);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 if (cond_var)
3069 {
3070 tree itype = TREE_TYPE (cond_var);
3071 /* For lastprivate(conditional:) itervar, we need some iteration
3072 counter that starts at unsigned non-zero and increases.
3073 Prefer as few IVs as possible, so if we can use startvar
3074 itself, use that, or startvar + constant (those would be
3075 incremented with step), and as last resort use the s0 + 1
3076 incremented by 1. */
3077 if ((fd->ordered && fd->collapse == 1)
3078 || bias
3079 || POINTER_TYPE_P (type)
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3081 || fd->loop.cond_code != LT_EXPR)
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3083 build_int_cst (itype, 1));
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3085 t = fold_convert (itype, t);
3086 else
3087 {
3088 tree c = fold_convert (itype, fd->loop.n1);
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3091 }
3092 t = force_gimple_operand_gsi (&gsi, t, false,
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 assign_stmt = gimple_build_assign (cond_var, t);
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3096 }
3097
3098 t = iend0;
3099 if (fd->ordered && fd->collapse == 1)
3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3101 fold_convert (fd->iter_type, fd->loop.step));
3102 else if (bias)
3103 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3104 if (fd->ordered && fd->collapse == 1)
3105 {
3106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3107 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, fold_convert (sizetype, t));
3109 else
3110 {
3111 t = fold_convert (TREE_TYPE (startvar), t);
3112 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3113 fd->loop.n1, t);
3114 }
3115 }
3116 else
3117 {
3118 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3119 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3120 t = fold_convert (TREE_TYPE (startvar), t);
3121 }
3122 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3123 false, GSI_CONTINUE_LINKING);
3124 if (endvar)
3125 {
3126 assign_stmt = gimple_build_assign (endvar, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3129 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3130 else
3131 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3133 }
3134 /* Handle linear clause adjustments. */
3135 tree itercnt = NULL_TREE;
3136 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3137 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3138 c; c = OMP_CLAUSE_CHAIN (c))
3139 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3140 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3141 {
3142 tree d = OMP_CLAUSE_DECL (c);
3143 bool is_ref = omp_is_reference (d);
3144 tree t = d, a, dest;
3145 if (is_ref)
3146 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3147 tree type = TREE_TYPE (t);
3148 if (POINTER_TYPE_P (type))
3149 type = sizetype;
3150 dest = unshare_expr (t);
3151 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3152 expand_omp_build_assign (&gsif, v, t);
3153 if (itercnt == NULL_TREE)
3154 {
3155 itercnt = startvar;
3156 tree n1 = fd->loop.n1;
3157 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3158 {
3159 itercnt
3160 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3161 itercnt);
3162 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3163 }
3164 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3165 itercnt, n1);
3166 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3167 itercnt, fd->loop.step);
3168 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3169 NULL_TREE, false,
3170 GSI_CONTINUE_LINKING);
3171 }
3172 a = fold_build2 (MULT_EXPR, type,
3173 fold_convert (type, itercnt),
3174 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3175 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3176 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3177 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3178 false, GSI_CONTINUE_LINKING);
3179 assign_stmt = gimple_build_assign (dest, t);
3180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3181 }
3182 if (fd->collapse > 1)
3183 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3184
3185 if (fd->ordered)
3186 {
3187 /* Until now, counts array contained number of iterations or
3188 variable containing it for ith loop. From now on, we need
3189 those counts only for collapsed loops, and only for the 2nd
3190 till the last collapsed one. Move those one element earlier,
3191 we'll use counts[fd->collapse - 1] for the first source/sink
3192 iteration counter and so on and counts[fd->ordered]
3193 as the array holding the current counter values for
3194 depend(source). */
3195 if (fd->collapse > 1)
3196 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3197 if (broken_loop)
3198 {
3199 int i;
3200 for (i = fd->collapse; i < fd->ordered; i++)
3201 {
3202 tree type = TREE_TYPE (fd->loops[i].v);
3203 tree this_cond
3204 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3205 fold_convert (type, fd->loops[i].n1),
3206 fold_convert (type, fd->loops[i].n2));
3207 if (!integer_onep (this_cond))
3208 break;
3209 }
3210 if (i < fd->ordered)
3211 {
3212 cont_bb
3213 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3214 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3215 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3216 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3217 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3218 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3219 make_edge (cont_bb, l1_bb, 0);
3220 l2_bb = create_empty_bb (cont_bb);
3221 broken_loop = false;
3222 }
3223 }
3224 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3225 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3226 ordered_lastprivate);
3227 if (counts[fd->collapse - 1])
3228 {
3229 gcc_assert (fd->collapse == 1);
3230 gsi = gsi_last_bb (l0_bb);
3231 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3232 istart0, true);
3233 gsi = gsi_last_bb (cont_bb);
3234 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3235 build_int_cst (fd->iter_type, 1));
3236 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_zero_node, NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3240 t = counts[fd->collapse - 1];
3241 }
3242 else if (fd->collapse > 1)
3243 t = fd->loop.v;
3244 else
3245 {
3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3247 fd->loops[0].v, fd->loops[0].n1);
3248 t = fold_convert (fd->iter_type, t);
3249 }
3250 gsi = gsi_last_bb (l0_bb);
3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3252 size_zero_node, NULL_TREE, NULL_TREE);
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 expand_omp_build_assign (&gsi, aref, t, true);
3256 }
3257
3258 if (!broken_loop)
3259 {
3260 /* Code to control the increment and predicate for the sequential
3261 loop goes in the CONT_BB. */
3262 gsi = gsi_last_nondebug_bb (cont_bb);
3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3265 vmain = gimple_omp_continue_control_use (cont_stmt);
3266 vback = gimple_omp_continue_control_def (cont_stmt);
3267
3268 if (cond_var)
3269 {
3270 tree itype = TREE_TYPE (cond_var);
3271 tree t2;
3272 if ((fd->ordered && fd->collapse == 1)
3273 || bias
3274 || POINTER_TYPE_P (type)
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3276 || fd->loop.cond_code != LT_EXPR)
3277 t2 = build_int_cst (itype, 1);
3278 else
3279 t2 = fold_convert (itype, fd->loop.step);
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3282 NULL_TREE, true, GSI_SAME_STMT);
3283 assign_stmt = gimple_build_assign (cond_var, t2);
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 }
3286
3287 if (!gimple_omp_for_combined_p (fd->for_stmt))
3288 {
3289 if (POINTER_TYPE_P (type))
3290 t = fold_build_pointer_plus (vmain, fd->loop.step);
3291 else
3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3293 t = force_gimple_operand_gsi (&gsi, t,
3294 DECL_P (vback)
3295 && TREE_ADDRESSABLE (vback),
3296 NULL_TREE, true, GSI_SAME_STMT);
3297 assign_stmt = gimple_build_assign (vback, t);
3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3299
3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3301 {
3302 tree tem;
3303 if (fd->collapse > 1)
3304 tem = fd->loop.v;
3305 else
3306 {
3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3308 fd->loops[0].v, fd->loops[0].n1);
3309 tem = fold_convert (fd->iter_type, tem);
3310 }
3311 tree aref = build4 (ARRAY_REF, fd->iter_type,
3312 counts[fd->ordered], size_zero_node,
3313 NULL_TREE, NULL_TREE);
3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3315 true, GSI_SAME_STMT);
3316 expand_omp_build_assign (&gsi, aref, tem);
3317 }
3318
3319 t = build2 (fd->loop.cond_code, boolean_type_node,
3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3321 iend);
3322 gcond *cond_stmt = gimple_build_cond_empty (t);
3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3324 }
3325
3326 /* Remove GIMPLE_OMP_CONTINUE. */
3327 gsi_remove (&gsi, true);
3328
3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3331
3332 /* Emit code to get the next parallel iteration in L2_BB. */
3333 gsi = gsi_start_bb (l2_bb);
3334
3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3336 build_fold_addr_expr (istart0),
3337 build_fold_addr_expr (iend0));
3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3339 false, GSI_CONTINUE_LINKING);
3340 if (TREE_TYPE (t) != boolean_type_node)
3341 t = fold_build2 (NE_EXPR, boolean_type_node,
3342 t, build_int_cst (TREE_TYPE (t), 0));
3343 gcond *cond_stmt = gimple_build_cond_empty (t);
3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3345 }
3346
3347 /* Add the loop cleanup function. */
 3348 gsi = gsi_last_nondebug_bb (exit_bb);
3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3353 else
3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3355 gcall *call_stmt = gimple_build_call (t, 0);
3356 if (fd->ordered)
3357 {
3358 tree arr = counts[fd->ordered];
 3359 tree clobber = build_clobber (TREE_TYPE (arr));
3360 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3361 GSI_SAME_STMT);
3362 }
3363 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3364 {
3365 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3366 if (fd->have_reductemp)
3367 {
3368 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3369 gimple_call_lhs (call_stmt));
3370 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3371 }
3372 }
3373 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3374 gsi_remove (&gsi, true);
3375
3376 /* Connect the new blocks. */
3377 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3378 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3379
3380 if (!broken_loop)
3381 {
3382 gimple_seq phis;
3383
3384 e = find_edge (cont_bb, l3_bb);
3385 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3386
3387 phis = phi_nodes (l3_bb);
3388 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3389 {
3390 gimple *phi = gsi_stmt (gsi);
3391 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3392 PHI_ARG_DEF_FROM_EDGE (phi, e));
3393 }
3394 remove_edge (e);
3395
3396 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3397 e = find_edge (cont_bb, l1_bb);
3398 if (e == NULL)
3399 {
3400 e = BRANCH_EDGE (cont_bb);
3401 gcc_assert (single_succ (e->dest) == l1_bb);
3402 }
3403 if (gimple_omp_for_combined_p (fd->for_stmt))
3404 {
3405 remove_edge (e);
3406 e = NULL;
3407 }
3408 else if (fd->collapse > 1)
3409 {
3410 remove_edge (e);
3411 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3412 }
3413 else
3414 e->flags = EDGE_TRUE_VALUE;
3415 if (e)
3416 {
3417 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3418 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3419 }
3420 else
3421 {
3422 e = find_edge (cont_bb, l2_bb);
3423 e->flags = EDGE_FALLTHRU;
3424 }
3425 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3426
3427 if (gimple_in_ssa_p (cfun))
3428 {
3429 /* Add phis to the outer loop that connect to the phis in the inner,
3430 original loop, and move the loop entry value of the inner phi to
3431 the loop entry value of the outer phi. */
3432 gphi_iterator psi;
3433 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3434 {
 3435 location_t locus;
3436 gphi *nphi;
3437 gphi *exit_phi = psi.phi ();
3438
3439 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3440 continue;
3441
3442 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3443 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3444
3445 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3446 edge latch_to_l1 = find_edge (latch, l1_bb);
3447 gphi *inner_phi
3448 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3449
3450 tree t = gimple_phi_result (exit_phi);
3451 tree new_res = copy_ssa_name (t, NULL);
3452 nphi = create_phi_node (new_res, l0_bb);
3453
3454 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3455 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3456 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3457 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3458 add_phi_arg (nphi, t, entry_to_l0, locus);
3459
3460 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3461 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3462
3463 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
 3464 }
3465 }
3466
3467 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3468 recompute_dominator (CDI_DOMINATORS, l2_bb));
3469 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3470 recompute_dominator (CDI_DOMINATORS, l3_bb));
3471 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3472 recompute_dominator (CDI_DOMINATORS, l0_bb));
3473 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3474 recompute_dominator (CDI_DOMINATORS, l1_bb));
3475
3476 /* We enter expand_omp_for_generic with a loop. This original loop may
3477 have its own loop struct, or it may be part of an outer loop struct
3478 (which may be the fake loop). */
 3479 class loop *outer_loop = entry_bb->loop_father;
3480 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3481
3482 add_bb_to_loop (l2_bb, outer_loop);
3483
3484 /* We've added a new loop around the original loop. Allocate the
3485 corresponding loop struct. */
 3486 class loop *new_loop = alloc_loop ();
3487 new_loop->header = l0_bb;
3488 new_loop->latch = l2_bb;
3489 add_loop (new_loop, outer_loop);
3490
3491 /* Allocate a loop structure for the original loop unless we already
3492 had one. */
3493 if (!orig_loop_has_loop_struct
3494 && !gimple_omp_for_combined_p (fd->for_stmt))
3495 {
 3496 class loop *orig_loop = alloc_loop ();
3497 orig_loop->header = l1_bb;
3498 /* The loop may have multiple latches. */
3499 add_loop (orig_loop, new_loop);
3500 }
3501 }
3502}
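
/* For reference (a sketch only, assuming the libgomp
   GOMP_loop_*_start/_next interface; START_FN stands for the _start
   entrypoint chosen earlier and NEXT_FN for the _next entrypoint
   called in L2_BB above), the blocks wired up by this function
   implement roughly:

       if (START_FN (..., &istart0, &iend0)) goto L0; else goto L3;
   L0: V = istart0; iend = iend0;
   L1: BODY; V += STEP; if (V cond iend) goto L1; else goto L2;
   L2: if (NEXT_FN (&istart0, &iend0)) goto L0; else goto L3;
   L3: GOMP_loop_end{,_cancel,_nowait} ();  */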
3503
3504/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
 3505 compute the needed allocation size: for !ALLOC the size of the team
 3506 allocations, for ALLOC the size of the thread allocation. SZ is the
 3507 initial size needed for other purposes, ALLOC_ALIGN the guaranteed
 3508 alignment of the allocation in bytes, CNT the number of elements of
 3509 each array: omp_get_num_threads () for !ALLOC, the number of
 3510 iterations handled by the current thread for ALLOC. If PTR is
 3511 non-NULL, it is the start of the allocation and this routine assigns
 3512 to OMP_CLAUSE_DECL (c) of the _scantemp_ clauses pointers to the corresponding arrays. */
3513
3514static tree
3515expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3516 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3517 gimple_stmt_iterator *gsi, bool alloc)
3518{
3519 tree eltsz = NULL_TREE;
3520 unsigned HOST_WIDE_INT preval = 0;
3521 if (ptr && sz)
3522 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3523 ptr, size_int (sz));
3524 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3525 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3526 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3527 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3528 {
3529 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3530 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3531 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3532 {
3533 unsigned HOST_WIDE_INT szl
3534 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3535 szl = least_bit_hwi (szl);
3536 if (szl)
3537 al = MIN (al, szl);
3538 }
3539 if (ptr == NULL_TREE)
3540 {
3541 if (eltsz == NULL_TREE)
3542 eltsz = TYPE_SIZE_UNIT (pointee_type);
3543 else
3544 eltsz = size_binop (PLUS_EXPR, eltsz,
3545 TYPE_SIZE_UNIT (pointee_type));
3546 }
3547 if (preval == 0 && al <= alloc_align)
3548 {
3549 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3550 sz += diff;
3551 if (diff && ptr)
3552 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3553 ptr, size_int (diff));
3554 }
3555 else if (al > preval)
3556 {
3557 if (ptr)
3558 {
3559 ptr = fold_convert (pointer_sized_int_node, ptr);
3560 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3561 build_int_cst (pointer_sized_int_node,
3562 al - 1));
3563 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3564 build_int_cst (pointer_sized_int_node,
3565 -(HOST_WIDE_INT) al));
3566 ptr = fold_convert (ptr_type_node, ptr);
3567 }
3568 else
3569 sz += al - 1;
3570 }
3571 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3572 preval = al;
3573 else
3574 preval = 1;
3575 if (ptr)
3576 {
3577 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3578 ptr = OMP_CLAUSE_DECL (c);
3579 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3580 size_binop (MULT_EXPR, cnt,
3581 TYPE_SIZE_UNIT (pointee_type)));
3582 }
3583 }
3584
3585 if (ptr == NULL_TREE)
3586 {
3587 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3588 if (sz)
3589 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3590 return eltsz;
3591 }
3592 else
3593 return ptr;
3594}
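
/* A worked example of the sizing logic above (illustrative only): with
   SZ = 4, ALLOC_ALIGN = 8 and two non-control _scantemp_ arrays whose
   element type has TYPE_SIZE_UNIT 8 and alignment 8, the first clause
   pads SZ from 4 up to 8 (AL <= ALLOC_ALIGN, so rounding suffices) and
   ELTSZ becomes 8; the second clause only adds another 8 to ELTSZ, so
   the PTR == NULL call returns 16 * CNT + 8 bytes.  */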
3595
3596/* A subroutine of expand_omp_for. Generate code for a parallel
3597 loop with static schedule and no specified chunk size. Given
3598 parameters:
3599
3600 for (V = N1; V cond N2; V += STEP) BODY;
3601
3602 where COND is "<" or ">", we generate pseudocode
3603
3604 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3605 if (cond is <)
3606 adj = STEP - 1;
3607 else
3608 adj = STEP + 1;
3609 if ((__typeof (V)) -1 > 0 && cond is >)
3610 n = -(adj + N2 - N1) / -STEP;
3611 else
3612 n = (adj + N2 - N1) / STEP;
3613 q = n / nthreads;
3614 tt = n % nthreads;
3615 if (threadid < tt) goto L3; else goto L4;
3616 L3:
3617 tt = 0;
3618 q = q + 1;
3619 L4:
3620 s0 = q * threadid + tt;
3621 e0 = s0 + q;
3622 V = s0 * STEP + N1;
3623 if (s0 >= e0) goto L2; else goto L0;
3624 L0:
3625 e = e0 * STEP + N1;
3626 L1:
3627 BODY;
3628 V += STEP;
3629 if (V cond e) goto L1;
3630 L2:
3631*/
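
/* Worked example of the partitioning above (illustrative): for
   (V = 0; V < 10; V += 3) with nthreads = 3 we get
   n = (3 - 1 + 10 - 0) / 3 = 4, q = 1, tt = 1. Thread 0
   (threadid < tt) takes q + 1 = 2 iterations: s0 = 0, e0 = 2,
   i.e. V = 0 and V = 3; thread 1 gets s0 = 1 * 1 + 1 = 2, e0 = 3
   (V = 6); thread 2 gets s0 = 3, e0 = 4 (V = 9).  */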
3632
3633static void
3634expand_omp_for_static_nochunk (struct omp_region *region,
3635 struct omp_for_data *fd,
3636 gimple *inner_stmt)
3637{
 3638 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3639 tree type, itype, vmain, vback;
3640 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3641 basic_block body_bb, cont_bb, collapse_bb = NULL;
3642 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3643 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
 3644 gimple_stmt_iterator gsi, gsip;
3645 edge ep;
3646 bool broken_loop = region->cont == NULL;
3647 tree *counts = NULL;
3648 tree n1, n2, step;
 3649 tree reductions = NULL_TREE;
 3650 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3651
3652 itype = type = TREE_TYPE (fd->loop.v);
3653 if (POINTER_TYPE_P (type))
3654 itype = signed_type_for (type);
3655
3656 entry_bb = region->entry;
3657 cont_bb = region->cont;
3658 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3659 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3660 gcc_assert (broken_loop
3661 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3662 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3663 body_bb = single_succ (seq_start_bb);
3664 if (!broken_loop)
3665 {
3666 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3667 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3668 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3669 }
3670 exit_bb = region->exit;
3671
3672 /* Iteration space partitioning goes in ENTRY_BB. */
 3673 gsi = gsi_last_nondebug_bb (entry_bb);
 3674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3675 gsip = gsi;
3676 gsi_prev (&gsip);
3677
3678 if (fd->collapse > 1)
3679 {
3680 int first_zero_iter = -1, dummy = -1;
3681 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3682
3683 counts = XALLOCAVEC (tree, fd->collapse);
3684 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3685 fin_bb, first_zero_iter,
3686 dummy_bb, dummy, l2_dom_bb);
3687 t = NULL_TREE;
3688 }
3689 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3690 t = integer_one_node;
3691 else
3692 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3693 fold_convert (type, fd->loop.n1),
3694 fold_convert (type, fd->loop.n2));
3695 if (fd->collapse == 1
3696 && TYPE_UNSIGNED (type)
3697 && (t == NULL_TREE || !integer_onep (t)))
3698 {
3699 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3700 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3701 true, GSI_SAME_STMT);
3702 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3703 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3704 true, GSI_SAME_STMT);
3705 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
 3706 NULL_TREE, NULL_TREE);
3707 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3708 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3709 expand_omp_regimplify_p, NULL, NULL)
3710 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3711 expand_omp_regimplify_p, NULL, NULL))
3712 {
3713 gsi = gsi_for_stmt (cond_stmt);
3714 gimple_regimplify_operands (cond_stmt, &gsi);
3715 }
3716 ep = split_block (entry_bb, cond_stmt);
3717 ep->flags = EDGE_TRUE_VALUE;
3718 entry_bb = ep->dest;
 3719 ep->probability = profile_probability::very_likely ();
 3720 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
 3721 ep->probability = profile_probability::very_unlikely ();
3722 if (gimple_in_ssa_p (cfun))
3723 {
3724 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3725 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3726 !gsi_end_p (gpi); gsi_next (&gpi))
3727 {
3728 gphi *phi = gpi.phi ();
3729 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3730 ep, UNKNOWN_LOCATION);
3731 }
3732 }
3733 gsi = gsi_last_bb (entry_bb);
3734 }
3735
3736 if (fd->lastprivate_conditional)
3737 {
3738 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3739 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3740 if (fd->have_pointer_condtemp)
3741 condtemp = OMP_CLAUSE_DECL (c);
3742 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3743 cond_var = OMP_CLAUSE_DECL (c);
3744 }
 3745 if (fd->have_reductemp
3746 /* For scan, we don't want to reinitialize condtemp before the
3747 second loop. */
3748 || (fd->have_pointer_condtemp && !fd->have_scantemp)
 3749 || fd->have_nonctrl_scantemp)
3750 {
3751 tree t1 = build_int_cst (long_integer_type_node, 0);
3752 tree t2 = build_int_cst (long_integer_type_node, 1);
3753 tree t3 = build_int_cstu (long_integer_type_node,
3754 (HOST_WIDE_INT_1U << 31) + 1);
3755 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3756 gimple_stmt_iterator gsi2 = gsi_none ();
3757 gimple *g = NULL;
3758 tree mem = null_pointer_node, memv = NULL_TREE;
3759 unsigned HOST_WIDE_INT condtemp_sz = 0;
3760 unsigned HOST_WIDE_INT alloc_align = 0;
3761 if (fd->have_reductemp)
3762 {
 3763 gcc_assert (!fd->have_nonctrl_scantemp);
3764 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3765 reductions = OMP_CLAUSE_DECL (c);
3766 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3767 g = SSA_NAME_DEF_STMT (reductions);
3768 reductions = gimple_assign_rhs1 (g);
3769 OMP_CLAUSE_DECL (c) = reductions;
3770 gsi2 = gsi_for_stmt (g);
3771 }
3772 else
3773 {
3774 if (gsi_end_p (gsip))
3775 gsi2 = gsi_after_labels (region->entry);
3776 else
3777 gsi2 = gsip;
3778 reductions = null_pointer_node;
3779 }
 3780 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
 3781 {
3782 tree type;
3783 if (fd->have_pointer_condtemp)
3784 type = TREE_TYPE (condtemp);
3785 else
3786 type = ptr_type_node;
3787 memv = create_tmp_var (type);
3788 TREE_ADDRESSABLE (memv) = 1;
3789 unsigned HOST_WIDE_INT sz = 0;
3790 tree size = NULL_TREE;
3791 if (fd->have_pointer_condtemp)
3792 {
3793 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3794 sz *= fd->lastprivate_conditional;
3795 condtemp_sz = sz;
3796 }
3797 if (fd->have_nonctrl_scantemp)
3798 {
3799 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3800 gimple *g = gimple_build_call (nthreads, 0);
3801 nthreads = create_tmp_var (integer_type_node);
3802 gimple_call_set_lhs (g, nthreads);
3803 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3804 nthreads = fold_convert (sizetype, nthreads);
3805 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3806 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3807 alloc_align, nthreads, NULL,
3808 false);
3809 size = fold_convert (type, size);
3810 }
3811 else
3812 size = build_int_cst (type, sz);
3813 expand_omp_build_assign (&gsi2, memv, size, false);
3814 mem = build_fold_addr_expr (memv);
3815 }
3816 tree t
3817 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3818 9, t1, t2, t2, t3, t1, null_pointer_node,
 3819 null_pointer_node, reductions, mem);
3820 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3821 true, GSI_SAME_STMT);
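 /* Note: the iteration space passed above (0, 1, 1, with NULL
 ISTART/IEND pointers) is a dummy; at this point the
 GOMP_loop_start call appears to serve only to hand REDUCTIONS
 to the runtime and to receive the memory returned through MEM. */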
 3822 if (fd->have_pointer_condtemp)
 3823 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3824 if (fd->have_nonctrl_scantemp)
3825 {
3826 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3827 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3828 alloc_align, nthreads, &gsi2, false);
3829 }
3830 if (fd->have_reductemp)
3831 {
3832 gsi_remove (&gsi2, true);
3833 release_ssa_name (gimple_assign_lhs (g));
3834 }
 3835 }
3836 switch (gimple_omp_for_kind (fd->for_stmt))
3837 {
3838 case GF_OMP_FOR_KIND_FOR:
3839 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3840 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3841 break;
3842 case GF_OMP_FOR_KIND_DISTRIBUTE:
3843 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3844 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3845 break;
3846 default:
3847 gcc_unreachable ();
3848 }
3849 nthreads = build_call_expr (nthreads, 0);
3850 nthreads = fold_convert (itype, nthreads);
3851 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3852 true, GSI_SAME_STMT);
3853 threadid = build_call_expr (threadid, 0);
3854 threadid = fold_convert (itype, threadid);
3855 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3856 true, GSI_SAME_STMT);
3857
3858 n1 = fd->loop.n1;
3859 n2 = fd->loop.n2;
3860 step = fd->loop.step;
3861 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3862 {
3863 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3864 OMP_CLAUSE__LOOPTEMP_);
3865 gcc_assert (innerc);
3866 n1 = OMP_CLAUSE_DECL (innerc);
3867 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3868 OMP_CLAUSE__LOOPTEMP_);
3869 gcc_assert (innerc);
3870 n2 = OMP_CLAUSE_DECL (innerc);
3871 }
3872 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3873 true, NULL_TREE, true, GSI_SAME_STMT);
3874 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3875 true, NULL_TREE, true, GSI_SAME_STMT);
3876 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3877 true, NULL_TREE, true, GSI_SAME_STMT);
3878
3879 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3880 t = fold_build2 (PLUS_EXPR, itype, step, t);
3881 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3882 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3883 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3884 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3885 fold_build1 (NEGATE_EXPR, itype, t),
3886 fold_build1 (NEGATE_EXPR, itype, step));
3887 else
3888 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3889 t = fold_convert (itype, t);
3890 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3891
3892 q = create_tmp_reg (itype, "q");
3893 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3894 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3895 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3896
3897 tt = create_tmp_reg (itype, "tt");
3898 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3899 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3900 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3901
3902 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3903 gcond *cond_stmt = gimple_build_cond_empty (t);
3904 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3905
3906 second_bb = split_block (entry_bb, cond_stmt)->dest;
 3907 gsi = gsi_last_nondebug_bb (second_bb);
3908 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3909
3910 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3911 GSI_SAME_STMT);
3912 gassign *assign_stmt
3913 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3914 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3915
3916 third_bb = split_block (second_bb, assign_stmt)->dest;
 3917 gsi = gsi_last_nondebug_bb (third_bb);
3918 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3919
3920 if (fd->have_nonctrl_scantemp)
3921 {
3922 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3923 tree controlp = NULL_TREE, controlb = NULL_TREE;
3924 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3925 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3926 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3927 {
3928 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3929 controlb = OMP_CLAUSE_DECL (c);
3930 else
3931 controlp = OMP_CLAUSE_DECL (c);
3932 if (controlb && controlp)
3933 break;
3934 }
3935 gcc_assert (controlp && controlb);
3936 tree cnt = create_tmp_var (sizetype);
3937 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3938 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3939 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3940 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3941 alloc_align, cnt, NULL, true);
3942 tree size = create_tmp_var (sizetype);
3943 expand_omp_build_assign (&gsi, size, sz, false);
3944 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3945 size, size_int (16384));
3946 expand_omp_build_assign (&gsi, controlb, cmp);
3947 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3948 NULL_TREE, NULL_TREE);
3949 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3950 fourth_bb = split_block (third_bb, g)->dest;
3951 gsi = gsi_last_nondebug_bb (fourth_bb);
 3952 /* FIXME: Once we have allocators, this should use the allocator. */
3953 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3954 gimple_call_set_lhs (g, controlp);
3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3956 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3957 &gsi, true);
3958 gsi_prev (&gsi);
3959 g = gsi_stmt (gsi);
3960 fifth_bb = split_block (fourth_bb, g)->dest;
3961 gsi = gsi_last_nondebug_bb (fifth_bb);
3962
3963 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3964 gimple_call_set_lhs (g, controlp);
3965 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3966 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3967 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3968 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3969 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3970 {
3971 tree tmp = create_tmp_var (sizetype);
3972 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3973 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3974 TYPE_SIZE_UNIT (pointee_type));
3975 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3976 g = gimple_build_call (alloca_decl, 2, tmp,
3977 size_int (TYPE_ALIGN (pointee_type)));
3978 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3979 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3980 }
3981
3982 sixth_bb = split_block (fifth_bb, g)->dest;
3983 gsi = gsi_last_nondebug_bb (sixth_bb);
3984 }
3985
3986 t = build2 (MULT_EXPR, itype, q, threadid);
3987 t = build2 (PLUS_EXPR, itype, t, tt);
3988 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3989
3990 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3991 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3992
3993 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3994 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3995
3996 /* Remove the GIMPLE_OMP_FOR statement. */
3997 gsi_remove (&gsi, true);
3998
3999 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4000 gsi = gsi_start_bb (seq_start_bb);
4001
4002 tree startvar = fd->loop.v;
4003 tree endvar = NULL_TREE;
4004
4005 if (gimple_omp_for_combined_p (fd->for_stmt))
4006 {
4007 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4008 ? gimple_omp_parallel_clauses (inner_stmt)
4009 : gimple_omp_for_clauses (inner_stmt);
4010 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4011 gcc_assert (innerc);
4012 startvar = OMP_CLAUSE_DECL (innerc);
4013 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4014 OMP_CLAUSE__LOOPTEMP_);
4015 gcc_assert (innerc);
4016 endvar = OMP_CLAUSE_DECL (innerc);
4017 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4018 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4019 {
4020 int i;
4021 for (i = 1; i < fd->collapse; i++)
4022 {
4023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4024 OMP_CLAUSE__LOOPTEMP_);
4025 gcc_assert (innerc);
4026 }
4027 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4028 OMP_CLAUSE__LOOPTEMP_);
4029 if (innerc)
4030 {
4031 /* If needed (distribute parallel for with lastprivate),
4032 propagate down the total number of iterations. */
4033 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4034 fd->loop.n2);
4035 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4036 GSI_CONTINUE_LINKING);
4037 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4038 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4039 }
4040 }
4041 }
4042 t = fold_convert (itype, s0);
4043 t = fold_build2 (MULT_EXPR, itype, t, step);
4044 if (POINTER_TYPE_P (type))
4045 {
4046 t = fold_build_pointer_plus (n1, t);
4047 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4048 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4049 t = fold_convert (signed_type_for (type), t);
4050 }
4051 else
4052 t = fold_build2 (PLUS_EXPR, type, t, n1);
4053 t = fold_convert (TREE_TYPE (startvar), t);
4054 t = force_gimple_operand_gsi (&gsi, t,
4055 DECL_P (startvar)
4056 && TREE_ADDRESSABLE (startvar),
4057 NULL_TREE, false, GSI_CONTINUE_LINKING);
4058 assign_stmt = gimple_build_assign (startvar, t);
4059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4060 if (cond_var)
4061 {
4062 tree itype = TREE_TYPE (cond_var);
4063 /* For lastprivate(conditional:) itervar, we need some iteration
4064 counter that starts at unsigned non-zero and increases.
4065 Prefer as few IVs as possible, so if we can use startvar
4066 itself, use that, or startvar + constant (those would be
 4067 incremented with step), and as a last resort use s0 + 1,
 4068 itself incremented by 1. */
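 /* E.g. (illustrative): for (V = 5; V < N; V++) the counter can be
 startvar itself, as it starts at 5 > 0 and only increases; for
 (V = -3; V < N; V++) it becomes startvar + (1 - -3), which
 starts at 1; in the remaining cases s0 + 1 is used. */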
4069 if (POINTER_TYPE_P (type)
4070 || TREE_CODE (n1) != INTEGER_CST
4071 || fd->loop.cond_code != LT_EXPR)
4072 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4073 build_int_cst (itype, 1));
4074 else if (tree_int_cst_sgn (n1) == 1)
4075 t = fold_convert (itype, t);
4076 else
4077 {
4078 tree c = fold_convert (itype, n1);
4079 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4080 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4081 }
4082 t = force_gimple_operand_gsi (&gsi, t, false,
4083 NULL_TREE, false, GSI_CONTINUE_LINKING);
4084 assign_stmt = gimple_build_assign (cond_var, t);
4085 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4086 }
4087
4088 t = fold_convert (itype, e0);
4089 t = fold_build2 (MULT_EXPR, itype, t, step);
4090 if (POINTER_TYPE_P (type))
4091 {
4092 t = fold_build_pointer_plus (n1, t);
4093 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4094 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4095 t = fold_convert (signed_type_for (type), t);
4096 }
4097 else
4098 t = fold_build2 (PLUS_EXPR, type, t, n1);
4099 t = fold_convert (TREE_TYPE (startvar), t);
4100 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4101 false, GSI_CONTINUE_LINKING);
4102 if (endvar)
4103 {
4104 assign_stmt = gimple_build_assign (endvar, e);
4105 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4106 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4107 assign_stmt = gimple_build_assign (fd->loop.v, e);
4108 else
4109 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4110 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4111 }
4112 /* Handle linear clause adjustments. */
4113 tree itercnt = NULL_TREE;
4114 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4115 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4116 c; c = OMP_CLAUSE_CHAIN (c))
4117 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4118 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4119 {
4120 tree d = OMP_CLAUSE_DECL (c);
4121 bool is_ref = omp_is_reference (d);
4122 tree t = d, a, dest;
4123 if (is_ref)
4124 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4125 if (itercnt == NULL_TREE)
4126 {
4127 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4128 {
4129 itercnt = fold_build2 (MINUS_EXPR, itype,
4130 fold_convert (itype, n1),
4131 fold_convert (itype, fd->loop.n1));
4132 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4133 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4134 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4135 NULL_TREE, false,
4136 GSI_CONTINUE_LINKING);
4137 }
4138 else
4139 itercnt = s0;
4140 }
4141 tree type = TREE_TYPE (t);
4142 if (POINTER_TYPE_P (type))
4143 type = sizetype;
4144 a = fold_build2 (MULT_EXPR, type,
4145 fold_convert (type, itercnt),
4146 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4147 dest = unshare_expr (t);
4148 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4149 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4150 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4151 false, GSI_CONTINUE_LINKING);
4152 assign_stmt = gimple_build_assign (dest, t);
4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4154 }
4155 if (fd->collapse > 1)
4156 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4157
4158 if (!broken_loop)
4159 {
4160 /* The code controlling the sequential loop replaces the
4161 GIMPLE_OMP_CONTINUE. */
 4162 gsi = gsi_last_nondebug_bb (cont_bb);
4163 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4164 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4165 vmain = gimple_omp_continue_control_use (cont_stmt);
4166 vback = gimple_omp_continue_control_def (cont_stmt);
4167
4168 if (cond_var)
4169 {
4170 tree itype = TREE_TYPE (cond_var);
4171 tree t2;
4172 if (POINTER_TYPE_P (type)
4173 || TREE_CODE (n1) != INTEGER_CST
4174 || fd->loop.cond_code != LT_EXPR)
4175 t2 = build_int_cst (itype, 1);
4176 else
4177 t2 = fold_convert (itype, step);
4178 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4179 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4180 NULL_TREE, true, GSI_SAME_STMT);
4181 assign_stmt = gimple_build_assign (cond_var, t2);
4182 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4183 }
4184
4185 if (!gimple_omp_for_combined_p (fd->for_stmt))
4186 {
4187 if (POINTER_TYPE_P (type))
4188 t = fold_build_pointer_plus (vmain, step);
4189 else
4190 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4191 t = force_gimple_operand_gsi (&gsi, t,
4192 DECL_P (vback)
4193 && TREE_ADDRESSABLE (vback),
4194 NULL_TREE, true, GSI_SAME_STMT);
4195 assign_stmt = gimple_build_assign (vback, t);
4196 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4197
4198 t = build2 (fd->loop.cond_code, boolean_type_node,
4199 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4200 ? t : vback, e);
4201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4202 }
4203
4204 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4205 gsi_remove (&gsi, true);
4206
4207 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4208 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4209 }
4210
4211 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
 4212 gsi = gsi_last_nondebug_bb (exit_bb);
4213 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4214 {
4215 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4216 if (fd->have_reductemp
4217 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4218 && !fd->have_nonctrl_scantemp))
4219 {
4220 tree fn;
4221 if (t)
4222 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4223 else
4224 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4225 gcall *g = gimple_build_call (fn, 0);
4226 if (t)
4227 {
4228 gimple_call_set_lhs (g, t);
4229 if (fd->have_reductemp)
4230 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4231 NOP_EXPR, t),
4232 GSI_SAME_STMT);
4233 }
4234 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4235 }
4236 else
4237 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
 4238 }
4239 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4240 && !fd->have_nonctrl_scantemp)
4241 {
4242 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4243 gcall *g = gimple_build_call (fn, 0);
4244 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4245 }
4246 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4247 {
4248 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4249 tree controlp = NULL_TREE, controlb = NULL_TREE;
4250 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4251 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4252 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4253 {
4254 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4255 controlb = OMP_CLAUSE_DECL (c);
4256 else
4257 controlp = OMP_CLAUSE_DECL (c);
4258 if (controlb && controlp)
4259 break;
4260 }
4261 gcc_assert (controlp && controlb);
4262 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4263 NULL_TREE, NULL_TREE);
4264 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4265 exit1_bb = split_block (exit_bb, g)->dest;
4266 gsi = gsi_after_labels (exit1_bb);
4267 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4268 controlp);
4269 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4270 exit2_bb = split_block (exit1_bb, g)->dest;
4271 gsi = gsi_after_labels (exit2_bb);
4272 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4273 controlp);
4274 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4275 exit3_bb = split_block (exit2_bb, g)->dest;
4276 gsi = gsi_after_labels (exit3_bb);
4277 }
4278 gsi_remove (&gsi, true);
4279
4280 /* Connect all the blocks. */
4281 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
 4282 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4283 ep = find_edge (entry_bb, second_bb);
4284 ep->flags = EDGE_TRUE_VALUE;
 4285 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4286 if (fourth_bb)
4287 {
4288 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4289 ep->probability
4290 = profile_probability::guessed_always ().apply_scale (1, 2);
4291 ep = find_edge (third_bb, fourth_bb);
4292 ep->flags = EDGE_TRUE_VALUE;
4293 ep->probability
4294 = profile_probability::guessed_always ().apply_scale (1, 2);
4295 ep = find_edge (fourth_bb, fifth_bb);
4296 redirect_edge_and_branch (ep, sixth_bb);
4297 }
4298 else
4299 sixth_bb = third_bb;
4300 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4301 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4302 if (exit1_bb)
4303 {
4304 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4305 ep->probability
4306 = profile_probability::guessed_always ().apply_scale (1, 2);
4307 ep = find_edge (exit_bb, exit1_bb);
4308 ep->flags = EDGE_TRUE_VALUE;
4309 ep->probability
4310 = profile_probability::guessed_always ().apply_scale (1, 2);
4311 ep = find_edge (exit1_bb, exit2_bb);
4312 redirect_edge_and_branch (ep, exit3_bb);
4313 }
4314
4315 if (!broken_loop)
4316 {
4317 ep = find_edge (cont_bb, body_bb);
4318 if (ep == NULL)
4319 {
4320 ep = BRANCH_EDGE (cont_bb);
4321 gcc_assert (single_succ (ep->dest) == body_bb);
4322 }
4323 if (gimple_omp_for_combined_p (fd->for_stmt))
4324 {
4325 remove_edge (ep);
4326 ep = NULL;
4327 }
4328 else if (fd->collapse > 1)
4329 {
4330 remove_edge (ep);
4331 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4332 }
4333 else
4334 ep->flags = EDGE_TRUE_VALUE;
4335 find_edge (cont_bb, fin_bb)->flags
4336 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4337 }
4338
4339 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4340 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4341 if (fourth_bb)
4342 {
4343 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4344 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4345 }
4346 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4347
4348 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4349 recompute_dominator (CDI_DOMINATORS, body_bb));
4350 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4351 recompute_dominator (CDI_DOMINATORS, fin_bb));
4352 if (exit1_bb)
4353 {
4354 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4355 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4356 }
 4357
 4358 class loop *loop = body_bb->loop_father;
4359 if (loop != entry_bb->loop_father)
4360 {
4361 gcc_assert (broken_loop || loop->header == body_bb);
4362 gcc_assert (broken_loop
4363 || loop->latch == region->cont
4364 || single_pred (loop->latch) == region->cont);
4365 return;
4366 }
4367
4368 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4369 {
4370 loop = alloc_loop ();
4371 loop->header = body_bb;
4372 if (collapse_bb == NULL)
4373 loop->latch = cont_bb;
4374 add_loop (loop, body_bb->loop_father);
4375 }
4376}
4377
4378/* Return phi in E->DEST with ARG on edge E. */
4379
4380static gphi *
4381find_phi_with_arg_on_edge (tree arg, edge e)
4382{
4383 basic_block bb = e->dest;
4384
4385 for (gphi_iterator gpi = gsi_start_phis (bb);
4386 !gsi_end_p (gpi);
4387 gsi_next (&gpi))
4388 {
4389 gphi *phi = gpi.phi ();
4390 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4391 return phi;
4392 }
4393
4394 return NULL;
4395}
4396
4397/* A subroutine of expand_omp_for. Generate code for a parallel
4398 loop with static schedule and a specified chunk size. Given
4399 parameters:
4400
4401 for (V = N1; V cond N2; V += STEP) BODY;
4402
4403 where COND is "<" or ">", we generate pseudocode
4404
4405 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4406 if (cond is <)
4407 adj = STEP - 1;
4408 else
4409 adj = STEP + 1;
4410 if ((__typeof (V)) -1 > 0 && cond is >)
4411 n = -(adj + N2 - N1) / -STEP;
4412 else
4413 n = (adj + N2 - N1) / STEP;
4414 trip = 0;
4415 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4416 here so that V is defined
4417 if the loop is not entered
4418 L0:
4419 s0 = (trip * nthreads + threadid) * CHUNK;
 4420 e0 = min (s0 + CHUNK, n);
4421 if (s0 < n) goto L1; else goto L4;
4422 L1:
4423 V = s0 * STEP + N1;
4424 e = e0 * STEP + N1;
4425 L2:
4426 BODY;
4427 V += STEP;
4428 if (V cond e) goto L2; else goto L3;
4429 L3:
4430 trip += 1;
4431 goto L0;
4432 L4:
4433*/
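
/* Worked example of the above (illustrative): for
   (V = 0; V < 10; V += 3) with nthreads = 2 and CHUNK = 1 gives n = 4.
   On trip 0, thread 0 gets s0 = (0 * 2 + 0) * 1 = 0,
   e0 = min (1, 4) = 1 (V = 0) and thread 1 gets s0 = 1, e0 = 2
   (V = 3); on trip 1, thread 0 gets s0 = 2 (V = 6) and thread 1 gets
   s0 = 3 (V = 9); on trip 2 both compute s0 >= n and leave via L4.  */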
4434
4435static void
4436expand_omp_for_static_chunk (struct omp_region *region,
4437 struct omp_for_data *fd, gimple *inner_stmt)
4438{
4439 tree n, s0, e0, e, t;
4440 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4441 tree type, itype, vmain, vback, vextra;
4442 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4443 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
 4444 gimple_stmt_iterator gsi, gsip;
4445 edge se;
4446 bool broken_loop = region->cont == NULL;
4447 tree *counts = NULL;
4448 tree n1, n2, step;
 4449 tree reductions = NULL_TREE;
 4450 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4451
4452 itype = type = TREE_TYPE (fd->loop.v);
4453 if (POINTER_TYPE_P (type))
4454 itype = signed_type_for (type);
4455
4456 entry_bb = region->entry;
4457 se = split_block (entry_bb, last_stmt (entry_bb));
4458 entry_bb = se->src;
4459 iter_part_bb = se->dest;
4460 cont_bb = region->cont;
4461 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4462 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4463 gcc_assert (broken_loop
4464 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4465 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4466 body_bb = single_succ (seq_start_bb);
4467 if (!broken_loop)
4468 {
4469 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4470 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4471 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4472 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4473 }
4474 exit_bb = region->exit;
4475
4476 /* Trip and adjustment setup goes in ENTRY_BB. */
 4477 gsi = gsi_last_nondebug_bb (entry_bb);
 4478 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4479 gsip = gsi;
4480 gsi_prev (&gsip);
4481
4482 if (fd->collapse > 1)
4483 {
4484 int first_zero_iter = -1, dummy = -1;
4485 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4486
4487 counts = XALLOCAVEC (tree, fd->collapse);
4488 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4489 fin_bb, first_zero_iter,
4490 dummy_bb, dummy, l2_dom_bb);
4491 t = NULL_TREE;
4492 }
4493 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4494 t = integer_one_node;
4495 else
4496 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4497 fold_convert (type, fd->loop.n1),
4498 fold_convert (type, fd->loop.n2));
4499 if (fd->collapse == 1
4500 && TYPE_UNSIGNED (type)
4501 && (t == NULL_TREE || !integer_onep (t)))
4502 {
4503 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4504 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4505 true, GSI_SAME_STMT);
4506 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4507 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4508 true, GSI_SAME_STMT);
4509 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4510 NULL_TREE, NULL_TREE);
4511 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4512 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4513 expand_omp_regimplify_p, NULL, NULL)
4514 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4515 expand_omp_regimplify_p, NULL, NULL))
4516 {
4517 gsi = gsi_for_stmt (cond_stmt);
4518 gimple_regimplify_operands (cond_stmt, &gsi);
4519 }
4520 se = split_block (entry_bb, cond_stmt);
4521 se->flags = EDGE_TRUE_VALUE;
4522 entry_bb = se->dest;
 4523 se->probability = profile_probability::very_likely ();
 4524 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
 4525 se->probability = profile_probability::very_unlikely ();
4526 if (gimple_in_ssa_p (cfun))
4527 {
4528 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4529 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4530 !gsi_end_p (gpi); gsi_next (&gpi))
4531 {
4532 gphi *phi = gpi.phi ();
4533 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4534 se, UNKNOWN_LOCATION);
4535 }
4536 }
4537 gsi = gsi_last_bb (entry_bb);
4538 }
4539
4540 if (fd->lastprivate_conditional)
4541 {
4542 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4543 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4544 if (fd->have_pointer_condtemp)
4545 condtemp = OMP_CLAUSE_DECL (c);
4546 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4547 cond_var = OMP_CLAUSE_DECL (c);
4548 }
4549 if (fd->have_reductemp || fd->have_pointer_condtemp)
4550 {
4551 tree t1 = build_int_cst (long_integer_type_node, 0);
4552 tree t2 = build_int_cst (long_integer_type_node, 1);
4553 tree t3 = build_int_cstu (long_integer_type_node,
4554 (HOST_WIDE_INT_1U << 31) + 1);
4555 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4556 gimple_stmt_iterator gsi2 = gsi_none ();
4557 gimple *g = NULL;
4558 tree mem = null_pointer_node, memv = NULL_TREE;
4559 if (fd->have_reductemp)
4560 {
4561 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4562 reductions = OMP_CLAUSE_DECL (c);
4563 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4564 g = SSA_NAME_DEF_STMT (reductions);
4565 reductions = gimple_assign_rhs1 (g);
4566 OMP_CLAUSE_DECL (c) = reductions;
4567 gsi2 = gsi_for_stmt (g);
4568 }
4569 else
4570 {
4571 if (gsi_end_p (gsip))
4572 gsi2 = gsi_after_labels (region->entry);
4573 else
4574 gsi2 = gsip;
4575 reductions = null_pointer_node;
4576 }
 4577 if (fd->have_pointer_condtemp)
 4578 {
4579 tree type = TREE_TYPE (condtemp);
4580 memv = create_tmp_var (type);
4581 TREE_ADDRESSABLE (memv) = 1;
4582 unsigned HOST_WIDE_INT sz
4583 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4584 sz *= fd->lastprivate_conditional;
4585 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4586 false);
4587 mem = build_fold_addr_expr (memv);
4588 }
4589 tree t
4590 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4591 9, t1, t2, t2, t3, t1, null_pointer_node,
 4592 null_pointer_node, reductions, mem);
4593 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4594 true, GSI_SAME_STMT);
 4595 if (fd->have_pointer_condtemp)
4596 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4597 if (fd->have_reductemp)
4598 {
4599 gsi_remove (&gsi2, true);
4600 release_ssa_name (gimple_assign_lhs (g));
4601 }
 4602 }
4603 switch (gimple_omp_for_kind (fd->for_stmt))
4604 {
4605 case GF_OMP_FOR_KIND_FOR:
4606 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4607 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4608 break;
4609 case GF_OMP_FOR_KIND_DISTRIBUTE:
4610 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4611 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4612 break;
4613 default:
4614 gcc_unreachable ();
4615 }
4616 nthreads = build_call_expr (nthreads, 0);
4617 nthreads = fold_convert (itype, nthreads);
4618 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4619 true, GSI_SAME_STMT);
4620 threadid = build_call_expr (threadid, 0);
4621 threadid = fold_convert (itype, threadid);
4622 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4623 true, GSI_SAME_STMT);
4624
4625 n1 = fd->loop.n1;
4626 n2 = fd->loop.n2;
4627 step = fd->loop.step;
4628 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4629 {
4630 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4631 OMP_CLAUSE__LOOPTEMP_);
4632 gcc_assert (innerc);
4633 n1 = OMP_CLAUSE_DECL (innerc);
4634 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4635 OMP_CLAUSE__LOOPTEMP_);
4636 gcc_assert (innerc);
4637 n2 = OMP_CLAUSE_DECL (innerc);
4638 }
4639 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4640 true, NULL_TREE, true, GSI_SAME_STMT);
4641 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4642 true, NULL_TREE, true, GSI_SAME_STMT);
4643 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4644 true, NULL_TREE, true, GSI_SAME_STMT);
4645 tree chunk_size = fold_convert (itype, fd->chunk_size);
4646 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4647 chunk_size
4648 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4649 GSI_SAME_STMT);
4650
4651 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4652 t = fold_build2 (PLUS_EXPR, itype, step, t);
4653 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4654 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4655 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4656 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4657 fold_build1 (NEGATE_EXPR, itype, t),
4658 fold_build1 (NEGATE_EXPR, itype, step));
4659 else
4660 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4661 t = fold_convert (itype, t);
4662 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4663 true, GSI_SAME_STMT);
4664
4665 trip_var = create_tmp_reg (itype, ".trip");
4666 if (gimple_in_ssa_p (cfun))
4667 {
4668 trip_init = make_ssa_name (trip_var);
4669 trip_main = make_ssa_name (trip_var);
4670 trip_back = make_ssa_name (trip_var);
4671 }
4672 else
4673 {
4674 trip_init = trip_var;
4675 trip_main = trip_var;
4676 trip_back = trip_var;
4677 }
4678
4679 gassign *assign_stmt
4680 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4681 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4682
4683 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4684 t = fold_build2 (MULT_EXPR, itype, t, step);
4685 if (POINTER_TYPE_P (type))
4686 t = fold_build_pointer_plus (n1, t);
4687 else
4688 t = fold_build2 (PLUS_EXPR, type, t, n1);
4689 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4690 true, GSI_SAME_STMT);
4691
4692 /* Remove the GIMPLE_OMP_FOR. */
4693 gsi_remove (&gsi, true);
4694
4695 gimple_stmt_iterator gsif = gsi;
4696
4697 /* Iteration space partitioning goes in ITER_PART_BB. */
4698 gsi = gsi_last_bb (iter_part_bb);
4699
4700 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4701 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4702 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4703 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4704 false, GSI_CONTINUE_LINKING);
4705
4706 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4707 t = fold_build2 (MIN_EXPR, itype, t, n);
4708 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4709 false, GSI_CONTINUE_LINKING);
4710
4711 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4712 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4713
4714 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4715 gsi = gsi_start_bb (seq_start_bb);
4716
4717 tree startvar = fd->loop.v;
4718 tree endvar = NULL_TREE;
4719
4720 if (gimple_omp_for_combined_p (fd->for_stmt))
4721 {
4722 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4723 ? gimple_omp_parallel_clauses (inner_stmt)
4724 : gimple_omp_for_clauses (inner_stmt);
4725 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4726 gcc_assert (innerc);
4727 startvar = OMP_CLAUSE_DECL (innerc);
4728 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4729 OMP_CLAUSE__LOOPTEMP_);
4730 gcc_assert (innerc);
4731 endvar = OMP_CLAUSE_DECL (innerc);
4732 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4733 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4734 {
4735 int i;
4736 for (i = 1; i < fd->collapse; i++)
4737 {
4738 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4739 OMP_CLAUSE__LOOPTEMP_);
4740 gcc_assert (innerc);
4741 }
4742 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4743 OMP_CLAUSE__LOOPTEMP_);
4744 if (innerc)
4745 {
4746 /* If needed (distribute parallel for with lastprivate),
4747 propagate down the total number of iterations. */
4748 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4749 fd->loop.n2);
4750 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4751 GSI_CONTINUE_LINKING);
4752 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4753 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4754 }
4755 }
4756 }
4757
4758 t = fold_convert (itype, s0);
4759 t = fold_build2 (MULT_EXPR, itype, t, step);
4760 if (POINTER_TYPE_P (type))
4761 {
4762 t = fold_build_pointer_plus (n1, t);
4763 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4764 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4765 t = fold_convert (signed_type_for (type), t);
4766 }
4767 else
4768 t = fold_build2 (PLUS_EXPR, type, t, n1);
4769 t = fold_convert (TREE_TYPE (startvar), t);
4770 t = force_gimple_operand_gsi (&gsi, t,
4771 DECL_P (startvar)
4772 && TREE_ADDRESSABLE (startvar),
4773 NULL_TREE, false, GSI_CONTINUE_LINKING);
4774 assign_stmt = gimple_build_assign (startvar, t);
4775 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4776 if (cond_var)
4777 {
4778 tree itype = TREE_TYPE (cond_var);
4779 /* For lastprivate(conditional:) itervar, we need some iteration
4780 counter that starts at unsigned non-zero and increases.
4781 Prefer as few IVs as possible, so if we can use startvar
4782 itself, use that, or startvar + constant (those would be
 4783 incremented with step), and as a last resort use s0 + 1,
 4784 itself incremented by 1. */
4785 if (POINTER_TYPE_P (type)
4786 || TREE_CODE (n1) != INTEGER_CST
4787 || fd->loop.cond_code != LT_EXPR)
4788 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4789 build_int_cst (itype, 1));
4790 else if (tree_int_cst_sgn (n1) == 1)
4791 t = fold_convert (itype, t);
4792 else
4793 {
4794 tree c = fold_convert (itype, n1);
4795 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4796 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4797 }
4798 t = force_gimple_operand_gsi (&gsi, t, false,
4799 NULL_TREE, false, GSI_CONTINUE_LINKING);
4800 assign_stmt = gimple_build_assign (cond_var, t);
4801 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4802 }
4803
4804 t = fold_convert (itype, e0);
4805 t = fold_build2 (MULT_EXPR, itype, t, step);
4806 if (POINTER_TYPE_P (type))
4807 {
4808 t = fold_build_pointer_plus (n1, t);
4809 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4810 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4811 t = fold_convert (signed_type_for (type), t);
4812 }
4813 else
4814 t = fold_build2 (PLUS_EXPR, type, t, n1);
4815 t = fold_convert (TREE_TYPE (startvar), t);
4816 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4817 false, GSI_CONTINUE_LINKING);
4818 if (endvar)
4819 {
4820 assign_stmt = gimple_build_assign (endvar, e);
4821 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4822 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4823 assign_stmt = gimple_build_assign (fd->loop.v, e);
4824 else
4825 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4826 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4827 }
4828 /* Handle linear clause adjustments. */
4829 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4830 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4831 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4832 c; c = OMP_CLAUSE_CHAIN (c))
4833 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4834 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4835 {
4836 tree d = OMP_CLAUSE_DECL (c);
4837 bool is_ref = omp_is_reference (d);
4838 tree t = d, a, dest;
4839 if (is_ref)
4840 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4841 tree type = TREE_TYPE (t);
4842 if (POINTER_TYPE_P (type))
4843 type = sizetype;
4844 dest = unshare_expr (t);
4845 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4846 expand_omp_build_assign (&gsif, v, t);
4847 if (itercnt == NULL_TREE)
4848 {
4849 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4850 {
4851 itercntbias
4852 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4853 fold_convert (itype, fd->loop.n1));
4854 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4855 itercntbias, step);
4856 itercntbias
4857 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4858 NULL_TREE, true,
4859 GSI_SAME_STMT);
4860 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4861 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4862 NULL_TREE, false,
4863 GSI_CONTINUE_LINKING);
4864 }
4865 else
4866 itercnt = s0;
4867 }
4868 a = fold_build2 (MULT_EXPR, type,
4869 fold_convert (type, itercnt),
4870 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4871 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4872 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4873 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4874 false, GSI_CONTINUE_LINKING);
4875 assign_stmt = gimple_build_assign (dest, t);
4876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4877 }
4878 if (fd->collapse > 1)
4879 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4880
4881 if (!broken_loop)
4882 {
4883 /* The code controlling the sequential loop goes in CONT_BB,
4884 replacing the GIMPLE_OMP_CONTINUE. */
4885	      gsi = gsi_last_nondebug_bb (cont_bb);
4886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4887 vmain = gimple_omp_continue_control_use (cont_stmt);
4888 vback = gimple_omp_continue_control_def (cont_stmt);
4889
4890 if (cond_var)
4891 {
4892 tree itype = TREE_TYPE (cond_var);
4893 tree t2;
4894 if (POINTER_TYPE_P (type)
4895 || TREE_CODE (n1) != INTEGER_CST
4896 || fd->loop.cond_code != LT_EXPR)
4897 t2 = build_int_cst (itype, 1);
4898 else
4899 t2 = fold_convert (itype, step);
4900 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4901 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4902 NULL_TREE, true, GSI_SAME_STMT);
4903 assign_stmt = gimple_build_assign (cond_var, t2);
4904 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4905 }
4906
4907 if (!gimple_omp_for_combined_p (fd->for_stmt))
4908 {
4909 if (POINTER_TYPE_P (type))
4910 t = fold_build_pointer_plus (vmain, step);
4911 else
4912 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4913 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4915 true, GSI_SAME_STMT);
4916 assign_stmt = gimple_build_assign (vback, t);
4917 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4918
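	  /* With schedule(static, 1) each chunk holds exactly one
	     iteration, so the sequential loop's continuation test can
	     be folded to the always-false 0 == 1 comparison built
	     below.  */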
4919 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4920 t = build2 (EQ_EXPR, boolean_type_node,
4921 build_int_cst (itype, 0),
4922 build_int_cst (itype, 1));
4923 else
4924 t = build2 (fd->loop.cond_code, boolean_type_node,
4925 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4926 ? t : vback, e);
4927 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4928 }
4929
4930 /* Remove GIMPLE_OMP_CONTINUE. */
4931 gsi_remove (&gsi, true);
4932
4933 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4934 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4935
4936 /* Trip update code goes into TRIP_UPDATE_BB. */
4937 gsi = gsi_start_bb (trip_update_bb);
4938
4939 t = build_int_cst (itype, 1);
4940 t = build2 (PLUS_EXPR, itype, trip_main, t);
4941 assign_stmt = gimple_build_assign (trip_back, t);
4942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4943 }
4944
4945 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4946	  gsi = gsi_last_nondebug_bb (exit_bb);
4947 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4948 {
4949 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4950	      if (fd->have_reductemp || fd->have_pointer_condtemp)
4951 {
4952 tree fn;
4953 if (t)
4954 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4955 else
4956 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4957 gcall *g = gimple_build_call (fn, 0);
4958 if (t)
4959 {
4960 gimple_call_set_lhs (g, t);
4961 if (fd->have_reductemp)
4962 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4963 NOP_EXPR, t),
4964 GSI_SAME_STMT);
4965 }
4966 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4967 }
4968 else
4969 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4970	    }
4971 else if (fd->have_pointer_condtemp)
4972 {
4973 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4974 gcall *g = gimple_build_call (fn, 0);
4975 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4976 }
4977 gsi_remove (&gsi, true);
4978
4979 /* Connect the new blocks. */
4980 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4981 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4982
4983 if (!broken_loop)
4984 {
4985 se = find_edge (cont_bb, body_bb);
4986 if (se == NULL)
4987 {
4988 se = BRANCH_EDGE (cont_bb);
4989 gcc_assert (single_succ (se->dest) == body_bb);
4990 }
4991 if (gimple_omp_for_combined_p (fd->for_stmt))
4992 {
4993 remove_edge (se);
4994 se = NULL;
4995 }
4996 else if (fd->collapse > 1)
4997 {
4998 remove_edge (se);
4999 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5000 }
5001 else
5002 se->flags = EDGE_TRUE_VALUE;
5003 find_edge (cont_bb, trip_update_bb)->flags
5004 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5005
5006 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5007 iter_part_bb);
5008 }
5009
5010 if (gimple_in_ssa_p (cfun))
5011 {
5012 gphi_iterator psi;
5013 gphi *phi;
5014 edge re, ene;
5015 edge_var_map *vm;
5016 size_t i;
5017
5018 gcc_assert (fd->collapse == 1 && !broken_loop);
5019
5020 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5021 remove arguments of the phi nodes in fin_bb. We need to create
5022 appropriate phi nodes in iter_part_bb instead. */
5023 se = find_edge (iter_part_bb, fin_bb);
5024 re = single_succ_edge (trip_update_bb);
5025 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5026 ene = single_succ_edge (entry_bb);
5027
5028 psi = gsi_start_phis (fin_bb);
5029 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5030 gsi_next (&psi), ++i)
5031 {
5032 gphi *nphi;
5033	  location_t locus;
5034
5035 phi = psi.phi ();
5036 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5037 redirect_edge_var_map_def (vm), 0))
5038 continue;
5039
5040 t = gimple_phi_result (phi);
5041 gcc_assert (t == redirect_edge_var_map_result (vm));
5042
5043 if (!single_pred_p (fin_bb))
5044 t = copy_ssa_name (t, phi);
5045
5046 nphi = create_phi_node (t, iter_part_bb);
5047
5048 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5049 locus = gimple_phi_arg_location_from_edge (phi, se);
5050
5051 /* A special case -- fd->loop.v is not yet computed in
5052 iter_part_bb, we need to use vextra instead. */
5053 if (t == fd->loop.v)
5054 t = vextra;
5055 add_phi_arg (nphi, t, ene, locus);
5056 locus = redirect_edge_var_map_location (vm);
5057 tree back_arg = redirect_edge_var_map_def (vm);
5058 add_phi_arg (nphi, back_arg, re, locus);
5059 edge ce = find_edge (cont_bb, body_bb);
5060 if (ce == NULL)
5061 {
5062 ce = BRANCH_EDGE (cont_bb);
5063 gcc_assert (single_succ (ce->dest) == body_bb);
5064 ce = single_succ_edge (ce->dest);
5065 }
5066 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5067 gcc_assert (inner_loop_phi != NULL);
5068 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5069 find_edge (seq_start_bb, body_bb), locus);
5070
5071 if (!single_pred_p (fin_bb))
5072 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5073 }
5074 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5075 redirect_edge_var_map_clear (re);
5076 if (single_pred_p (fin_bb))
5077 while (1)
5078 {
5079 psi = gsi_start_phis (fin_bb);
5080 if (gsi_end_p (psi))
5081 break;
5082 remove_phi_node (&psi, false);
5083 }
5084
5085 /* Make phi node for trip. */
5086 phi = create_phi_node (trip_main, iter_part_bb);
5087 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5088 UNKNOWN_LOCATION);
5089 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5090 UNKNOWN_LOCATION);
5091 }
5092
5093 if (!broken_loop)
5094 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5095 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5096 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5097 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5098 recompute_dominator (CDI_DOMINATORS, fin_bb));
5099 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5100 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5101 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5102 recompute_dominator (CDI_DOMINATORS, body_bb));
5103
5104 if (!broken_loop)
5105 {
5106 class loop *loop = body_bb->loop_father;
5107 class loop *trip_loop = alloc_loop ();
5108 trip_loop->header = iter_part_bb;
5109 trip_loop->latch = trip_update_bb;
5110 add_loop (trip_loop, iter_part_bb->loop_father);
5111
5112 if (loop != entry_bb->loop_father)
5113 {
5114 gcc_assert (loop->header == body_bb);
5115 gcc_assert (loop->latch == region->cont
5116 || single_pred (loop->latch) == region->cont);
5117 trip_loop->inner = loop;
5118 return;
5119 }
5120
5121 if (!gimple_omp_for_combined_p (fd->for_stmt))
5122 {
5123 loop = alloc_loop ();
5124 loop->header = body_bb;
5125 if (collapse_bb == NULL)
5126 loop->latch = cont_bb;
5127 add_loop (loop, trip_loop);
5128 }
5129 }
5130}
5131
5132/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5133 loop. Given parameters:
5134
5135 for (V = N1; V cond N2; V += STEP) BODY;
5136
5137 where COND is "<" or ">", we generate pseudocode
5138
5139 V = N1;
5140 goto L1;
5141 L0:
5142 BODY;
5143 V += STEP;
5144 L1:
5145 if (V cond N2) goto L0; else goto L2;
5146 L2:
5147
5148 For collapsed loops, given parameters:
5149 collapse(3)
5150 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5151 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5152 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5153 BODY;
5154
5155 we generate pseudocode
5156
5157 if (cond3 is <)
5158 adj = STEP3 - 1;
5159 else
5160 adj = STEP3 + 1;
5161 count3 = (adj + N32 - N31) / STEP3;
5162 if (cond2 is <)
5163 adj = STEP2 - 1;
5164 else
5165 adj = STEP2 + 1;
5166 count2 = (adj + N22 - N21) / STEP2;
5167 if (cond1 is <)
5168 adj = STEP1 - 1;
5169 else
5170 adj = STEP1 + 1;
5171 count1 = (adj + N12 - N11) / STEP1;
5172 count = count1 * count2 * count3;
5173 V = 0;
5174 V1 = N11;
5175 V2 = N21;
5176 V3 = N31;
5177 goto L1;
5178 L0:
5179 BODY;
5180 V += 1;
5181 V3 += STEP3;
5182 V2 += (V3 cond3 N32) ? 0 : STEP2;
5183 V3 = (V3 cond3 N32) ? V3 : N31;
5184 V1 += (V2 cond2 N22) ? 0 : STEP1;
5185 V2 = (V2 cond2 N22) ? V2 : N21;
5186 L1:
5187 if (V < count) goto L0; else goto L2;
5188 L2:
5189
5190 */
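/* For instance (illustrative only), with
     #pragma omp simd collapse(2)
     for (int i = 0; i < 4; i++)
       for (int j = 0; j < 3; j++)
	 body (i, j);
   count1 == 4 and count2 == 3, so count == 12 and the expansion runs
   the single logical IV V from 0 to 11 while stepping i and j as in
   the pseudocode above.  */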
5191
5192static void
5193expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5194{
5195 tree type, t;
5196 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5197 gimple_stmt_iterator gsi;
5198 gimple *stmt;
5199 gcond *cond_stmt;
5200 bool broken_loop = region->cont == NULL;
5201 edge e, ne;
5202 tree *counts = NULL;
5203 int i;
5204 int safelen_int = INT_MAX;
5205	  bool dont_vectorize = false;
5206 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5207 OMP_CLAUSE_SAFELEN);
5208 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5209 OMP_CLAUSE__SIMDUID_);
5210 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5211 OMP_CLAUSE_IF);
5212 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5213 OMP_CLAUSE_SIMDLEN);
5214 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5215 OMP_CLAUSE__CONDTEMP_);
5216	  tree n1, n2;
5217	  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5218
5219 if (safelen)
5220 {
5221	      poly_uint64 val;
5222	      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5223	      if (!poly_int_tree_p (safelen, &val))
5224		safelen_int = 0;
5225 else
5226 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5227 if (safelen_int == 1)
5228 safelen_int = 0;
5229 }
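  /* E.g. safelen(8) yields safelen_int == 8 here, while safelen(1)
     promises no benefit from overlapping iterations and is therefore
     canonicalized to 0, which also keeps the force-vectorize handling
     further below from firing.  */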
5230 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5231 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5232 {
5233 safelen_int = 0;
5234 dont_vectorize = true;
5235 }
5236 type = TREE_TYPE (fd->loop.v);
5237 entry_bb = region->entry;
5238 cont_bb = region->cont;
5239 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5240 gcc_assert (broken_loop
5241 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5242 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5243 if (!broken_loop)
5244 {
5245 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5246 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5247 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5248 l2_bb = BRANCH_EDGE (entry_bb)->dest;
5249 }
5250 else
5251 {
5252 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5253 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5254 l2_bb = single_succ (l1_bb);
5255 }
5256 exit_bb = region->exit;
5257 l2_dom_bb = NULL;
5258
5259	  gsi = gsi_last_nondebug_bb (entry_bb);
5260
5261 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5262 /* Not needed in SSA form right now. */
5263 gcc_assert (!gimple_in_ssa_p (cfun));
5264 if (fd->collapse > 1)
5265 {
5266 int first_zero_iter = -1, dummy = -1;
5267 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5268
5269 counts = XALLOCAVEC (tree, fd->collapse);
5270 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5271 zero_iter_bb, first_zero_iter,
5272 dummy_bb, dummy, l2_dom_bb);
5273 }
5274 if (l2_dom_bb == NULL)
5275 l2_dom_bb = l1_bb;
5276
5277 n1 = fd->loop.n1;
5278 n2 = fd->loop.n2;
5279 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5280 {
5281 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5282 OMP_CLAUSE__LOOPTEMP_);
5283 gcc_assert (innerc);
5284 n1 = OMP_CLAUSE_DECL (innerc);
5285 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5286 OMP_CLAUSE__LOOPTEMP_);
5287 gcc_assert (innerc);
5288 n2 = OMP_CLAUSE_DECL (innerc);
5289 }
5290 tree step = fd->loop.step;
5291
5292 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5293 OMP_CLAUSE__SIMT_);
5294 if (is_simt)
5295 {
5296 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5297 is_simt = safelen_int > 1;
5298 }
5299 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5300 if (is_simt)
5301 {
5302 simt_lane = create_tmp_var (unsigned_type_node);
5303 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5304 gimple_call_set_lhs (g, simt_lane);
5305 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5306 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5307 fold_convert (TREE_TYPE (step), simt_lane));
5308 n1 = fold_convert (type, n1);
5309 if (POINTER_TYPE_P (type))
5310 n1 = fold_build_pointer_plus (n1, offset);
5311 else
5312 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5313
5314 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5315 if (fd->collapse > 1)
5316 simt_maxlane = build_one_cst (unsigned_type_node);
5317 else if (safelen_int < omp_max_simt_vf ())
5318 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5319 tree vf
5320 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5321 unsigned_type_node, 0);
5322 if (simt_maxlane)
5323 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5324 vf = fold_convert (TREE_TYPE (step), vf);
5325 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5326 }
5327
5328 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5329 if (fd->collapse > 1)
5330 {
5331 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5332 {
5333 gsi_prev (&gsi);
5334 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5335 gsi_next (&gsi);
5336 }
5337 else
5338 for (i = 0; i < fd->collapse; i++)
5339 {
5340 tree itype = TREE_TYPE (fd->loops[i].v);
5341 if (POINTER_TYPE_P (itype))
5342 itype = signed_type_for (itype);
5343 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5344 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5345 }
5346 }
5347 if (cond_var)
5348 {
5349 if (POINTER_TYPE_P (type)
5350 || TREE_CODE (n1) != INTEGER_CST
5351 || fd->loop.cond_code != LT_EXPR
5352 || tree_int_cst_sgn (n1) != 1)
5353 expand_omp_build_assign (&gsi, cond_var,
5354 build_one_cst (TREE_TYPE (cond_var)));
5355 else
5356 expand_omp_build_assign (&gsi, cond_var,
5357 fold_convert (TREE_TYPE (cond_var), n1));
5358 }
5359
5360 /* Remove the GIMPLE_OMP_FOR statement. */
5361 gsi_remove (&gsi, true);
5362
5363 if (!broken_loop)
5364 {
5365 /* Code to control the increment goes in the CONT_BB. */
5366	      gsi = gsi_last_nondebug_bb (cont_bb);
5367 stmt = gsi_stmt (gsi);
5368 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5369
5370 if (POINTER_TYPE_P (type))
5371 t = fold_build_pointer_plus (fd->loop.v, step);
5372 else
5373 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5374 expand_omp_build_assign (&gsi, fd->loop.v, t);
5375
5376 if (fd->collapse > 1)
5377 {
5378 i = fd->collapse - 1;
5379 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5380 {
5381 t = fold_convert (sizetype, fd->loops[i].step);
5382 t = fold_build_pointer_plus (fd->loops[i].v, t);
5383 }
5384 else
5385 {
5386 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5387 fd->loops[i].step);
5388 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5389 fd->loops[i].v, t);
5390 }
5391 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5392
5393 for (i = fd->collapse - 1; i > 0; i--)
5394 {
5395 tree itype = TREE_TYPE (fd->loops[i].v);
5396 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5397 if (POINTER_TYPE_P (itype2))
5398 itype2 = signed_type_for (itype2);
5399 t = fold_convert (itype2, fd->loops[i - 1].step);
5400 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5401 GSI_SAME_STMT);
5402 t = build3 (COND_EXPR, itype2,
5403 build2 (fd->loops[i].cond_code, boolean_type_node,
5404 fd->loops[i].v,
5405 fold_convert (itype, fd->loops[i].n2)),
5406	    		  build_int_cst (itype2, 0), t);
5407 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5408 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5409 else
5410 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5411 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5412
5413 t = fold_convert (itype, fd->loops[i].n1);
5414 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5415 GSI_SAME_STMT);
5416 t = build3 (COND_EXPR, itype,
5417 build2 (fd->loops[i].cond_code, boolean_type_node,
5418 fd->loops[i].v,
5419 fold_convert (itype, fd->loops[i].n2)),
5420	    		  fd->loops[i].v, t);
5421 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5422 }
5423 }
5424 if (cond_var)
5425 {
5426 if (POINTER_TYPE_P (type)
5427 || TREE_CODE (n1) != INTEGER_CST
5428 || fd->loop.cond_code != LT_EXPR
5429 || tree_int_cst_sgn (n1) != 1)
5430 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5431 build_one_cst (TREE_TYPE (cond_var)));
5432 else
5433 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5434 fold_convert (TREE_TYPE (cond_var), step));
5435 expand_omp_build_assign (&gsi, cond_var, t);
5436 }
5437
5438 /* Remove GIMPLE_OMP_CONTINUE. */
5439 gsi_remove (&gsi, true);
5440 }
5441
5442 /* Emit the condition in L1_BB. */
5443 gsi = gsi_start_bb (l1_bb);
5444
5445 t = fold_convert (type, n2);
5446 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5447 false, GSI_CONTINUE_LINKING);
5448 tree v = fd->loop.v;
5449 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5450 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5451 false, GSI_CONTINUE_LINKING);
5452 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5453 cond_stmt = gimple_build_cond_empty (t);
5454 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5455 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5456 NULL, NULL)
5457 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5458 NULL, NULL))
5459 {
5460 gsi = gsi_for_stmt (cond_stmt);
5461 gimple_regimplify_operands (cond_stmt, &gsi);
5462 }
5463
5464 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5465 if (is_simt)
5466 {
5467 gsi = gsi_start_bb (l2_bb);
5468 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5469 if (POINTER_TYPE_P (type))
5470 t = fold_build_pointer_plus (fd->loop.v, step);
5471 else
5472 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5473 expand_omp_build_assign (&gsi, fd->loop.v, t);
5474 }
5475
5476 /* Remove GIMPLE_OMP_RETURN. */
5477	  gsi = gsi_last_nondebug_bb (exit_bb);
5478 gsi_remove (&gsi, true);
5479
5480 /* Connect the new blocks. */
5481 remove_edge (FALLTHRU_EDGE (entry_bb));
5482
5483 if (!broken_loop)
5484 {
5485 remove_edge (BRANCH_EDGE (entry_bb));
5486 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5487
5488 e = BRANCH_EDGE (l1_bb);
5489 ne = FALLTHRU_EDGE (l1_bb);
5490 e->flags = EDGE_TRUE_VALUE;
5491 }
5492 else
5493 {
5494 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5495
5496 ne = single_succ_edge (l1_bb);
5497 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5498
5499 }
5500 ne->flags = EDGE_FALSE_VALUE;
5501 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5502 ne->probability = e->probability.invert ();
5503
5504 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5505 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5506
5507 if (simt_maxlane)
5508 {
5509 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5510 NULL_TREE, NULL_TREE);
5511 gsi = gsi_last_bb (entry_bb);
5512 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5513 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5514 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5515 FALLTHRU_EDGE (entry_bb)->probability
5516 = profile_probability::guessed_always ().apply_scale (7, 8);
5517 BRANCH_EDGE (entry_bb)->probability
5518 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5519 l2_dom_bb = entry_bb;
5520 }
5521 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5522
5523 if (!broken_loop)
5524 {
5525	      class loop *loop = alloc_loop ();
5526 loop->header = l1_bb;
5527 loop->latch = cont_bb;
5528 add_loop (loop, l1_bb->loop_father);
5529 loop->safelen = safelen_int;
5530 if (simduid)
5531 {
5532 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5533 cfun->has_simduid_loops = true;
5534 }
5535 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5536 the loop. */
5537 if ((flag_tree_loop_vectorize
5538	       || !global_options_set.x_flag_tree_loop_vectorize)
5539 && flag_tree_loop_optimize
5540 && loop->safelen > 1)
5541 {
5542 loop->force_vectorize = true;
5543 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5544 {
5545 unsigned HOST_WIDE_INT v
5546 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5547 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5548 loop->simdlen = v;
5549 }
5550 cfun->has_force_vectorize_loops = true;
5551 }
5552 else if (dont_vectorize)
5553 loop->dont_vectorize = true;
5554 }
5555 else if (simduid)
5556 cfun->has_simduid_loops = true;
5557}
5558
5559/* Taskloop construct is represented after gimplification with
5560 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5561 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5562 which should just compute all the needed loop temporaries
5563 for GIMPLE_OMP_TASK. */
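/* As an illustrative sketch,
     #pragma omp taskloop
     for (i = a; i < b; i++)
       body (i);
   is expanded so that the outer GIMPLE_OMP_FOR handled here merely
   evaluates a and b into _looptemp_ variables; the GOMP_taskloop
   runtime call then hands each created task its own [start, end)
   subrange, over which the inner GIMPLE_OMP_FOR iterates.  */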
5564
5565static void
5566expand_omp_taskloop_for_outer (struct omp_region *region,
5567 struct omp_for_data *fd,
5568 gimple *inner_stmt)
5569{
5570 tree type, bias = NULL_TREE;
5571 basic_block entry_bb, cont_bb, exit_bb;
5572 gimple_stmt_iterator gsi;
5573 gassign *assign_stmt;
5574 tree *counts = NULL;
5575 int i;
5576
5577 gcc_assert (inner_stmt);
5578 gcc_assert (region->cont);
5579 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5580 && gimple_omp_task_taskloop_p (inner_stmt));
5581 type = TREE_TYPE (fd->loop.v);
5582
5583 /* See if we need to bias by LLONG_MIN. */
5584 if (fd->iter_type == long_long_unsigned_type_node
5585 && TREE_CODE (type) == INTEGER_TYPE
5586 && !TYPE_UNSIGNED (type))
5587 {
5588 tree n1, n2;
5589
5590 if (fd->loop.cond_code == LT_EXPR)
5591 {
5592 n1 = fd->loop.n1;
5593 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5594 }
5595 else
5596 {
5597 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5598 n2 = fd->loop.n1;
5599 }
5600 if (TREE_CODE (n1) != INTEGER_CST
5601 || TREE_CODE (n2) != INTEGER_CST
5602 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5603 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5604 }
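  /* A worked example of the bias (illustrative only): the
     GOMP_taskloop_ull runtime compares bounds as unsigned values, so
     for a signed long long iterator we add LLONG_MIN with wraparound,
     mapping [LLONG_MIN, LLONG_MAX] monotonically onto [0, ULLONG_MAX];
     e.g. -5 becomes 0x7ffffffffffffffb and 5 becomes
     0x8000000000000005, preserving the iteration order.  */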
5605
5606 entry_bb = region->entry;
5607 cont_bb = region->cont;
5608 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5609 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5610 exit_bb = region->exit;
5611
5612	  gsi = gsi_last_nondebug_bb (entry_bb);
5613 gimple *for_stmt = gsi_stmt (gsi);
5614 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5615 if (fd->collapse > 1)
5616 {
5617 int first_zero_iter = -1, dummy = -1;
5618 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5619
5620 counts = XALLOCAVEC (tree, fd->collapse);
5621 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5622 zero_iter_bb, first_zero_iter,
5623 dummy_bb, dummy, l2_dom_bb);
5624
5625 if (zero_iter_bb)
5626 {
5627 /* Some counts[i] vars might be uninitialized if
5628 some loop has zero iterations. But the body shouldn't
5629 be executed in that case, so just avoid uninit warnings. */
5630 for (i = first_zero_iter; i < fd->collapse; i++)
5631 if (SSA_VAR_P (counts[i]))
5632 TREE_NO_WARNING (counts[i]) = 1;
5633 gsi_prev (&gsi);
5634 edge e = split_block (entry_bb, gsi_stmt (gsi));
5635 entry_bb = e->dest;
5636 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5637 gsi = gsi_last_bb (entry_bb);
5638 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5639 get_immediate_dominator (CDI_DOMINATORS,
5640 zero_iter_bb));
5641 }
5642 }
5643
5644 tree t0, t1;
5645 t1 = fd->loop.n2;
5646 t0 = fd->loop.n1;
5647 if (POINTER_TYPE_P (TREE_TYPE (t0))
5648 && TYPE_PRECISION (TREE_TYPE (t0))
5649 != TYPE_PRECISION (fd->iter_type))
5650 {
5651 /* Avoid casting pointers to integer of a different size. */
5652 tree itype = signed_type_for (type);
5653 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5654 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5655 }
5656 else
5657 {
5658 t1 = fold_convert (fd->iter_type, t1);
5659 t0 = fold_convert (fd->iter_type, t0);
5660 }
5661 if (bias)
5662 {
5663 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5664 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5665 }
5666
5667 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5668 OMP_CLAUSE__LOOPTEMP_);
5669 gcc_assert (innerc);
5670 tree startvar = OMP_CLAUSE_DECL (innerc);
5671 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5672 gcc_assert (innerc);
5673 tree endvar = OMP_CLAUSE_DECL (innerc);
5674 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5675 {
5676 gcc_assert (innerc);
5677 for (i = 1; i < fd->collapse; i++)
5678 {
5679 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5680 OMP_CLAUSE__LOOPTEMP_);
5681 gcc_assert (innerc);
5682 }
5683 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5684 OMP_CLAUSE__LOOPTEMP_);
5685 if (innerc)
5686 {
5687 /* If needed (inner taskloop has lastprivate clause), propagate
5688 down the total number of iterations. */
5689 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5690 NULL_TREE, false,
5691 GSI_CONTINUE_LINKING);
5692 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5693 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5694 }
5695 }
5696
5697 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5698 GSI_CONTINUE_LINKING);
5699 assign_stmt = gimple_build_assign (startvar, t0);
5700 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5701
5702 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5703 GSI_CONTINUE_LINKING);
5704 assign_stmt = gimple_build_assign (endvar, t1);
5705 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5706 if (fd->collapse > 1)
5707 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5708
5709 /* Remove the GIMPLE_OMP_FOR statement. */
5710 gsi = gsi_for_stmt (for_stmt);
5711 gsi_remove (&gsi, true);
5712
5713	  gsi = gsi_last_nondebug_bb (cont_bb);
5714 gsi_remove (&gsi, true);
5715
5716	  gsi = gsi_last_nondebug_bb (exit_bb);
5717 gsi_remove (&gsi, true);
5718
5719	  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5720	  remove_edge (BRANCH_EDGE (entry_bb));
5721	  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5722 remove_edge (BRANCH_EDGE (cont_bb));
5723 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5724 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5725 recompute_dominator (CDI_DOMINATORS, region->entry));
5726}
5727
5728/* Taskloop construct is represented after gimplification with
5729 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5730 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5731 GOMP_taskloop{,_ull} function arranges for each task to be given just
5732 a single range of iterations. */
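/* For example (illustrative only), a task handed iterations [10, 20)
   receives 10 and 20 through its two _looptemp_ variables, and the
   code below simply runs the user's IV from 10 up to (but excluding)
   20 with the original step.  */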
5733
5734static void
5735expand_omp_taskloop_for_inner (struct omp_region *region,
5736 struct omp_for_data *fd,
5737 gimple *inner_stmt)
5738{
5739 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5740 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5741 basic_block fin_bb;
5742 gimple_stmt_iterator gsi;
5743 edge ep;
5744 bool broken_loop = region->cont == NULL;
5745 tree *counts = NULL;
5746 tree n1, n2, step;
5747
5748 itype = type = TREE_TYPE (fd->loop.v);
5749 if (POINTER_TYPE_P (type))
5750 itype = signed_type_for (type);
5751
5752 /* See if we need to bias by LLONG_MIN. */
5753 if (fd->iter_type == long_long_unsigned_type_node
5754 && TREE_CODE (type) == INTEGER_TYPE
5755 && !TYPE_UNSIGNED (type))
5756 {
5757 tree n1, n2;
5758
5759 if (fd->loop.cond_code == LT_EXPR)
5760 {
5761 n1 = fd->loop.n1;
5762 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5763 }
5764 else
5765 {
5766 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5767 n2 = fd->loop.n1;
5768 }
5769 if (TREE_CODE (n1) != INTEGER_CST
5770 || TREE_CODE (n2) != INTEGER_CST
5771 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5772 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5773 }
5774
5775 entry_bb = region->entry;
5776 cont_bb = region->cont;
5777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5778 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5779 gcc_assert (broken_loop
5780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5781 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5782 if (!broken_loop)
5783 {
5784 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5785 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5786 }
5787 exit_bb = region->exit;
5788
5789 /* Iteration space partitioning goes in ENTRY_BB. */
5790	  gsi = gsi_last_nondebug_bb (entry_bb);
5791 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5792
5793 if (fd->collapse > 1)
5794 {
5795 int first_zero_iter = -1, dummy = -1;
5796 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5797
5798 counts = XALLOCAVEC (tree, fd->collapse);
5799 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5800 fin_bb, first_zero_iter,
5801 dummy_bb, dummy, l2_dom_bb);
5802 t = NULL_TREE;
5803 }
5804 else
5805 t = integer_one_node;
5806
5807 step = fd->loop.step;
5808 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5809 OMP_CLAUSE__LOOPTEMP_);
5810 gcc_assert (innerc);
5811 n1 = OMP_CLAUSE_DECL (innerc);
5812 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5813 gcc_assert (innerc);
5814 n2 = OMP_CLAUSE_DECL (innerc);
5815 if (bias)
5816 {
5817 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5818 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5819 }
5820 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5821 true, NULL_TREE, true, GSI_SAME_STMT);
5822 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5823 true, NULL_TREE, true, GSI_SAME_STMT);
5824 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5825 true, NULL_TREE, true, GSI_SAME_STMT);
5826
5827 tree startvar = fd->loop.v;
5828 tree endvar = NULL_TREE;
5829
5830 if (gimple_omp_for_combined_p (fd->for_stmt))
5831 {
5832 tree clauses = gimple_omp_for_clauses (inner_stmt);
5833 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5834 gcc_assert (innerc);
5835 startvar = OMP_CLAUSE_DECL (innerc);
5836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5837 OMP_CLAUSE__LOOPTEMP_);
5838 gcc_assert (innerc);
5839 endvar = OMP_CLAUSE_DECL (innerc);
5840 }
5841 t = fold_convert (TREE_TYPE (startvar), n1);
5842 t = force_gimple_operand_gsi (&gsi, t,
5843 DECL_P (startvar)
5844 && TREE_ADDRESSABLE (startvar),
5845 NULL_TREE, false, GSI_CONTINUE_LINKING);
5846 gimple *assign_stmt = gimple_build_assign (startvar, t);
5847 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5848
5849 t = fold_convert (TREE_TYPE (startvar), n2);
5850 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5851 false, GSI_CONTINUE_LINKING);
5852 if (endvar)
5853 {
5854 assign_stmt = gimple_build_assign (endvar, e);
5855 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5856 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5857 assign_stmt = gimple_build_assign (fd->loop.v, e);
5858 else
5859 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5861 }
5862 if (fd->collapse > 1)
5863 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5864
5865 if (!broken_loop)
5866 {
5867 /* The code controlling the sequential loop replaces the
5868 GIMPLE_OMP_CONTINUE. */
5869	      gsi = gsi_last_nondebug_bb (cont_bb);
5870 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5871 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5872 vmain = gimple_omp_continue_control_use (cont_stmt);
5873 vback = gimple_omp_continue_control_def (cont_stmt);
5874
5875 if (!gimple_omp_for_combined_p (fd->for_stmt))
5876 {
5877 if (POINTER_TYPE_P (type))
5878 t = fold_build_pointer_plus (vmain, step);
5879 else
5880 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5881 t = force_gimple_operand_gsi (&gsi, t,
5882 DECL_P (vback)
5883 && TREE_ADDRESSABLE (vback),
5884 NULL_TREE, true, GSI_SAME_STMT);
5885 assign_stmt = gimple_build_assign (vback, t);
5886 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5887
5888 t = build2 (fd->loop.cond_code, boolean_type_node,
5889 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5890 ? t : vback, e);
5891 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5892 }
5893
5894 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5895 gsi_remove (&gsi, true);
5896
5897 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5898 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5899 }
5900
5901 /* Remove the GIMPLE_OMP_FOR statement. */
5902 gsi = gsi_for_stmt (fd->for_stmt);
5903 gsi_remove (&gsi, true);
5904
5905 /* Remove the GIMPLE_OMP_RETURN statement. */
5906	  gsi = gsi_last_nondebug_bb (exit_bb);
5907 gsi_remove (&gsi, true);
5908
5909	  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5910 if (!broken_loop)
5911 remove_edge (BRANCH_EDGE (entry_bb));
5912 else
5913 {
5914 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5915 region->outer->cont = NULL;
5916 }
5917
5918 /* Connect all the blocks. */
5919 if (!broken_loop)
5920 {
5921 ep = find_edge (cont_bb, body_bb);
5922 if (gimple_omp_for_combined_p (fd->for_stmt))
5923 {
5924 remove_edge (ep);
5925 ep = NULL;
5926 }
5927 else if (fd->collapse > 1)
5928 {
5929 remove_edge (ep);
5930 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5931 }
5932 else
5933 ep->flags = EDGE_TRUE_VALUE;
5934 find_edge (cont_bb, fin_bb)->flags
5935 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5936 }
5937
5938 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5939 recompute_dominator (CDI_DOMINATORS, body_bb));
5940 if (!broken_loop)
5941 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5942 recompute_dominator (CDI_DOMINATORS, fin_bb));
5943
5944 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5945 {
5946	      class loop *loop = alloc_loop ();
5947 loop->header = body_bb;
5948 if (collapse_bb == NULL)
5949 loop->latch = cont_bb;
5950 add_loop (loop, body_bb->loop_father);
5951 }
5952}
5953
5954/* A subroutine of expand_omp_for. Generate code for an OpenACC
5955 partitioned loop. The lowering here is abstracted, in that the
5956 loop parameters are passed through internal functions, which are
5957 further lowered by oacc_device_lower, once we get to the target
5958 compiler. The loop is of the form:
5959
5960 for (V = B; V LTGT E; V += S) {BODY}
5961
5962 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5963 (constant 0 for no chunking) and we will have a GWV partitioning
5964 mask, specifying dimensions over which the loop is to be
5965 partitioned (see note below). We generate code that looks like
5966 (this ignores tiling):
5967
5968 <entry_bb> [incoming FALL->body, BRANCH->exit]
5969 typedef signedintify (typeof (V)) T; // underlying signed integral type
5970 T range = E - B;
5971 T chunk_no = 0;
5972 T DIR = LTGT == '<' ? +1 : -1;
5973 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5974 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5975
5976 <head_bb> [created by splitting end of entry_bb]
5977 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5978 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5979 if (!(offset LTGT bound)) goto bottom_bb;
5980
5981 <body_bb> [incoming]
5982 V = B + offset;
5983 {BODY}
5984
5985 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5986 offset += step;
5987 if (offset LTGT bound) goto body_bb; [*]
5988
5989 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5990 chunk_no++;
5991 if (chunk < chunk_max) goto head_bb;
5992
5993 <exit_bb> [incoming]
5994 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5995
5996	 [*] Needed if V live at end of loop. */
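/* For instance (illustrative only), for
     #pragma acc loop gang
     for (i = 0; i < n; i++)
       body (i);
   each gang obtains its own offset/bound pair from the
   IFN_GOACC_LOOP_OFFSET and IFN_GOACC_LOOP_BOUND calls emitted below,
   which oacc_device_lower later specializes for the target device.  */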
5997
5998static void
5999expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6000{
6001 tree v = fd->loop.v;
6002 enum tree_code cond_code = fd->loop.cond_code;
6003 enum tree_code plus_code = PLUS_EXPR;
6004
6005 tree chunk_size = integer_minus_one_node;
6006 tree gwv = integer_zero_node;
6007 tree iter_type = TREE_TYPE (v);
6008 tree diff_type = iter_type;
6009 tree plus_type = iter_type;
6010 struct oacc_collapse *counts = NULL;
6011
6012 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6013 == GF_OMP_FOR_KIND_OACC_LOOP);
6014 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6015 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6016
6017 if (POINTER_TYPE_P (iter_type))
6018 {
6019 plus_code = POINTER_PLUS_EXPR;
6020 plus_type = sizetype;
6021 }
6022 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6023 diff_type = signed_type_for (diff_type);
6024 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6025 diff_type = integer_type_node;
6026
6027 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6028 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6029 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6030 basic_block bottom_bb = NULL;
6031
6032	  /* entry_bb has two successors; the branch edge goes to the exit
6033	     block and the fallthrough edge to the body.  */
6034 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6035 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6036
6037 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6038 body_bb, or to a block whose only successor is the body_bb. Its
6039 fallthrough successor is the final block (same as the branch
6040 successor of the entry_bb). */
6041 if (cont_bb)
6042 {
6043 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6044 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6045
6046 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6047 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6048 }
6049 else
6050 gcc_assert (!gimple_in_ssa_p (cfun));
6051
6052 /* The exit block only has entry_bb and cont_bb as predecessors. */
6053 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6054
6055 tree chunk_no;
6056 tree chunk_max = NULL_TREE;
6057 tree bound, offset;
6058 tree step = create_tmp_var (diff_type, ".step");
6059 bool up = cond_code == LT_EXPR;
6060 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6061	  bool chunking = !gimple_in_ssa_p (cfun);
6062 bool negating;
6063
6064 /* Tiling vars. */
6065 tree tile_size = NULL_TREE;
6066 tree element_s = NULL_TREE;
6067 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6068 basic_block elem_body_bb = NULL;
6069 basic_block elem_cont_bb = NULL;
6070
6071 /* SSA instances. */
6072 tree offset_incr = NULL_TREE;
6073 tree offset_init = NULL_TREE;
6074
6075 gimple_stmt_iterator gsi;
6076 gassign *ass;
6077 gcall *call;
6078 gimple *stmt;
6079 tree expr;
6080 location_t loc;
6081 edge split, be, fte;
6082
6083 /* Split the end of entry_bb to create head_bb. */
6084 split = split_block (entry_bb, last_stmt (entry_bb));
6085 basic_block head_bb = split->dest;
6086 entry_bb = split->src;
6087
6088 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6089	  gsi = gsi_last_nondebug_bb (entry_bb);
6090 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6091 loc = gimple_location (for_stmt);
6092
6093 if (gimple_in_ssa_p (cfun))
6094 {
6095 offset_init = gimple_omp_for_index (for_stmt, 0);
6096 gcc_assert (integer_zerop (fd->loop.n1));
6097 /* The SSA parallelizer does gang parallelism. */
6098 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6099 }
6100
6101	  if (fd->collapse > 1 || fd->tiling)
6102	    {
6103	      gcc_assert (!gimple_in_ssa_p (cfun) && up);
6104 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6105 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6106	    					      TREE_TYPE (fd->loop.n2), loc);
6107
6108 if (SSA_VAR_P (fd->loop.n2))
6109 {
6110 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6111 true, GSI_SAME_STMT);
6112 ass = gimple_build_assign (fd->loop.n2, total);
6113 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6114 }
6115 }
6116
6117 tree b = fd->loop.n1;
6118 tree e = fd->loop.n2;
6119 tree s = fd->loop.step;
6120
6121 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6122 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6123
6124	  /* Convert the step, avoiding possible unsigned->signed overflow. */
6125 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6126 if (negating)
6127 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6128 s = fold_convert (diff_type, s);
6129 if (negating)
6130 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6131 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6132
6133 if (!chunking)
6134 chunk_size = integer_zero_node;
6135 expr = fold_convert (diff_type, chunk_size);
6136 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6137 NULL_TREE, true, GSI_SAME_STMT);
6138
6139 if (fd->tiling)
6140 {
6141 /* Determine the tile size and element step,
6142 modify the outer loop step size. */
6143 tile_size = create_tmp_var (diff_type, ".tile_size");
6144 expr = build_int_cst (diff_type, 1);
6145 for (int ix = 0; ix < fd->collapse; ix++)
6146 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6147 expr = force_gimple_operand_gsi (&gsi, expr, true,
6148 NULL_TREE, true, GSI_SAME_STMT);
6149 ass = gimple_build_assign (tile_size, expr);
6150 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6151
6152 element_s = create_tmp_var (diff_type, ".element_s");
6153 ass = gimple_build_assign (element_s, s);
6154 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6155
6156 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6157 s = force_gimple_operand_gsi (&gsi, expr, true,
6158 NULL_TREE, true, GSI_SAME_STMT);
6159 }
6160
6161	  /* Determine the range, avoiding possible unsigned->signed overflow. */
6162 negating = !up && TYPE_UNSIGNED (iter_type);
6163 expr = fold_build2 (MINUS_EXPR, plus_type,
6164 fold_convert (plus_type, negating ? b : e),
6165 fold_convert (plus_type, negating ? e : b));
6166 expr = fold_convert (diff_type, expr);
6167 if (negating)
6168 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6169 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6170 NULL_TREE, true, GSI_SAME_STMT);
6171
6172 chunk_no = build_int_cst (diff_type, 0);
6173 if (chunking)
6174 {
6175 gcc_assert (!gimple_in_ssa_p (cfun));
6176
6177 expr = chunk_no;
6178 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6179 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6180
6181 ass = gimple_build_assign (chunk_no, expr);
6182 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6183
6184 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6185 build_int_cst (integer_type_node,
6186 IFN_GOACC_LOOP_CHUNKS),
6187 dir, range, s, chunk_size, gwv);
6188 gimple_call_set_lhs (call, chunk_max);
6189 gimple_set_location (call, loc);
6190 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6191 }
6192 else
6193 chunk_size = chunk_no;
6194
6195 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6196 build_int_cst (integer_type_node,
6197 IFN_GOACC_LOOP_STEP),
6198 dir, range, s, chunk_size, gwv);
6199 gimple_call_set_lhs (call, step);
6200 gimple_set_location (call, loc);
6201 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6202
6203 /* Remove the GIMPLE_OMP_FOR. */
6204 gsi_remove (&gsi, true);
6205
6206	  /* Fixup edges from head_bb. */
6207 be = BRANCH_EDGE (head_bb);
6208 fte = FALLTHRU_EDGE (head_bb);
6209 be->flags |= EDGE_FALSE_VALUE;
6210 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6211
6212 basic_block body_bb = fte->dest;
6213
6214 if (gimple_in_ssa_p (cfun))
6215 {
6216	      gsi = gsi_last_nondebug_bb (cont_bb);
6217 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6218
6219 offset = gimple_omp_continue_control_use (cont_stmt);
6220 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6221 }
6222 else
6223 {
6224 offset = create_tmp_var (diff_type, ".offset");
6225 offset_init = offset_incr = offset;
6226 }
6227 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6228
6229 /* Loop offset & bound go into head_bb. */
6230 gsi = gsi_start_bb (head_bb);
6231
6232 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6233 build_int_cst (integer_type_node,
6234 IFN_GOACC_LOOP_OFFSET),
6235 dir, range, s,
6236 chunk_size, gwv, chunk_no);
6237 gimple_call_set_lhs (call, offset_init);
6238 gimple_set_location (call, loc);
6239 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6240
6241 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6242 build_int_cst (integer_type_node,
6243 IFN_GOACC_LOOP_BOUND),
6244 dir, range, s,
6245 chunk_size, gwv, offset_init);
6246 gimple_call_set_lhs (call, bound);
6247 gimple_set_location (call, loc);
6248 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6249
6250 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6251 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6252 GSI_CONTINUE_LINKING);
6253
6254 /* V assignment goes into body_bb. */
6255 if (!gimple_in_ssa_p (cfun))
6256 {
6257 gsi = gsi_start_bb (body_bb);
6258
6259 expr = build2 (plus_code, iter_type, b,
6260 fold_convert (plus_type, offset));
6261 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6262 true, GSI_SAME_STMT);
6263 ass = gimple_build_assign (v, expr);
6264 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6265
6266 if (fd->collapse > 1 || fd->tiling)
6267 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
6268
6269 if (fd->tiling)
6270 {
6271 /* Determine the range of the element loop -- usually simply
6272 the tile_size, but could be smaller if the final
6273 iteration of the outer loop is a partial tile. */
6274 tree e_range = create_tmp_var (diff_type, ".e_range");
6275
6276 expr = build2 (MIN_EXPR, diff_type,
6277 build2 (MINUS_EXPR, diff_type, bound, offset),
6278 build2 (MULT_EXPR, diff_type, tile_size,
6279 element_s));
6280 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6281 true, GSI_SAME_STMT);
6282 ass = gimple_build_assign (e_range, expr);
6283 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6284
6285 /* Determine bound, offset & step of inner loop. */
6286 e_bound = create_tmp_var (diff_type, ".e_bound");
6287 e_offset = create_tmp_var (diff_type, ".e_offset");
6288 e_step = create_tmp_var (diff_type, ".e_step");
6289
6290 /* Mark these as element loops. */
6291 tree t, e_gwv = integer_minus_one_node;
6292 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6293
6294 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6295 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6296 element_s, chunk, e_gwv, chunk);
6297 gimple_call_set_lhs (call, e_offset);
6298 gimple_set_location (call, loc);
6299 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6300
6301 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6302 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6303 element_s, chunk, e_gwv, e_offset);
6304 gimple_call_set_lhs (call, e_bound);
6305 gimple_set_location (call, loc);
6306 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6307
6308 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6309 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6310 element_s, chunk, e_gwv);
6311 gimple_call_set_lhs (call, e_step);
6312 gimple_set_location (call, loc);
6313 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6314
6315 /* Add test and split block. */
6316 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6317 stmt = gimple_build_cond_empty (expr);
6318 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6319 split = split_block (body_bb, stmt);
6320 elem_body_bb = split->dest;
6321 if (cont_bb == body_bb)
6322 cont_bb = elem_body_bb;
6323 body_bb = split->src;
6324
6325 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6326
6327 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6328 if (cont_bb == NULL)
6329 {
6330 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6331 e->probability = profile_probability::even ();
6332 split->probability = profile_probability::even ();
6333 }
6334
6335 /* Initialize the user's loop vars. */
6336 gsi = gsi_start_bb (elem_body_bb);
6337 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6338 }
6339 }
6340
6341 /* Loop increment goes into cont_bb. If this is not a loop, we
6342 will have spawned threads as if it was, and each one will
6343 execute one iteration. The specification is not explicit about
6344 whether such constructs are ill-formed or not, and they can
6345 occur, especially when noreturn routines are involved. */
6346 if (cont_bb)
6347 {
6348	      gsi = gsi_last_nondebug_bb (cont_bb);
6349 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6350 loc = gimple_location (cont_stmt);
6351
6352 if (fd->tiling)
6353 {
6354 /* Insert element loop increment and test. */
6355 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6356 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6357 true, GSI_SAME_STMT);
6358 ass = gimple_build_assign (e_offset, expr);
6359 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6360 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6361
6362 stmt = gimple_build_cond_empty (expr);
6363 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6364 split = split_block (cont_bb, stmt);
6365 elem_cont_bb = split->src;
6366 cont_bb = split->dest;
6367
6368 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6369 split->probability = profile_probability::unlikely ().guessed ();
6370 edge latch_edge
6371 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6372 latch_edge->probability = profile_probability::likely ().guessed ();
6373
6374 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6375 skip_edge->probability = profile_probability::unlikely ().guessed ();
6376 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6377 loop_entry_edge->probability
6378 = profile_probability::likely ().guessed ();
6379
6380 gsi = gsi_for_stmt (cont_stmt);
6381 }
6382
6383 /* Increment offset. */
6384 if (gimple_in_ssa_p (cfun))
6385 expr = build2 (plus_code, iter_type, offset,
6386 fold_convert (plus_type, step));
6387 else
6388 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6389 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6390 true, GSI_SAME_STMT);
6391 ass = gimple_build_assign (offset_incr, expr);
6392 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6393 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6394 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6395
6396 /* Remove the GIMPLE_OMP_CONTINUE. */
6397 gsi_remove (&gsi, true);
6398
6399	      /* Fixup edges from cont_bb. */
6400 be = BRANCH_EDGE (cont_bb);
6401 fte = FALLTHRU_EDGE (cont_bb);
6402 be->flags |= EDGE_TRUE_VALUE;
6403 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6404
6405 if (chunking)
6406 {
6407 /* Split the beginning of exit_bb to make bottom_bb. We
6408 need to insert a nop at the start, because splitting is
6409	    	 after a stmt, not before. */
6410 gsi = gsi_start_bb (exit_bb);
6411 stmt = gimple_build_nop ();
6412 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6413 split = split_block (exit_bb, stmt);
6414 bottom_bb = split->src;
6415 exit_bb = split->dest;
6416 gsi = gsi_last_bb (bottom_bb);
6417
6418 /* Chunk increment and test goes into bottom_bb. */
6419 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6420 build_int_cst (diff_type, 1));
6421 ass = gimple_build_assign (chunk_no, expr);
6422 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6423
6424 /* Chunk test at end of bottom_bb. */
6425 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6426 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6427 GSI_CONTINUE_LINKING);
6428
6429	      /* Fixup edges from bottom_bb. */
6430	      split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6431 split->probability = profile_probability::unlikely ().guessed ();
6432 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6433 latch_edge->probability = profile_probability::likely ().guessed ();
6434 }
6435 }
6436
6437	  gsi = gsi_last_nondebug_bb (exit_bb);
6438 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6439 loc = gimple_location (gsi_stmt (gsi));
6440
6441 if (!gimple_in_ssa_p (cfun))
6442 {
6443 /* Insert the final value of V, in case it is live. This is the
6444 value for the only thread that survives past the join. */
6445 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6446 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6447 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6448 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6449 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6450 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6451 true, GSI_SAME_STMT);
6452 ass = gimple_build_assign (v, expr);
6453 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6454 }
6455
6456	  /* Remove the OMP_RETURN. */
6457 gsi_remove (&gsi, true);
6458
6459 if (cont_bb)
6460 {
6461	      /* We now have one, two or three nested loops. Update the loop
6462	 	 structures. */
6463 class loop *parent = entry_bb->loop_father;
6464 class loop *body = body_bb->loop_father;
629b3d75
MJ
6465
6466 if (chunking)
6467 {
99b1c316 6468 class loop *chunk_loop = alloc_loop ();
629b3d75
MJ
6469 chunk_loop->header = head_bb;
6470 chunk_loop->latch = bottom_bb;
6471 add_loop (chunk_loop, parent);
6472 parent = chunk_loop;
6473 }
6474 else if (parent != body)
6475 {
6476 gcc_assert (body->header == body_bb);
6477 gcc_assert (body->latch == cont_bb
6478 || single_pred (body->latch) == cont_bb);
6479 parent = NULL;
6480 }
6481
6482 if (parent)
6483 {
99b1c316 6484 class loop *body_loop = alloc_loop ();
629b3d75
MJ
6485 body_loop->header = body_bb;
6486 body_loop->latch = cont_bb;
6487 add_loop (body_loop, parent);
02889d23
CLT
6488
6489 if (fd->tiling)
6490 {
6491 /* Insert tiling's element loop. */
99b1c316 6492 class loop *inner_loop = alloc_loop ();
02889d23
CLT
6493 inner_loop->header = elem_body_bb;
6494 inner_loop->latch = elem_cont_bb;
6495 add_loop (inner_loop, body_loop);
6496 }
629b3d75
MJ
6497 }
6498 }
6499}
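
/* Illustrative note (an editorial addition, not from the original sources):
   for an OpenACC loop such as

     #pragma acc parallel loop gang tile(8,8)
     for (int i = 0; i < n; i++)
       for (int j = 0; j < m; j++)
	 a[i][j] = b[i][j];

   the expansion above can leave up to three nested loops in the loop tree:
   an outer chunking loop (when chunking is needed), the body loop over one
   chunk, and, with a tile clause, the element loop iterating within a
   single tile.  */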

/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;
  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
    {
      if (fd.non_rect)
	sorry_at (gimple_location (fd.for_stmt),
		  "non-rectangular %<simd%> not supported yet");
      expand_omp_simd (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt && !fd.non_rect);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (fd.non_rect)
	sorry_at (gimple_location (fd.for_stmt),
		  "non-rectangular %<taskloop%> not supported yet");
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.non_rect)
	sorry_at (gimple_location (fd.for_stmt),
		  "non-rectangular OpenMP loops not supported yet");
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      int fn_index, start_ix, next_ix;
      unsigned HOST_WIDE_INT sched = 0;
      tree sched_arg = NULL_TREE;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
	      && fd.lastprivate_conditional == 0)
	    {
	      gcc_assert (!fd.have_ordered);
	      fn_index = 6;
	      sched = 4;
	    }
	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		   && !fd.have_ordered
		   && fd.lastprivate_conditional == 0)
	    fn_index = 7;
	  else
	    {
	      fn_index = 3;
	      sched = (HOST_WIDE_INT_1U << 31);
	    }
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
	      && !fd.have_ordered
	      && fd.lastprivate_conditional == 0)
	    {
	      fn_index = 3 + fd.sched_kind;
	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	      break;
	    }
	  fn_index = fd.sched_kind;
	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	  sched += (HOST_WIDE_INT_1U << 31);
	  break;
	case OMP_CLAUSE_SCHEDULE_STATIC:
	  gcc_assert (fd.have_ordered);
	  fn_index = 0;
	  sched = (HOST_WIDE_INT_1U << 31) + 1;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 8;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.have_reductemp || fd.have_pointer_condtemp)
	{
	  if (fd.ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
	  else if (fd.have_ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
	  else
	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
	  sched_arg = build_int_cstu (long_integer_type_node, sched);
	  if (!fd.chunk_size)
	    fd.chunk_size = integer_zero_node;
	}
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, sched_arg,
			      inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}
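
/* Illustrative note (an editorial addition, not from the original sources):
   a plain worksharing loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   takes the final branch above and is expanded by expand_omp_for_generic
   into a dispatch loop around the libgomp entry points selected via
   START_IX/NEXT_IX, e.g. GOMP_loop_dynamic_start/GOMP_loop_dynamic_next
   (or nonmonotonic/ull variants, depending on the schedule modifiers and
   the iteration type).  */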

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with a call to GOMP_sections_next.  */
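
/* For illustration (an editorial addition, not from the original sources):
   a directive such as

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   yields the switch above with cases 1 and 2 dispatching to the two section
   bodies, case 0 jumping to L2 when no more work is available, and the
   default case trapping.  */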

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_nondebug_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  tree clauses = gimple_omp_sections_clauses (sections_stmt);
  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
  tree cond_var = NULL_TREE;
  if (reductmp || condtmp)
    {
      tree reductions = null_pointer_node, mem = null_pointer_node;
      tree memv = NULL_TREE, condtemp = NULL_TREE;
      gimple_stmt_iterator gsi = gsi_none ();
      gimple *g = NULL;
      if (reductmp)
	{
	  reductions = OMP_CLAUSE_DECL (reductmp);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (reductmp) = reductions;
	  gsi = gsi_for_stmt (g);
	}
      else
	gsi = si;
      if (condtmp)
	{
	  condtemp = OMP_CLAUSE_DECL (condtmp);
	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
				    OMP_CLAUSE__CONDTEMP_);
	  cond_var = OMP_CLAUSE_DECL (c);
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned cnt = 0;
	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
	      ++cnt;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
      stmt = gimple_build_call (u, 3, t, reductions, mem);
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      if (condtmp)
	{
	  expand_omp_build_assign (&gsi, condtemp, memv, false);
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vin, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&gsi, cond_var, t, false);
	}
      if (reductmp)
	{
	  gsi_remove (&gsi, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  else if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  if (!reductmp && !condtmp)
    {
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
    }
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      if (cond_var)
	{
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vnext, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&si, cond_var, t, false);
	}
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code; here we simply place the GOMP_barrier call.  */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
    {
      expand_omp_taskreg (region);
      return;
    }
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* Translate enum omp_memory_order to enum memmodel.  The two enums
   use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
   is 0.  */

static enum memmodel
omp_memory_order_to_memmodel (enum omp_memory_order mo)
{
  switch (mo)
    {
    case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
    case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
    case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
    case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
    case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
    default: gcc_unreachable ();
    }
}
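
/* For example (an editorial addition, not from the original sources):
   a directive such as

     #pragma omp atomic read acquire

   carries OMP_MEMORY_ORDER_ACQUIRE, which the mapping above turns into
   MEMMODEL_ACQUIRE for the __atomic_* builtin calls built below.  */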

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */
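
/* A minimal sketch (an editorial addition, not from the original sources)
   of the transformation this performs, assuming a 4-byte int X:

     #pragma omp atomic read
     v = x;

   becomes, roughly,

     v = __atomic_load_4 (&x, <memory order from the directive>);

   with a VIEW_CONVERT_EXPR wrapped around the call when the payload type
   differs from the builtin's integral type.  */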

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 2, addr, mo);
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */
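
/* A minimal sketch (an editorial addition, not from the original sources):
   for a 4-byte int X, the update

     #pragma omp atomic
     x += 1;

   matches the PLUS_EXPR case below and is expanded to

     __atomic_fetch_add_4 (&x, 1, <memory model>);

   typically with MEMMODEL_RELAXED, the default atomic memory order; the
   result of the call is only kept when a capture needs the old or new
   value.  */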

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	// with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */
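
/* A minimal sketch (an editorial addition, not from the original sources):
   an update with no matching fetch-op builtin, e.g.

     #pragma omp atomic
     x *= 2;

   is expanded into the compare-and-swap loop above; a float or double X
   is first view-converted to a same-sized integer so that both the
   comparison and the CAS operate on integral values.  */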

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, this appears to be within spec,
   which makes sense, since that's how several other compilers handle this
   situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;
*/
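
/* For illustration (an editorial addition, not from the original sources):
   this path is taken e.g. for an atomic update on a type with no usable
   compare-and-swap, such as a 16-byte long double on a target without a
   16-byte CAS; the load and store are then simply bracketed by
   GOMP_atomic_start ()/GOMP_atomic_end (), which serialize on a single
   global lock inside libgomp.  */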

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}

/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If that fails, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
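
/* For illustration (an editorial addition, not from the original sources):
   in a region such as

     #pragma acc kernels
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i];
     }

   the single outer loop (and any loops nested inside it, provided each
   nesting level contains exactly one loop) has in_oacc_kernels_region set,
   which later phases consult when deciding whether the kernels region can
   be parallelized.  */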

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.  */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
			     tree fld_decl, int index, tree value)
{
  tree ref = build4 (ARRAY_REF, uint32_type_node,
		     build3 (COMPONENT_REF,
			     grid_attr_trees->kernel_dim_array_type,
			     range_var, fld_decl, NULL_TREE),
		     build_int_cst (integer_type_node, index),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}

/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subsequent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subsequent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   arguments.  */
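
/* A worked example (an editorial addition, not from the original sources),
   assuming the GOMP_TARGET_ARG_* layout of gomp-constants.h with the device
   in the low bits, the identifier above it, and the value placed above
   GOMP_TARGET_ARG_VALUE_SHIFT: for num_teams(4) the constant 4 fits the
   (-2^15, 2^15) window, so a single pointer-sized argument encoding roughly

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
     | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   is pushed; a larger or non-constant expression instead takes the two-slot
   form below, with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set on the first
   slot.  */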

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      mark_loops_in_oacc_kernels_region (region->entry, region->exit);

      /* Further down, all OpenACC compute constructs will be mapped to
	 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
	 is an "oacc kernels" attribute set for OpenACC kernels.  */
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      /* Further down, all OpenACC compute constructs will be mapped to
	 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
	 is an "oacc serial" attribute set for OpenACC serial.  */
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.
	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
8002 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
8003 if (data_arg)
8004 {
8005 basic_block entry_succ_bb = single_succ (entry_bb);
8006 gimple_stmt_iterator gsi;
8007 tree arg;
8008 gimple *tgtcopy_stmt = NULL;
8009 tree sender = TREE_VEC_ELT (data_arg, 0);
8010
8011 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8012 {
8013 gcc_assert (!gsi_end_p (gsi));
8014 stmt = gsi_stmt (gsi);
8015 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8016 continue;
8017
8018 if (gimple_num_ops (stmt) == 2)
8019 {
8020 tree arg = gimple_assign_rhs1 (stmt);
8021
8022 /* We're ignoring the subcode because we're
8023 effectively doing a STRIP_NOPS. */
8024
8025 if (TREE_CODE (arg) == ADDR_EXPR
8026 && TREE_OPERAND (arg, 0) == sender)
8027 {
8028 tgtcopy_stmt = stmt;
8029 break;
8030 }
8031 }
8032 }
8033
8034 gcc_assert (tgtcopy_stmt != NULL);
8035 arg = DECL_ARGUMENTS (child_fn);
8036
8037 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8038 gsi_remove (&gsi, true);
8039 }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in the containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement, so that the
	 region following it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
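      /* The offload table keeps the host and offload-device versions of
	 each offloaded function at matching indices; DECL_PRESERVE_P
	 additionally keeps CHILD_FN from being pruned when this runs
	 during LTO, before the table has been emitted.  */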
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }
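
  /* START_IX now identifies the libgomp entry point that implements this
     construct at run time; the code below collects that routine's
     arguments from the directive's clauses.  */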

  clauses = gimple_omp_target_clauses (entry_stmt);

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
	     library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }
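
  /* Note the asymmetry: for OpenACC the launch argument built above is a
     flag word (GOACC_FLAG_*) rather than a device number, so host fallback
     is requested by or-ing in GOACC_FLAG_HOST_FALLBACK instead of
     overriding the device, as the 'if' clause handling below shows.  */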
629b3d75 8237
59d5960c
TS
8238 /* By default, there is no conditional. */
8239 tree cond = NULL_TREE;
8240 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8241 if (c)
8242 cond = OMP_CLAUSE_IF_EXPR (c);
8243 /* If we found the clause 'if (cond)', build:
8244 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
8245 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
629b3d75
MJ
8246 if (cond)
8247 {
59d5960c
TS
8248 tree *tp;
8249 if (is_gimple_omp_oacc (entry_stmt))
8250 tp = &goacc_flags;
8251 else
8252 {
8253 /* Ensure 'device' is of the correct type. */
8254 device = fold_convert_loc (device_loc, integer_type_node, device);
8255
8256 tp = &device;
8257 }
8258
629b3d75
MJ
8259 cond = gimple_boolify (cond);
8260
8261 basic_block cond_bb, then_bb, else_bb;
8262 edge e;
8263 tree tmp_var;
8264
59d5960c 8265 tmp_var = create_tmp_var (TREE_TYPE (*tp));
629b3d75
MJ
8266 if (offloaded)
8267 e = split_block_after_labels (new_bb);
8268 else
8269 {
65f4b875 8270 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
8271 gsi_prev (&gsi);
8272 e = split_block (new_bb, gsi_stmt (gsi));
8273 }
8274 cond_bb = e->src;
8275 new_bb = e->dest;
8276 remove_edge (e);
8277
8278 then_bb = create_empty_bb (cond_bb);
8279 else_bb = create_empty_bb (then_bb);
8280 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8281 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8282
8283 stmt = gimple_build_cond_empty (cond);
8284 gsi = gsi_last_bb (cond_bb);
8285 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8286
8287 gsi = gsi_start_bb (then_bb);
59d5960c 8288 stmt = gimple_build_assign (tmp_var, *tp);
629b3d75
MJ
8289 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8290
8291 gsi = gsi_start_bb (else_bb);
59d5960c
TS
8292 if (is_gimple_omp_oacc (entry_stmt))
8293 stmt = gimple_build_assign (tmp_var,
8294 BIT_IOR_EXPR,
8295 *tp,
8296 build_int_cst (integer_type_node,
8297 GOACC_FLAG_HOST_FALLBACK));
8298 else
8299 stmt = gimple_build_assign (tmp_var,
8300 build_int_cst (integer_type_node,
8301 GOMP_DEVICE_HOST_FALLBACK));
629b3d75
MJ
8302 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8303
8304 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8305 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8306 add_bb_to_loop (then_bb, cond_bb->loop_father);
8307 add_bb_to_loop (else_bb, cond_bb->loop_father);
8308 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8309 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8310
59d5960c
TS
8311 *tp = tmp_var;
8312
65f4b875 8313 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
8314 }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
    }
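
  /* Whichever branch was taken, GSI now points just before the spot where
     the runtime call will be inserted, and DEVICE (or GOACC_FLAGS) is
     usable as a gimple operand there.  */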

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }
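
  /* T1 is the number of map entries (derived from the domain of the sizes
     array), and T2/T3/T4 are the addresses of the host-address, size and
     map-kind arrays that target lowering built in the parent function.  */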

  gimple *g;
  bool tagging = false;
  /* 11 is the maximum number of arguments used by any START_IX, not
     counting the trailing varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that it is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default value for T_ASYNC.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg into the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ...  */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
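
  /* For the tagging (GOACC_PARALLEL) case, the trailing varargs pushed
     above are "launch tags": each one packs a GOMP_LAUNCH_* code together
     with a small operand via oacc_launch_pack, and the sequence is
     terminated by the zero tag pushed below.  */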
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_nondebug_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}
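
/* As an illustration of the net effect (a sketch, not verbatim compiler
   output): for a plain "#pragma omp target" with one mapped variable and
   no further clauses, the sequence above amounts to host-side IL of the
   form

     GOMP_target_ext (GOMP_DEVICE_ICV, child_fn, 1, &.omp_data_arr,
		      &.omp_data_sizes, &.omp_data_kinds, 0, 0, args);

   with CHILD_FN holding the outlined region body.  */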

/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
   the iteration variable derived from the thread number.  INTRA_GROUP means
   this is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */

static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE, true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
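
  /* Each dimension's iteration variable is now initialized directly from
     the corresponding work-item/work-group id as V = N1 + ID * STEP, so
     the original loop-control statements are dead and removed below.  */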
  /* Remove the omp for statement.  */
  gsi = gsi_last_nondebug_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_nondebug_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}

/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument decls.  */

struct grid_arg_decl_map
{
  tree old_arg;
  tree new_arg;
};

/* Invoked through walk_gimple_op; will remap all PARM_DECLs to the ones
   pertaining to the kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If the TARGET region contains a kernel body for-loop, remove its region
   from the TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OpenACC constructs.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

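  /* Build a fresh function decl for the gridified kernel: a copy of the
     original child function's decl, renamed with a "kernel" suffix and
     given its own argument and result decls so that the new function is
     self-contained.  */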
  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
							  "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the GIMPLE_OMP_GRID_BODY statement at the start of the kernel
     region.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

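  /* Everything the kernel needs now sits between GPUKERNEL->entry and
     GPUKERNEL->exit; move that region wholesale into KERN_CFUN.  */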
  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding the corresponding
		   worksharing region with an ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, a whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing it in
   root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succ edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"