/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
                                     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

        #pragma omp parallel for schedule (guided, i * 4)
        for (j ...)

   Is lowered into:

        # BLOCK 2 (PAR_ENTRY_BB)
        .omp_data_o.i = i;
        #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

        # BLOCK 3 (WS_ENTRY_BB)
        .omp_data_i = &.omp_data_o;
        D.1667 = .omp_data_i->i;
        D.1598 = D.1667 * 4;
        #pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */
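
/* For example, with the simd modifier present and a vectorization
   factor of 4, a chunk size of 5 is rounded up to (5 + 3) & -4 == 8,
   i.e. the next multiple of the vectorization factor.  */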

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
                            build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
                      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */
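
/* For a combined parallel loop these are the start, end and step
   values (plus the adjusted chunk size, if any) converted to long;
   expand_parallel_call splices them into the GOMP_parallel_loop_*
   argument list between the num_threads value and the flags word.  */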

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
        {
          tree innerc
            = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
                               OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n1 = OMP_CLAUSE_DECL (innerc);
          innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                    OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (innerc);
          n2 = OMP_CLAUSE_DECL (innerc);
        }

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
        {
          t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
          t = omp_adjust_chunk_size (t, fd.simd_schedule);
          ws_args->quick_push (t);
        }

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
         GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
         the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
          && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while they are
     implementable, adding another big set of APIs or slowing down
     the normal paths is not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
          || (last_and_only_stmt (ws_entry_bb)
              && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
        {
          /* If this is a combined parallel loop, we need to determine
             whether or not to use the combined library calls.  There
             are two cases where we do not apply the transformation:
             static loops and any kind of ordered loop.  In the first
             case, we already open-code the loop so there is no need
             to do anything else.  In the latter case, the combined
             parallel loop call would still need extra synchronization
             to implement ordered semantics, so there would not be any
             gain in using the combined call.  */
          tree clauses = gimple_omp_for_clauses (ws_stmt);
          tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
          if (c == NULL
              || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
                  == OMP_CLAUSE_SCHEDULE_STATIC)
              || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
              || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
              || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
                  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
            return;
        }
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
               && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                    OMP_CLAUSE__REDUCTEMP_)
                   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
                                       OMP_CLAUSE__CONDTEMP_)))
        return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
           gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
               region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
             region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at BB inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
                struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
         regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
         regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
        indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
        {
          gomp_target *tgt_stmt
            = as_a <gomp_target *> (last_stmt (region->entry));

          if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
                               OMP_CLAUSE__GRIDDIM_))
            return indirect;
          else
            return true;
        }
    }

  if (lookup_attribute ("omp declare target",
                        DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
                          tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region,
     when expanding the task region current_function_decl will be the
     original source function, but we actually want to use as context
     the child function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
        entry_stmt = last_stmt (region->entry);
        parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
        break;
      case GIMPLE_OMP_TARGET:
        entry_stmt = last_stmt (region->entry);
        parent_fndecl
          = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
        break;
      default:
        break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
        {
          DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
          BLOCK_VARS (b) = child_fndecl;
        }
    }
}

/* Build the function calls to GOMP_parallel etc. to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where the code should be
   inserted.  WS_ARGS will be set if this is a call to a combined
   parallel+workshare construct; it contains the list of additional
   arguments needed by the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
                      gomp_parallel *entry_stmt,
                      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
        {
        case GIMPLE_OMP_FOR:
          gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
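          /* START_IX2 is an offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC
             in the builtin table: plain sched_kind selects the monotonic
             static/dynamic/guided entry points, 3 the monotonic runtime
             one, 3 + sched_kind the nonmonotonic dynamic/guided ones, and
             6 and 7 the nonmonotonic and maybe-nonmonotonic runtime
             ones.  */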
          switch (region->inner->sched_kind)
            {
            case OMP_CLAUSE_SCHEDULE_RUNTIME:
              /* For lastprivate(conditional:), our implementation
                 requires monotonic behavior.  */
              if (region->inner->has_lastprivate_conditional != 0)
                start_ix2 = 3;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
                start_ix2 = 6;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
                start_ix2 = 7;
              else
                start_ix2 = 3;
              break;
            case OMP_CLAUSE_SCHEDULE_DYNAMIC:
            case OMP_CLAUSE_SCHEDULE_GUIDED:
              if ((region->inner->sched_modifiers
                   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
                  && !region->inner->has_lastprivate_conditional)
                {
                  start_ix2 = 3 + region->inner->sched_kind;
                  break;
                }
              /* FALLTHRU */
            default:
              start_ix2 = region->inner->sched_kind;
              break;
            }
          start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
          start_ix = (enum built_in_function) start_ix2;
          break;
        case GIMPLE_OMP_SECTIONS:
          start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
        val = fold_build2_loc (clause_loc,
                               EQ_EXPR, unsigned_type_node, cond,
                               build_int_cst (TREE_TYPE (cond), 0));
      else
        {
          basic_block cond_bb, then_bb, else_bb;
          edge e, e_then, e_else;
          tree tmp_then, tmp_else, tmp_join, tmp_var;

          tmp_var = create_tmp_var (TREE_TYPE (val));
          if (gimple_in_ssa_p (cfun))
            {
              tmp_then = make_ssa_name (tmp_var);
              tmp_else = make_ssa_name (tmp_var);
              tmp_join = make_ssa_name (tmp_var);
            }
          else
            {
              tmp_then = tmp_var;
              tmp_else = tmp_var;
              tmp_join = tmp_var;
            }

          e = split_block_after_labels (bb);
          cond_bb = e->src;
          bb = e->dest;
          remove_edge (e);

          then_bb = create_empty_bb (cond_bb);
          else_bb = create_empty_bb (then_bb);
          set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
          set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

          stmt = gimple_build_cond_empty (cond);
          gsi = gsi_start_bb (cond_bb);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

          gsi = gsi_start_bb (then_bb);
          expand_omp_build_assign (&gsi, tmp_then, val, true);

          gsi = gsi_start_bb (else_bb);
          expand_omp_build_assign (&gsi, tmp_else,
                                   build_int_cst (unsigned_type_node, 1),
                                   true);

          make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
          make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
          add_bb_to_loop (then_bb, cond_bb->loop_father);
          add_bb_to_loop (else_bb, cond_bb->loop_father);
          e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
          e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

          if (gimple_in_ssa_p (cfun))
            {
              gphi *phi = create_phi_node (tmp_join, bb);
              add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
              add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
            }

          val = tmp_join;
        }

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
                                      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

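  /* Assemble the GOMP_parallel* argument list: the outlined child
     function, the address of the shared data block, the num_threads
     value, any combined-workshare arguments, and the flags word.  */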
  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
                  fold_convert (type,
                                fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where the code
   should be inserted.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
                  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
                  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
                                OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
        iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
        num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
        {
          num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
          if (num_tasks)
            {
              iflags |= GOMP_TASK_FLAG_GRAINSIZE;
              num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
            }
          else
            num_tasks = integer_zero_node;
        }
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
        iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
        iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
        iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
        {
          tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
          t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                               build_int_cst (unsigned_type_node,
                                              GOMP_TASK_FLAG_IF),
                               build_int_cst (unsigned_type_node, 0));
          flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
                                   flags, t);
        }
      else
        cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
                           build_int_cst (unsigned_type_node,
                                          GOMP_TASK_FLAG_FINAL),
                           build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
                             OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
                         ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
                         : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
                         11, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), flags,
                         num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
                         9, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), cond, flags,
                         depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where the code
   should be inserted.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
                       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  BB is the block where the
   code should be inserted.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

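  /* The emitted call is GOMP_teams_reg (child_fn, &data, num_teams,
     thread_limit, flags), with the flags argument currently always
     zero.  */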
  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
                               args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in V by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel
   region is an implicit barrier, any barrier that a workshare inside
   the GIMPLE_OMP_PARALLEL left at the end of the region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
        continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
          && !gimple_omp_return_nowait_p (stmt))
        {
          /* OpenMP 3.0 tasks unfortunately prevent this optimization
             in many cases.  If there could be tasks queued, the barrier
             might be needed to let the tasks run before some local
             variable of the parallel that the task uses as shared
             runs out of scope.  The task can be spawned either
             from within current function (this would be easy to check)
             or from some function it calls and gets passed an address
             of such a variable.  */
          if (any_addressable_vars < 0)
            {
              gomp_parallel *parallel_stmt
                = as_a <gomp_parallel *> (last_stmt (region->entry));
              tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
              tree local_decls, block, decl;
              unsigned ix;

              any_addressable_vars = 0;
              FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
                if (TREE_ADDRESSABLE (decl))
                  {
                    any_addressable_vars = 1;
                    break;
                  }
              for (block = gimple_block (stmt);
                   !any_addressable_vars
                   && block
                   && TREE_CODE (block) == BLOCK;
                   block = BLOCK_SUPERCONTEXT (block))
                {
                  for (local_decls = BLOCK_VARS (block);
                       local_decls;
                       local_decls = DECL_CHAIN (local_decls))
                    if (TREE_ADDRESSABLE (local_decls))
                      {
                        any_addressable_vars = 1;
                        break;
                      }
                  if (block == gimple_block (parallel_stmt))
                    break;
                }
            }
          if (!any_addressable_vars)
            gimple_omp_return_set_nowait (stmt);
        }
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
        {
          region = region->next;
          remove_exit_barriers (region);
        }
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for the task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
                      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
                                          OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *call = gsi_stmt (gsi);
        tree decl;

        if (is_gimple_call (call)
            && (decl = gimple_call_fndecl (call))
            && DECL_EXTERNAL (decl)
            && TREE_PUBLIC (decl)
            && DECL_INITIAL (decl) == NULL)
          {
            tree built_in;

            if (DECL_NAME (decl) == thr_num_id)
              {
                /* In #pragma omp task untied omp_get_thread_num () can change
                   during the execution of the task region.  */
                if (untied_task)
                  continue;
                built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
              }
            else if (DECL_NAME (decl) == num_thr_id)
              built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
            else
              continue;

            if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
                || gimple_call_num_args (call) != 0)
              continue;

            if (flag_exceptions && !TREE_NOTHROW (decl))
              continue;

            if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
                || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
                                        TREE_TYPE (TREE_TYPE (built_in))))
              continue;

            gimple_call_set_fndecl (call, built_in);
          }
      }
}

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
                         bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
                                   !after, after ? GSI_CONTINUE_LINKING
                                                 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
         the region, in which case all we need to do is make the
         sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
                  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
        {
          exit_succ_e = single_succ_edge (exit_bb);
          make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
        }
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
         function, then the very first statement (except possible
         tree profile counter updates) of the parallel body
         is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
         &.OMP_DATA_O is passed as an argument to the child function,
         we need to replace it with the argument as seen by the child
         function.

         In most cases, this will end up being the identity assignment
         .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
         a function call that has been inlined, the original PARM_DECL
         .OMP_DATA_I may have been converted into a different local
         variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
        {
          basic_block entry_succ_bb
            = single_succ_p (entry_bb) ? single_succ (entry_bb)
                                       : FALLTHRU_EDGE (entry_bb)->dest;
          tree arg;
          gimple *parcopy_stmt = NULL;

          for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
            {
              gimple *stmt;

              gcc_assert (!gsi_end_p (gsi));
              stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) != GIMPLE_ASSIGN)
                continue;

              if (gimple_num_ops (stmt) == 2)
                {
                  tree arg = gimple_assign_rhs1 (stmt);

                  /* We're ignoring the subcode because we're
                     effectively doing a STRIP_NOPS.  */

                  if (TREE_CODE (arg) == ADDR_EXPR
                      && (TREE_OPERAND (arg, 0)
                          == gimple_omp_taskreg_data_arg (entry_stmt)))
                    {
                      parcopy_stmt = stmt;
                      break;
                    }
                }
            }

          gcc_assert (parcopy_stmt != NULL);
          arg = DECL_ARGUMENTS (child_fn);

          if (!gimple_in_ssa_p (cfun))
            {
              if (gimple_assign_lhs (parcopy_stmt) == arg)
                gsi_remove (&gsi, true);
              else
                {
                  /* ?? Is setting the subcode really necessary ??  */
                  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
                  gimple_assign_set_rhs1 (parcopy_stmt, arg);
                }
            }
          else
            {
              tree lhs = gimple_assign_lhs (parcopy_stmt);
              gcc_assert (SSA_NAME_VAR (lhs) == arg);
              /* We'd like to set the rhs to the default def in the child_fn,
                 but it's too early to create ssa names in the child_fn.
                 Instead, we set the rhs to the parm.  In
                 move_sese_region_to_fn, we introduce a default def for the
                 parm, map the parm to its default def, and once we encounter
                 this stmt, replace the parm with the default def.  */
              gimple_assign_set_rhs1 (parcopy_stmt, arg);
              update_stmt (parcopy_stmt);
            }
        }

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
         rather than in containing function's local_decls chain,
         which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
        if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
          varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
        DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
         so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
                           || gimple_code (stmt) == GIMPLE_OMP_TASK
                           || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
        single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
        {
          e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
          gcc_assert (e2->dest == region->exit);
          remove_edge (BRANCH_EDGE (entry_bb));
          set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
          gsi = gsi_last_nondebug_bb (region->exit);
          gcc_assert (!gsi_end_p (gsi)
                      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
          gsi_remove (&gsi, true);
        }

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
        {
          gsi = gsi_last_nondebug_bb (exit_bb);
          gcc_assert (!gsi_end_p (gsi)
                      && (gimple_code (gsi_stmt (gsi))
                          == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
          stmt = gimple_build_return (NULL);
          gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
          gsi_remove (&gsi, true);
        }

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
        {
          init_tree_ssa (child_cfun);
          init_ssa_operands (child_cfun);
          child_cfun->gimple_df->in_ssa_p = true;
          block = NULL_TREE;
        }
      else
        block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
        {
          basic_block dest_bb = e2->dest;
          if (!exit_bb)
            make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
          remove_edge (e2);
          set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
        }
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
        {
          t = (*child_cfun->local_decls)[srcidx];
          if (DECL_CONTEXT (t) == cfun->decl)
            continue;
          if (srcidx != dstidx)
            (*child_cfun->local_decls)[dstidx] = t;
          dstidx++;
        }
      if (dstidx != num)
        vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
                      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
        assign_assembler_name_if_needed (child_fn);

      if (optimize)
        optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
         pass_cleanup_cfg isn't the first pass to happen with the
         new child, these dead EH edges might cause problems.
         Clean them up now.  */
      if (flag_exceptions)
        {
          basic_block bb;
          bool changed = false;

          FOR_EACH_BB_FN (bb, cfun)
            changed |= gimple_purge_dead_eh_edges (bb);
          if (changed)
            cleanup_tree_cfg ();
        }
      if (gimple_in_ssa_p (cfun))
        update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
        {
          omp_any_child_fn_dumped = true;
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
                          as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
                           gimple_stmt_iterator *gsi,
                           oacc_collapse *counts, tree bound_type,
                           location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      if (POINTER_TYPE_P (iter_type))
        plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
        diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
        diff_type = integer_type_node;

      if (tiling)
        {
          tree num = build_int_cst (integer_type_node, fd->collapse);
          tree loop_no = build_int_cst (integer_type_node, ix);
          tree tile = TREE_VALUE (tiling);
          gcall *call
            = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
                                          tile,
                                          /* gwv-outer=*/integer_zero_node,
                                          /* gwv-inner=*/integer_zero_node);

          counts[ix].outer = create_tmp_var (iter_type, ".outer");
          counts[ix].tile = create_tmp_var (diff_type, ".tile");
          gimple_call_set_lhs (call, counts[ix].tile);
          gimple_set_location (call, loc);
          gsi_insert_before (gsi, call, GSI_SAME_STMT);

          tiling = TREE_CHAIN (tiling);
        }
      else
        {
          counts[ix].tile = NULL;
          counts[ix].outer = loop->v;
        }

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
        s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
        s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
                          fold_convert (plus_type, negating ? b : e),
                          fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
        expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
        (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
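      /* That is, iters = (range - dir + step) / step, which for an
         increasing loop is ceil (range / step): e.g. n1 = 0, n2 = 10,
         step = 3 gives (10 - 1 + 3) / 3 == 4 iterations (0, 3, 6, 9).  */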
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
                           fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
                           gimple_stmt_iterator *gsi,
                           const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
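  /* E.g. for collapse (2) loops with 3 and 4 iterations respectively,
     IVAR = 7 yields 7 % 4 == 3 for the innermost loop and 7 / 4 == 1
     for the outer one.  */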
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
        {
          plus_code = POINTER_PLUS_EXPR;
          plus_type = sizetype;
        }

      expr = ivar;
      if (ix)
        {
          tree mod = fold_convert (ivar_type, collapse->iters);
          ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
          expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
          ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
                                           true, GSI_SAME_STMT);
        }

      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
                          collapse->step);
      expr = fold_build2 (plus_code, iter_type,
                          inner ? collapse->outer : collapse->base,
                          fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
                                       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}
1710
1711/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. */
1737
1738/* NOTE: It *could* be better to moosh all of the BBs together,
1739 creating one larger BB with all the computation and the unexpected
1740 jump at the end. I.e.
1741
1742 bool zero3, zero2, zero1, zero;
1743
1744 zero3 = N32 c3 N31;
1745 count3 = (N32 - N31) /[cl] STEP3;
1746 zero2 = N22 c2 N21;
1747 count2 = (N22 - N21) /[cl] STEP2;
1748 zero1 = N12 c1 N11;
1749 count1 = (N12 - N11) /[cl] STEP1;
1750 zero = zero3 || zero2 || zero1;
1751 count = count1 * count2 * count3;
1752 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1753
 1754 After all, we expect zero to be false, and thus we expect to have to
1755 evaluate all of the comparison expressions, so short-circuiting
1756 oughtn't be a win. Since the condition isn't protecting a
1757 denominator, we're not concerned about divide-by-zero, so we can
1758 fully evaluate count even if a numerator turned out to be wrong.
1759
1760 It seems like putting this all together would create much better
1761 scheduling opportunities, and less pressure on the chip's branch
1762 predictor. */
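
/* Worked example of the count computation above: for
 collapse(2)
 for (i = 0; i < 4; i++) // cond1 is <, STEP1 = 1
 for (j = 7; j > 1; j -= 2) // cond2 is >, STEP2 = -2
 count1 = (1 - 1 + 4 - 0) / 1 = 4,
 count2 = (-2 + 1 + 1 - 7) / -2 = 3 (j takes 7, 5, 3),
 count = 4 * 3 = 12. */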
1763
1764static void
1765expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1766 basic_block &entry_bb, tree *counts,
1767 basic_block &zero_iter1_bb, int &first_zero_iter1,
1768 basic_block &zero_iter2_bb, int &first_zero_iter2,
1769 basic_block &l2_dom_bb)
1770{
1771 tree t, type = TREE_TYPE (fd->loop.v);
1772 edge e, ne;
1773 int i;
1774
1775 /* Collapsed loops need work for expansion into SSA form. */
1776 gcc_assert (!gimple_in_ssa_p (cfun));
1777
1778 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1779 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1780 {
1781 gcc_assert (fd->ordered == 0);
 1782 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
 1783 isn't supposed to be handled, as the inner loop doesn't
 1784 use it. */
1785 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 for (i = 0; i < fd->collapse; i++)
1789 {
1790 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1791 OMP_CLAUSE__LOOPTEMP_);
1792 gcc_assert (innerc);
1793 if (i)
1794 counts[i] = OMP_CLAUSE_DECL (innerc);
1795 else
1796 counts[0] = NULL_TREE;
1797 }
1798 return;
1799 }
1800
1801 for (i = fd->collapse; i < fd->ordered; i++)
1802 {
1803 tree itype = TREE_TYPE (fd->loops[i].v);
1804 counts[i] = NULL_TREE;
1805 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1806 fold_convert (itype, fd->loops[i].n1),
1807 fold_convert (itype, fd->loops[i].n2));
1808 if (t && integer_zerop (t))
1809 {
1810 for (i = fd->collapse; i < fd->ordered; i++)
1811 counts[i] = build_int_cst (type, 0);
1812 break;
1813 }
1814 }
1815 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1816 {
1817 tree itype = TREE_TYPE (fd->loops[i].v);
1818
1819 if (i >= fd->collapse && counts[i])
1820 continue;
1821 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1822 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1823 fold_convert (itype, fd->loops[i].n1),
1824 fold_convert (itype, fd->loops[i].n2)))
1825 == NULL_TREE || !integer_onep (t)))
1826 {
1827 gcond *cond_stmt;
1828 tree n1, n2;
1829 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1830 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1831 true, GSI_SAME_STMT);
1832 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1833 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1834 true, GSI_SAME_STMT);
1835 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1836 NULL_TREE, NULL_TREE);
1837 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1838 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1839 expand_omp_regimplify_p, NULL, NULL)
1840 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1841 expand_omp_regimplify_p, NULL, NULL))
1842 {
1843 *gsi = gsi_for_stmt (cond_stmt);
1844 gimple_regimplify_operands (cond_stmt, gsi);
1845 }
1846 e = split_block (entry_bb, cond_stmt);
1847 basic_block &zero_iter_bb
1848 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1849 int &first_zero_iter
1850 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1851 if (zero_iter_bb == NULL)
1852 {
1853 gassign *assign_stmt;
1854 first_zero_iter = i;
1855 zero_iter_bb = create_empty_bb (entry_bb);
1856 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1857 *gsi = gsi_after_labels (zero_iter_bb);
1858 if (i < fd->collapse)
1859 assign_stmt = gimple_build_assign (fd->loop.n2,
1860 build_zero_cst (type));
1861 else
1862 {
1863 counts[i] = create_tmp_reg (type, ".count");
1864 assign_stmt
1865 = gimple_build_assign (counts[i], build_zero_cst (type));
1866 }
1867 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1868 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1869 entry_bb);
1870 }
1871 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
720cfc43 1872 ne->probability = profile_probability::very_unlikely ();
4954efd4 1873 e->flags = EDGE_TRUE_VALUE;
720cfc43 1874 e->probability = ne->probability.invert ();
4954efd4 1875 if (l2_dom_bb == NULL)
1876 l2_dom_bb = entry_bb;
1877 entry_bb = e->dest;
bce107d7 1878 *gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 1879 }
1880
1881 if (POINTER_TYPE_P (itype))
1882 itype = signed_type_for (itype);
1883 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1884 ? -1 : 1));
1885 t = fold_build2 (PLUS_EXPR, itype,
1886 fold_convert (itype, fd->loops[i].step), t);
1887 t = fold_build2 (PLUS_EXPR, itype, t,
1888 fold_convert (itype, fd->loops[i].n2));
1889 t = fold_build2 (MINUS_EXPR, itype, t,
1890 fold_convert (itype, fd->loops[i].n1));
 1891 /* ?? We could probably use CEIL_DIV_EXPR instead of
 1892 TRUNC_DIV_EXPR and adjust by hand, except that we might not
 1893 generate the same code in the end, because generically we
 1894 don't know that the values involved must be negative for
 1895 GT. ?? */
1896 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1897 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1898 fold_build1 (NEGATE_EXPR, itype, t),
1899 fold_build1 (NEGATE_EXPR, itype,
1900 fold_convert (itype,
1901 fd->loops[i].step)));
1902 else
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1904 fold_convert (itype, fd->loops[i].step));
1905 t = fold_convert (type, t);
1906 if (TREE_CODE (t) == INTEGER_CST)
1907 counts[i] = t;
1908 else
1909 {
1910 if (i < fd->collapse || i != first_zero_iter2)
1911 counts[i] = create_tmp_reg (type, ".count");
1912 expand_omp_build_assign (gsi, counts[i], t);
1913 }
1914 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1915 {
1916 if (i == 0)
1917 t = counts[0];
1918 else
1919 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1920 expand_omp_build_assign (gsi, fd->loop.n2, t);
1921 }
1922 }
1923}
1924
1925/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1926 T = V;
1927 V3 = N31 + (T % count3) * STEP3;
1928 T = T / count3;
1929 V2 = N21 + (T % count2) * STEP2;
1930 T = T / count2;
1931 V1 = N11 + T * STEP1;
1932 if this loop doesn't have an inner loop construct combined with it.
1933 If it does have an inner loop construct combined with it and the
1934 iteration count isn't known constant, store values from counts array
1935 into its _looptemp_ temporaries instead. */
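
/* Worked example: with count3 = 5, count2 = 4 and T = V = 27,
 V3 = N31 + (27 % 5) * STEP3 = N31 + 2 * STEP3, T = 27 / 5 = 5
 V2 = N21 + (5 % 4) * STEP2 = N21 + 1 * STEP2, T = 5 / 4 = 1
 V1 = N11 + 1 * STEP1
 i.e. V is decomposed digit by digit as a mixed-radix number,
 innermost radix count3 first, then count2. */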
1936
1937static void
1938expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1939 tree *counts, gimple *inner_stmt, tree startvar)
1940{
1941 int i;
1942 if (gimple_omp_for_combined_p (fd->for_stmt))
1943 {
1944 /* If fd->loop.n2 is constant, then no propagation of the counts
1945 is needed, they are constant. */
1946 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1947 return;
1948
1949 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1950 ? gimple_omp_taskreg_clauses (inner_stmt)
1951 : gimple_omp_for_clauses (inner_stmt);
 1952 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
 1953 isn't supposed to be handled, as the inner loop doesn't
 1954 use it. */
1955 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1956 gcc_assert (innerc);
1957 for (i = 0; i < fd->collapse; i++)
1958 {
1959 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1960 OMP_CLAUSE__LOOPTEMP_);
1961 gcc_assert (innerc);
1962 if (i)
1963 {
1964 tree tem = OMP_CLAUSE_DECL (innerc);
1965 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1967 false, GSI_CONTINUE_LINKING);
1968 gassign *stmt = gimple_build_assign (tem, t);
1969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1970 }
1971 }
1972 return;
1973 }
1974
1975 tree type = TREE_TYPE (fd->loop.v);
1976 tree tem = create_tmp_reg (type, ".tem");
1977 gassign *stmt = gimple_build_assign (tem, startvar);
1978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1979
1980 for (i = fd->collapse - 1; i >= 0; i--)
1981 {
1982 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1983 itype = vtype;
1984 if (POINTER_TYPE_P (vtype))
1985 itype = signed_type_for (vtype);
1986 if (i != 0)
1987 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1988 else
1989 t = tem;
1990 t = fold_convert (itype, t);
1991 t = fold_build2 (MULT_EXPR, itype, t,
1992 fold_convert (itype, fd->loops[i].step));
1993 if (POINTER_TYPE_P (vtype))
1994 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1995 else
1996 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1997 t = force_gimple_operand_gsi (gsi, t,
1998 DECL_P (fd->loops[i].v)
1999 && TREE_ADDRESSABLE (fd->loops[i].v),
2000 NULL_TREE, false,
2001 GSI_CONTINUE_LINKING);
2002 stmt = gimple_build_assign (fd->loops[i].v, t);
2003 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2004 if (i != 0)
2005 {
2006 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2007 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2008 false, GSI_CONTINUE_LINKING);
2009 stmt = gimple_build_assign (tem, t);
2010 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2011 }
2012 }
2013}
2014
2015/* Helper function for expand_omp_for_*. Generate code like:
2016 L10:
2017 V3 += STEP3;
2018 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2019 L11:
2020 V3 = N31;
2021 V2 += STEP2;
2022 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2023 L12:
2024 V2 = N21;
2025 V1 += STEP1;
2026 goto BODY_BB; */
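
/* I.e. the blocks generated here advance the collapsed iteration
 vector like an odometer: the innermost variable is bumped first,
 and on wrap-around it is reset to its lower bound and the next
 outer variable is bumped instead. */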
2027
2028static basic_block
2029extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2030 basic_block body_bb)
2031{
2032 basic_block last_bb, bb, collapse_bb = NULL;
2033 int i;
2034 gimple_stmt_iterator gsi;
2035 edge e;
2036 tree t;
2037 gimple *stmt;
2038
2039 last_bb = cont_bb;
2040 for (i = fd->collapse - 1; i >= 0; i--)
2041 {
2042 tree vtype = TREE_TYPE (fd->loops[i].v);
2043
2044 bb = create_empty_bb (last_bb);
2045 add_bb_to_loop (bb, last_bb->loop_father);
2046 gsi = gsi_start_bb (bb);
2047
2048 if (i < fd->collapse - 1)
2049 {
2050 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
720cfc43 2051 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
4954efd4 2052
2053 t = fd->loops[i + 1].n1;
2054 t = force_gimple_operand_gsi (&gsi, t,
2055 DECL_P (fd->loops[i + 1].v)
2056 && TREE_ADDRESSABLE (fd->loops[i
2057 + 1].v),
2058 NULL_TREE, false,
2059 GSI_CONTINUE_LINKING);
2060 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2062 }
2063 else
2064 collapse_bb = bb;
2065
2066 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2067
2068 if (POINTER_TYPE_P (vtype))
2069 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2070 else
2071 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2072 t = force_gimple_operand_gsi (&gsi, t,
2073 DECL_P (fd->loops[i].v)
2074 && TREE_ADDRESSABLE (fd->loops[i].v),
2075 NULL_TREE, false, GSI_CONTINUE_LINKING);
2076 stmt = gimple_build_assign (fd->loops[i].v, t);
2077 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2078
2079 if (i > 0)
2080 {
2081 t = fd->loops[i].n2;
2082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 tree v = fd->loops[i].v;
2085 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2086 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2087 false, GSI_CONTINUE_LINKING);
2088 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2089 stmt = gimple_build_cond_empty (t);
2090 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
095711ed 2091 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2092 expand_omp_regimplify_p, NULL, NULL)
2093 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2094 expand_omp_regimplify_p, NULL, NULL))
2095 gimple_regimplify_operands (stmt, &gsi);
4954efd4 2096 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
720cfc43 2097 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4954efd4 2098 }
2099 else
2100 make_edge (bb, body_bb, EDGE_FALLTHRU);
2101 last_bb = bb;
2102 }
2103
2104 return collapse_bb;
2105}
2106
2107/* Expand #pragma omp ordered depend(source). */
2108
2109static void
2110expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2111 tree *counts, location_t loc)
2112{
2113 enum built_in_function source_ix
2114 = fd->iter_type == long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2116 gimple *g
2117 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2118 build_fold_addr_expr (counts[fd->ordered]));
2119 gimple_set_location (g, loc);
2120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2121}
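
/* For instance (assuming long iterators), a #pragma omp ordered
 depend(source) thus becomes a call
 GOMP_doacross_post (&.orditera[0]);
 passing the address of the counts[fd->ordered] array that holds
 the current iteration vector. */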
2122
2123/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2124
2125static void
2126expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2127 tree *counts, tree c, location_t loc)
2128{
2129 auto_vec<tree, 10> args;
2130 enum built_in_function sink_ix
2131 = fd->iter_type == long_integer_type_node
2132 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2133 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2134 int i;
2135 gimple_stmt_iterator gsi2 = *gsi;
2136 bool warned_step = false;
2137
2138 for (i = 0; i < fd->ordered; i++)
2139 {
2140 tree step = NULL_TREE;
2141 off = TREE_PURPOSE (deps);
2142 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2143 {
2144 step = TREE_OPERAND (off, 1);
2145 off = TREE_OPERAND (off, 0);
2146 }
2147 if (!integer_zerop (off))
2148 {
2149 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2150 || fd->loops[i].cond_code == GT_EXPR);
2151 bool forward = fd->loops[i].cond_code == LT_EXPR;
2152 if (step)
2153 {
 2154 /* Non-simple Fortran DO loops. If the step is variable,
 2155 we don't know even the direction at compile time, so
 2156 we cannot warn. */
2157 if (TREE_CODE (step) != INTEGER_CST)
2158 break;
2159 forward = tree_int_cst_sgn (step) != -1;
2160 }
2161 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
71c5a0bc 2162 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2163 "waiting for lexically later iteration");
4954efd4 2164 break;
2165 }
2166 deps = TREE_CHAIN (deps);
2167 }
2168 /* If all offsets corresponding to the collapsed loops are zero,
2169 this depend clause can be ignored. FIXME: but there is still a
2170 flush needed. We need to emit one __sync_synchronize () for it
2171 though (perhaps conditionally)? Solve this together with the
2172 conservative dependence folding optimization.
2173 if (i >= fd->collapse)
2174 return; */
2175
2176 deps = OMP_CLAUSE_DECL (c);
2177 gsi_prev (&gsi2);
2178 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2179 edge e2 = split_block_after_labels (e1->dest);
2180
2181 gsi2 = gsi_after_labels (e1->dest);
2182 *gsi = gsi_last_bb (e1->src);
2183 for (i = 0; i < fd->ordered; i++)
2184 {
2185 tree itype = TREE_TYPE (fd->loops[i].v);
2186 tree step = NULL_TREE;
2187 tree orig_off = NULL_TREE;
2188 if (POINTER_TYPE_P (itype))
2189 itype = sizetype;
2190 if (i)
2191 deps = TREE_CHAIN (deps);
2192 off = TREE_PURPOSE (deps);
2193 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2194 {
2195 step = TREE_OPERAND (off, 1);
2196 off = TREE_OPERAND (off, 0);
2197 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2198 && integer_onep (fd->loops[i].step)
2199 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2200 }
2201 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2202 if (step)
2203 {
2204 off = fold_convert_loc (loc, itype, off);
2205 orig_off = off;
2206 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2207 }
2208
2209 if (integer_zerop (off))
2210 t = boolean_true_node;
2211 else
2212 {
2213 tree a;
2214 tree co = fold_convert_loc (loc, itype, off);
2215 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2216 {
2217 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2218 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2219 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2220 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2221 co);
2222 }
2223 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2224 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2225 fd->loops[i].v, co);
2226 else
2227 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2228 fd->loops[i].v, co);
2229 if (step)
2230 {
2231 tree t1, t2;
2232 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2233 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2234 fd->loops[i].n1);
2235 else
2236 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2237 fd->loops[i].n2);
2238 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2239 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2240 fd->loops[i].n2);
2241 else
2242 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2243 fd->loops[i].n1);
2244 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2245 step, build_int_cst (TREE_TYPE (step), 0));
2246 if (TREE_CODE (step) != INTEGER_CST)
2247 {
2248 t1 = unshare_expr (t1);
2249 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2250 false, GSI_CONTINUE_LINKING);
2251 t2 = unshare_expr (t2);
2252 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 }
2255 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2256 t, t2, t1);
2257 }
2258 else if (fd->loops[i].cond_code == LT_EXPR)
2259 {
2260 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2261 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2262 fd->loops[i].n1);
2263 else
2264 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2265 fd->loops[i].n2);
2266 }
2267 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2268 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2269 fd->loops[i].n2);
2270 else
2271 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2272 fd->loops[i].n1);
2273 }
2274 if (cond)
2275 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2276 else
2277 cond = t;
2278
2279 off = fold_convert_loc (loc, itype, off);
2280
2281 if (step
2282 || (fd->loops[i].cond_code == LT_EXPR
2283 ? !integer_onep (fd->loops[i].step)
2284 : !integer_minus_onep (fd->loops[i].step)))
2285 {
2286 if (step == NULL_TREE
2287 && TYPE_UNSIGNED (itype)
2288 && fd->loops[i].cond_code == GT_EXPR)
2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2290 fold_build1_loc (loc, NEGATE_EXPR, itype,
2291 s));
2292 else
2293 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2294 orig_off ? orig_off : off, s);
2295 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2296 build_int_cst (itype, 0));
2297 if (integer_zerop (t) && !warned_step)
2298 {
71c5a0bc 2299 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2300 "refers to iteration never in the iteration "
2301 "space");
4954efd4 2302 warned_step = true;
2303 }
2304 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2305 cond, t);
2306 }
2307
2308 if (i <= fd->collapse - 1 && fd->collapse > 1)
2309 t = fd->loop.v;
2310 else if (counts[i])
2311 t = counts[i];
2312 else
2313 {
2314 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2315 fd->loops[i].v, fd->loops[i].n1);
2316 t = fold_convert_loc (loc, fd->iter_type, t);
2317 }
2318 if (step)
 2319 /* We have already divided off by step earlier. */;
2320 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2322 fold_build1_loc (loc, NEGATE_EXPR, itype,
2323 s));
2324 else
2325 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2326 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2327 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2328 off = fold_convert_loc (loc, fd->iter_type, off);
2329 if (i <= fd->collapse - 1 && fd->collapse > 1)
2330 {
2331 if (i)
2332 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2333 off);
2334 if (i < fd->collapse - 1)
2335 {
2336 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2337 counts[i]);
2338 continue;
2339 }
2340 }
2341 off = unshare_expr (off);
2342 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2343 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2344 true, GSI_SAME_STMT);
2345 args.safe_push (t);
2346 }
2347 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2348 gimple_set_location (g, loc);
2349 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2350
2351 cond = unshare_expr (cond);
2352 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2353 GSI_CONTINUE_LINKING);
2354 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2355 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
720cfc43 2356 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2357 e1->probability = e3->probability.invert ();
4954efd4 2358 e1->flags = EDGE_TRUE_VALUE;
2359 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2360
2361 *gsi = gsi_after_labels (e2->dest);
2362}
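
/* For instance (unit steps, long iterators), for
 #pragma omp ordered depend(sink: i - 1, j + 1)
 the code above emits, guarded by a check that iteration
 (i - 1, j + 1) exists in the iteration space,
 GOMP_doacross_wait (i - 1 - N11, j + 1 - N21);
 i.e. the sink offsets are translated into 0-based iteration
 counts before calling into libgomp. */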
2363
2364/* Expand all #pragma omp ordered depend(source) and
2365 #pragma omp ordered depend(sink:...) constructs in the current
2366 #pragma omp for ordered(n) region. */
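
/* (Layout note: counts[fd->ordered] is an array of
 fd->ordered - fd->collapse + 1 counters; slot 0 tracks the
 collapsed nest as a whole, with one further slot per
 non-collapsed ordered loop.) */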
2367
2368static void
2369expand_omp_ordered_source_sink (struct omp_region *region,
2370 struct omp_for_data *fd, tree *counts,
2371 basic_block cont_bb)
2372{
2373 struct omp_region *inner;
2374 int i;
2375 for (i = fd->collapse - 1; i < fd->ordered; i++)
2376 if (i == fd->collapse - 1 && fd->collapse > 1)
2377 counts[i] = NULL_TREE;
2378 else if (i >= fd->collapse && !cont_bb)
2379 counts[i] = build_zero_cst (fd->iter_type);
2380 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2381 && integer_onep (fd->loops[i].step))
2382 counts[i] = NULL_TREE;
2383 else
2384 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2385 tree atype
2386 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2387 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2388 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2389
2390 for (inner = region->inner; inner; inner = inner->next)
2391 if (inner->type == GIMPLE_OMP_ORDERED)
2392 {
2393 gomp_ordered *ord_stmt = inner->ord_stmt;
2394 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2395 location_t loc = gimple_location (ord_stmt);
2396 tree c;
2397 for (c = gimple_omp_ordered_clauses (ord_stmt);
2398 c; c = OMP_CLAUSE_CHAIN (c))
2399 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2400 break;
2401 if (c)
2402 expand_omp_ordered_source (&gsi, fd, counts, loc);
2403 for (c = gimple_omp_ordered_clauses (ord_stmt);
2404 c; c = OMP_CLAUSE_CHAIN (c))
2405 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2406 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2407 gsi_remove (&gsi, true);
2408 }
2409}
2410
2411/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2412 collapsed. */
2413
2414static basic_block
2415expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2416 basic_block cont_bb, basic_block body_bb,
2417 bool ordered_lastprivate)
2418{
2419 if (fd->ordered == fd->collapse)
2420 return cont_bb;
2421
2422 if (!cont_bb)
2423 {
2424 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2425 for (int i = fd->collapse; i < fd->ordered; i++)
2426 {
2427 tree type = TREE_TYPE (fd->loops[i].v);
2428 tree n1 = fold_convert (type, fd->loops[i].n1);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2430 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2431 size_int (i - fd->collapse + 1),
2432 NULL_TREE, NULL_TREE);
2433 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2434 }
2435 return NULL;
2436 }
2437
2438 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2439 {
2440 tree t, type = TREE_TYPE (fd->loops[i].v);
2441 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2442 expand_omp_build_assign (&gsi, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].n1));
2444 if (counts[i])
2445 expand_omp_build_assign (&gsi, counts[i],
2446 build_zero_cst (fd->iter_type));
2447 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2448 size_int (i - fd->collapse + 1),
2449 NULL_TREE, NULL_TREE);
2450 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2451 if (!gsi_end_p (gsi))
2452 gsi_prev (&gsi);
2453 else
2454 gsi = gsi_last_bb (body_bb);
2455 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2456 basic_block new_body = e1->dest;
2457 if (body_bb == cont_bb)
2458 cont_bb = new_body;
2459 edge e2 = NULL;
2460 basic_block new_header;
2461 if (EDGE_COUNT (cont_bb->preds) > 0)
2462 {
2463 gsi = gsi_last_bb (cont_bb);
2464 if (POINTER_TYPE_P (type))
2465 t = fold_build_pointer_plus (fd->loops[i].v,
2466 fold_convert (sizetype,
2467 fd->loops[i].step));
2468 else
2469 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2470 fold_convert (type, fd->loops[i].step));
2471 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2472 if (counts[i])
2473 {
2474 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2475 build_int_cst (fd->iter_type, 1));
2476 expand_omp_build_assign (&gsi, counts[i], t);
2477 t = counts[i];
2478 }
2479 else
2480 {
2481 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2482 fd->loops[i].v, fd->loops[i].n1);
2483 t = fold_convert (fd->iter_type, t);
2484 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2485 true, GSI_SAME_STMT);
2486 }
2487 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2488 size_int (i - fd->collapse + 1),
2489 NULL_TREE, NULL_TREE);
2490 expand_omp_build_assign (&gsi, aref, t);
2491 gsi_prev (&gsi);
2492 e2 = split_block (cont_bb, gsi_stmt (gsi));
2493 new_header = e2->dest;
2494 }
2495 else
2496 new_header = cont_bb;
2497 gsi = gsi_after_labels (new_header);
2498 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2499 true, GSI_SAME_STMT);
2500 tree n2
2501 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2502 true, NULL_TREE, true, GSI_SAME_STMT);
2503 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2504 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2505 edge e3 = split_block (new_header, gsi_stmt (gsi));
2506 cont_bb = e3->dest;
2507 remove_edge (e1);
2508 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2509 e3->flags = EDGE_FALSE_VALUE;
720cfc43 2510 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
4954efd4 2511 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
720cfc43 2512 e1->probability = e3->probability.invert ();
4954efd4 2513
2514 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2515 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2516
2517 if (e2)
2518 {
2e966e2a 2519 class loop *loop = alloc_loop ();
4954efd4 2520 loop->header = new_header;
2521 loop->latch = e2->src;
2522 add_loop (loop, body_bb->loop_father);
2523 }
2524 }
2525
2526 /* If there are any lastprivate clauses and it is possible some loops
2527 might have zero iterations, ensure all the decls are initialized,
2528 otherwise we could crash evaluating C++ class iterators with lastprivate
2529 clauses. */
2530 bool need_inits = false;
2531 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2532 if (need_inits)
2533 {
2534 tree type = TREE_TYPE (fd->loops[i].v);
2535 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2536 expand_omp_build_assign (&gsi, fd->loops[i].v,
2537 fold_convert (type, fd->loops[i].n1));
2538 }
2539 else
2540 {
2541 tree type = TREE_TYPE (fd->loops[i].v);
2542 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2543 boolean_type_node,
2544 fold_convert (type, fd->loops[i].n1),
2545 fold_convert (type, fd->loops[i].n2));
2546 if (!integer_onep (this_cond))
2547 need_inits = true;
2548 }
2549
2550 return cont_bb;
2551}
2552
2553/* A subroutine of expand_omp_for. Generate code for a parallel
2554 loop with any schedule. Given parameters:
2555
2556 for (V = N1; V cond N2; V += STEP) BODY;
2557
2558 where COND is "<" or ">", we generate pseudocode
2559
2560 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2561 if (more) goto L0; else goto L3;
2562 L0:
2563 V = istart0;
2564 iend = iend0;
2565 L1:
2566 BODY;
2567 V += STEP;
2568 if (V cond iend) goto L1; else goto L2;
2569 L2:
2570 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2571 L3:
2572
2573 If this is a combined omp parallel loop, instead of the call to
2574 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2575 If this is gimple_omp_for_combined_p loop, then instead of assigning
2576 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2577 inner GIMPLE_OMP_FOR and V += STEP; and
2578 if (V cond iend) goto L1; else goto L2; are removed.
2579
2580 For collapsed loops, given parameters:
2581 collapse(3)
2582 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2583 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2584 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2585 BODY;
2586
2587 we generate pseudocode
2588
2589 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2590 if (cond3 is <)
2591 adj = STEP3 - 1;
2592 else
2593 adj = STEP3 + 1;
2594 count3 = (adj + N32 - N31) / STEP3;
2595 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2596 if (cond2 is <)
2597 adj = STEP2 - 1;
2598 else
2599 adj = STEP2 + 1;
2600 count2 = (adj + N22 - N21) / STEP2;
2601 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2602 if (cond1 is <)
2603 adj = STEP1 - 1;
2604 else
2605 adj = STEP1 + 1;
2606 count1 = (adj + N12 - N11) / STEP1;
2607 count = count1 * count2 * count3;
2608 goto Z1;
2609 Z0:
2610 count = 0;
2611 Z1:
2612 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2613 if (more) goto L0; else goto L3;
2614 L0:
2615 V = istart0;
2616 T = V;
2617 V3 = N31 + (T % count3) * STEP3;
2618 T = T / count3;
2619 V2 = N21 + (T % count2) * STEP2;
2620 T = T / count2;
2621 V1 = N11 + T * STEP1;
2622 iend = iend0;
2623 L1:
2624 BODY;
2625 V += 1;
2626 if (V < iend) goto L10; else goto L2;
2627 L10:
2628 V3 += STEP3;
2629 if (V3 cond3 N32) goto L1; else goto L11;
2630 L11:
2631 V3 = N31;
2632 V2 += STEP2;
2633 if (V2 cond2 N22) goto L1; else goto L12;
2634 L12:
2635 V2 = N21;
2636 V1 += STEP1;
2637 goto L1;
2638 L2:
2639 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2640 L3:
2641
2642 */
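
/* Which GOMP_loop_foo_* entry points are used depends on the schedule
 kind; the caller (expand_omp_for) selects START_FN and NEXT_FN.
 For instance, for schedule(dynamic) with long iterators the
 generated calls correspond roughly to
 if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0))
 do { ... } while (GOMP_loop_dynamic_next (&istart0, &iend0));
 GOMP_loop_end (); */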
2643
2644static void
2645expand_omp_for_generic (struct omp_region *region,
2646 struct omp_for_data *fd,
2647 enum built_in_function start_fn,
2648 enum built_in_function next_fn,
7e5a76c8 2649 tree sched_arg,
4954efd4 2650 gimple *inner_stmt)
2651{
2652 tree type, istart0, iend0, iend;
2653 tree t, vmain, vback, bias = NULL_TREE;
2654 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2655 basic_block l2_bb = NULL, l3_bb = NULL;
2656 gimple_stmt_iterator gsi;
2657 gassign *assign_stmt;
2658 bool in_combined_parallel = is_combined_parallel (region);
2659 bool broken_loop = region->cont == NULL;
2660 edge e, ne;
2661 tree *counts = NULL;
2662 int i;
2663 bool ordered_lastprivate = false;
2664
2665 gcc_assert (!broken_loop || !in_combined_parallel);
2666 gcc_assert (fd->iter_type == long_integer_type_node
2667 || !in_combined_parallel);
2668
2669 entry_bb = region->entry;
2670 cont_bb = region->cont;
2671 collapse_bb = NULL;
2672 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2673 gcc_assert (broken_loop
2674 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2675 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2676 l1_bb = single_succ (l0_bb);
2677 if (!broken_loop)
2678 {
2679 l2_bb = create_empty_bb (cont_bb);
2680 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2681 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2682 == l1_bb));
2683 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2684 }
2685 else
2686 l2_bb = NULL;
2687 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2688 exit_bb = region->exit;
2689
bce107d7 2690 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 2691
2692 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2693 if (fd->ordered
9a1d892b 2694 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4954efd4 2695 OMP_CLAUSE_LASTPRIVATE))
 2696 ordered_lastprivate = true;
7e5a76c8 2697 tree reductions = NULL_TREE;
9a1d892b 2698 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2699 tree memv = NULL_TREE;
48152aa2 2700 if (fd->lastprivate_conditional)
2701 {
2702 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2703 OMP_CLAUSE__CONDTEMP_);
2704 if (fd->have_pointer_condtemp)
2705 condtemp = OMP_CLAUSE_DECL (c);
2706 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2707 cond_var = OMP_CLAUSE_DECL (c);
2708 }
7e5a76c8 2709 if (sched_arg)
2710 {
2711 if (fd->have_reductemp)
2712 {
9a1d892b 2713 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7e5a76c8 2714 OMP_CLAUSE__REDUCTEMP_);
2715 reductions = OMP_CLAUSE_DECL (c);
2716 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2717 gimple *g = SSA_NAME_DEF_STMT (reductions);
2718 reductions = gimple_assign_rhs1 (g);
2719 OMP_CLAUSE_DECL (c) = reductions;
2720 entry_bb = gimple_bb (g);
2721 edge e = split_block (entry_bb, g);
2722 if (region->entry == entry_bb)
2723 region->entry = e->dest;
2724 gsi = gsi_last_bb (entry_bb);
2725 }
2726 else
2727 reductions = null_pointer_node;
48152aa2 2728 if (fd->have_pointer_condtemp)
9a1d892b 2729 {
9a1d892b 2730 tree type = TREE_TYPE (condtemp);
2731 memv = create_tmp_var (type);
2732 TREE_ADDRESSABLE (memv) = 1;
2733 unsigned HOST_WIDE_INT sz
2734 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2735 sz *= fd->lastprivate_conditional;
2736 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2737 false);
2738 mem = build_fold_addr_expr (memv);
2739 }
2740 else
2741 mem = null_pointer_node;
7e5a76c8 2742 }
4954efd4 2743 if (fd->collapse > 1 || fd->ordered)
2744 {
2745 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2746 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2747
2748 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2749 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2750 zero_iter1_bb, first_zero_iter1,
2751 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2752
2753 if (zero_iter1_bb)
2754 {
2755 /* Some counts[i] vars might be uninitialized if
2756 some loop has zero iterations. But the body shouldn't
2757 be executed in that case, so just avoid uninit warnings. */
2758 for (i = first_zero_iter1;
2759 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2760 if (SSA_VAR_P (counts[i]))
2761 TREE_NO_WARNING (counts[i]) = 1;
2762 gsi_prev (&gsi);
2763 e = split_block (entry_bb, gsi_stmt (gsi));
2764 entry_bb = e->dest;
2765 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
bce107d7 2766 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 2767 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2768 get_immediate_dominator (CDI_DOMINATORS,
2769 zero_iter1_bb));
2770 }
2771 if (zero_iter2_bb)
2772 {
2773 /* Some counts[i] vars might be uninitialized if
2774 some loop has zero iterations. But the body shouldn't
2775 be executed in that case, so just avoid uninit warnings. */
2776 for (i = first_zero_iter2; i < fd->ordered; i++)
2777 if (SSA_VAR_P (counts[i]))
2778 TREE_NO_WARNING (counts[i]) = 1;
2779 if (zero_iter1_bb)
2780 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2781 else
2782 {
2783 gsi_prev (&gsi);
2784 e = split_block (entry_bb, gsi_stmt (gsi));
2785 entry_bb = e->dest;
2786 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
bce107d7 2787 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 2788 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2789 get_immediate_dominator
2790 (CDI_DOMINATORS, zero_iter2_bb));
2791 }
2792 }
2793 if (fd->collapse == 1)
2794 {
2795 counts[0] = fd->loop.n2;
2796 fd->loop = fd->loops[0];
2797 }
2798 }
2799
2800 type = TREE_TYPE (fd->loop.v);
2801 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2802 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2803 TREE_ADDRESSABLE (istart0) = 1;
2804 TREE_ADDRESSABLE (iend0) = 1;
2805
2806 /* See if we need to bias by LLONG_MIN. */
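 /* (The bias maps the signed iteration range into the unsigned
 long long space the runtime works in: when the bounds may
 straddle zero, TYPE_MIN_VALUE is added to both bounds before
 the library call and subtracted again from istart0/iend0
 below.) */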
2807 if (fd->iter_type == long_long_unsigned_type_node
2808 && TREE_CODE (type) == INTEGER_TYPE
2809 && !TYPE_UNSIGNED (type)
2810 && fd->ordered == 0)
2811 {
2812 tree n1, n2;
2813
2814 if (fd->loop.cond_code == LT_EXPR)
2815 {
2816 n1 = fd->loop.n1;
2817 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 }
2819 else
2820 {
2821 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2822 n2 = fd->loop.n1;
2823 }
2824 if (TREE_CODE (n1) != INTEGER_CST
2825 || TREE_CODE (n2) != INTEGER_CST
2826 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2827 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2828 }
2829
2830 gimple_stmt_iterator gsif = gsi;
2831 gsi_prev (&gsif);
2832
2833 tree arr = NULL_TREE;
2834 if (in_combined_parallel)
2835 {
2836 gcc_assert (fd->ordered == 0);
2837 /* In a combined parallel loop, emit a call to
2838 GOMP_loop_foo_next. */
2839 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2840 build_fold_addr_expr (istart0),
2841 build_fold_addr_expr (iend0));
2842 }
2843 else
2844 {
2845 tree t0, t1, t2, t3, t4;
2846 /* If this is not a combined parallel loop, emit a call to
2847 GOMP_loop_foo_start in ENTRY_BB. */
2848 t4 = build_fold_addr_expr (iend0);
2849 t3 = build_fold_addr_expr (istart0);
2850 if (fd->ordered)
2851 {
2852 t0 = build_int_cst (unsigned_type_node,
2853 fd->ordered - fd->collapse + 1);
2854 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2855 fd->ordered
2856 - fd->collapse + 1),
2857 ".omp_counts");
2858 DECL_NAMELESS (arr) = 1;
2859 TREE_ADDRESSABLE (arr) = 1;
2860 TREE_STATIC (arr) = 1;
2861 vec<constructor_elt, va_gc> *v;
2862 vec_alloc (v, fd->ordered - fd->collapse + 1);
2863 int idx;
2864
2865 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2866 {
2867 tree c;
2868 if (idx == 0 && fd->collapse > 1)
2869 c = fd->loop.n2;
2870 else
2871 c = counts[idx + fd->collapse - 1];
2872 tree purpose = size_int (idx);
2873 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2874 if (TREE_CODE (c) != INTEGER_CST)
2875 TREE_STATIC (arr) = 0;
2876 }
2877
2878 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2879 if (!TREE_STATIC (arr))
2880 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2881 void_type_node, arr),
2882 true, NULL_TREE, true, GSI_SAME_STMT);
2883 t1 = build_fold_addr_expr (arr);
2884 t2 = NULL_TREE;
2885 }
2886 else
2887 {
2888 t2 = fold_convert (fd->iter_type, fd->loop.step);
2889 t1 = fd->loop.n2;
2890 t0 = fd->loop.n1;
2891 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2892 {
2893 tree innerc
2894 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2895 OMP_CLAUSE__LOOPTEMP_);
2896 gcc_assert (innerc);
2897 t0 = OMP_CLAUSE_DECL (innerc);
2898 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2899 OMP_CLAUSE__LOOPTEMP_);
2900 gcc_assert (innerc);
2901 t1 = OMP_CLAUSE_DECL (innerc);
2902 }
2903 if (POINTER_TYPE_P (TREE_TYPE (t0))
2904 && TYPE_PRECISION (TREE_TYPE (t0))
2905 != TYPE_PRECISION (fd->iter_type))
2906 {
 2907 /* Avoid casting pointers to an integer of a different size. */
2908 tree itype = signed_type_for (type);
2909 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2910 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2911 }
2912 else
2913 {
2914 t1 = fold_convert (fd->iter_type, t1);
2915 t0 = fold_convert (fd->iter_type, t0);
2916 }
2917 if (bias)
2918 {
2919 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2920 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2921 }
2922 }
2923 if (fd->iter_type == long_integer_type_node || fd->ordered)
2924 {
2925 if (fd->chunk_size)
2926 {
2927 t = fold_convert (fd->iter_type, fd->chunk_size);
2928 t = omp_adjust_chunk_size (t, fd->simd_schedule);
7e5a76c8 2929 if (sched_arg)
2930 {
2931 if (fd->ordered)
2932 t = build_call_expr (builtin_decl_explicit (start_fn),
2933 8, t0, t1, sched_arg, t, t3, t4,
2934 reductions, mem);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 9, t0, t1, t2, sched_arg, t, t3, t4,
2938 reductions, mem);
2939 }
2940 else if (fd->ordered)
4954efd4 2941 t = build_call_expr (builtin_decl_explicit (start_fn),
2942 5, t0, t1, t, t3, t4);
2943 else
2944 t = build_call_expr (builtin_decl_explicit (start_fn),
2945 6, t0, t1, t2, t, t3, t4);
2946 }
2947 else if (fd->ordered)
2948 t = build_call_expr (builtin_decl_explicit (start_fn),
2949 4, t0, t1, t3, t4);
2950 else
2951 t = build_call_expr (builtin_decl_explicit (start_fn),
2952 5, t0, t1, t2, t3, t4);
2953 }
2954 else
2955 {
2956 tree t5;
2957 tree c_bool_type;
2958 tree bfn_decl;
2959
 2960 /* The GOMP_loop_ull_*start functions have an additional boolean
 2961 argument, true for < loops and false for > loops.
2962 In Fortran, the C bool type can be different from
2963 boolean_type_node. */
2964 bfn_decl = builtin_decl_explicit (start_fn);
2965 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2966 t5 = build_int_cst (c_bool_type,
2967 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2968 if (fd->chunk_size)
2969 {
2970 tree bfn_decl = builtin_decl_explicit (start_fn);
2971 t = fold_convert (fd->iter_type, fd->chunk_size);
2972 t = omp_adjust_chunk_size (t, fd->simd_schedule);
7e5a76c8 2973 if (sched_arg)
2974 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2975 t, t3, t4, reductions, mem);
2976 else
2977 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4954efd4 2978 }
2979 else
2980 t = build_call_expr (builtin_decl_explicit (start_fn),
2981 6, t5, t0, t1, t2, t3, t4);
2982 }
2983 }
2984 if (TREE_TYPE (t) != boolean_type_node)
2985 t = fold_build2 (NE_EXPR, boolean_type_node,
2986 t, build_int_cst (TREE_TYPE (t), 0));
2987 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7c6746c9 2988 true, GSI_SAME_STMT);
4954efd4 2989 if (arr && !TREE_STATIC (arr))
2990 {
b83e94b8 2991 tree clobber = build_clobber (TREE_TYPE (arr));
4954efd4 2992 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2993 GSI_SAME_STMT);
2994 }
48152aa2 2995 if (fd->have_pointer_condtemp)
9a1d892b 2996 expand_omp_build_assign (&gsi, condtemp, memv, false);
7e5a76c8 2997 if (fd->have_reductemp)
2998 {
2999 gimple *g = gsi_stmt (gsi);
3000 gsi_remove (&gsi, true);
3001 release_ssa_name (gimple_assign_lhs (g));
3002
3003 entry_bb = region->entry;
3004 gsi = gsi_last_nondebug_bb (entry_bb);
3005
3006 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3007 }
4954efd4 3008 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3009
3010 /* Remove the GIMPLE_OMP_FOR statement. */
3011 gsi_remove (&gsi, true);
3012
3013 if (gsi_end_p (gsif))
3014 gsif = gsi_after_labels (gsi_bb (gsif));
3015 gsi_next (&gsif);
3016
3017 /* Iteration setup for sequential loop goes in L0_BB. */
3018 tree startvar = fd->loop.v;
3019 tree endvar = NULL_TREE;
3020
3021 if (gimple_omp_for_combined_p (fd->for_stmt))
3022 {
3023 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3024 && gimple_omp_for_kind (inner_stmt)
3025 == GF_OMP_FOR_KIND_SIMD);
3026 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3027 OMP_CLAUSE__LOOPTEMP_);
3028 gcc_assert (innerc);
3029 startvar = OMP_CLAUSE_DECL (innerc);
3030 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3031 OMP_CLAUSE__LOOPTEMP_);
3032 gcc_assert (innerc);
3033 endvar = OMP_CLAUSE_DECL (innerc);
3034 }
3035
3036 gsi = gsi_start_bb (l0_bb);
3037 t = istart0;
3038 if (fd->ordered && fd->collapse == 1)
3039 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3040 fold_convert (fd->iter_type, fd->loop.step));
3041 else if (bias)
3042 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3043 if (fd->ordered && fd->collapse == 1)
3044 {
3045 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3046 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3047 fd->loop.n1, fold_convert (sizetype, t));
3048 else
3049 {
3050 t = fold_convert (TREE_TYPE (startvar), t);
3051 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3052 fd->loop.n1, t);
3053 }
3054 }
3055 else
3056 {
3057 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3058 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3059 t = fold_convert (TREE_TYPE (startvar), t);
3060 }
3061 t = force_gimple_operand_gsi (&gsi, t,
3062 DECL_P (startvar)
3063 && TREE_ADDRESSABLE (startvar),
3064 NULL_TREE, false, GSI_CONTINUE_LINKING);
3065 assign_stmt = gimple_build_assign (startvar, t);
3066 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9a1d892b 3067 if (cond_var)
3068 {
3069 tree itype = TREE_TYPE (cond_var);
 3070 /* For the lastprivate(conditional:) itervar, we need some iteration
 3071 counter that starts at a non-zero unsigned value and increases.
 3072 Prefer as few IVs as possible, so if we can use startvar
 3073 itself, use that, or startvar + constant (those would be
 3074 incremented with step), and as a last resort use istart0 + 1,
 3075 incremented by 1 each iteration. */
3076 if ((fd->ordered && fd->collapse == 1)
3077 || bias
3078 || POINTER_TYPE_P (type)
3079 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3080 || fd->loop.cond_code != LT_EXPR)
3081 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3082 build_int_cst (itype, 1));
3083 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3084 t = fold_convert (itype, t);
3085 else
3086 {
3087 tree c = fold_convert (itype, fd->loop.n1);
3088 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3089 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3090 }
3091 t = force_gimple_operand_gsi (&gsi, t, false,
3092 NULL_TREE, false, GSI_CONTINUE_LINKING);
3093 assign_stmt = gimple_build_assign (cond_var, t);
3094 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3095 }
4954efd4 3096
3097 t = iend0;
3098 if (fd->ordered && fd->collapse == 1)
3099 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3100 fold_convert (fd->iter_type, fd->loop.step));
3101 else if (bias)
3102 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3103 if (fd->ordered && fd->collapse == 1)
3104 {
3105 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3106 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3107 fd->loop.n1, fold_convert (sizetype, t));
3108 else
3109 {
3110 t = fold_convert (TREE_TYPE (startvar), t);
3111 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3112 fd->loop.n1, t);
3113 }
3114 }
3115 else
3116 {
3117 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3118 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3119 t = fold_convert (TREE_TYPE (startvar), t);
3120 }
3121 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3122 false, GSI_CONTINUE_LINKING);
3123 if (endvar)
3124 {
3125 assign_stmt = gimple_build_assign (endvar, iend);
3126 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3127 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3128 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3129 else
3130 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3131 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3132 }
3133 /* Handle linear clause adjustments. */
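 /* (For illustration: given linear(x:lstep) and a chunk starting at
 STARTVAR, x is set to its pre-loop value plus
 ((STARTVAR - N1) / STEP) * lstep, i.e. advanced once per logical
 iteration that precedes this chunk.) */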
3134 tree itercnt = NULL_TREE;
3135 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3136 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3137 c; c = OMP_CLAUSE_CHAIN (c))
3138 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3139 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3140 {
3141 tree d = OMP_CLAUSE_DECL (c);
3142 bool is_ref = omp_is_reference (d);
3143 tree t = d, a, dest;
3144 if (is_ref)
3145 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3146 tree type = TREE_TYPE (t);
3147 if (POINTER_TYPE_P (type))
3148 type = sizetype;
3149 dest = unshare_expr (t);
3150 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3151 expand_omp_build_assign (&gsif, v, t);
3152 if (itercnt == NULL_TREE)
3153 {
3154 itercnt = startvar;
3155 tree n1 = fd->loop.n1;
3156 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3157 {
3158 itercnt
3159 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3160 itercnt);
3161 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3162 }
3163 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3164 itercnt, n1);
3165 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3166 itercnt, fd->loop.step);
3167 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3168 NULL_TREE, false,
3169 GSI_CONTINUE_LINKING);
3170 }
3171 a = fold_build2 (MULT_EXPR, type,
3172 fold_convert (type, itercnt),
3173 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3174 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3175 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3176 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3177 false, GSI_CONTINUE_LINKING);
3178 assign_stmt = gimple_build_assign (dest, t);
3179 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3180 }
3181 if (fd->collapse > 1)
3182 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3183
3184 if (fd->ordered)
3185 {
 3186 /* Until now, the counts array contained the number of iterations,
 3187 or a variable containing it, for the ith loop. From now on, we
 3188 need those counts only for the collapsed loops, and only for the
 3189 2nd till the last collapsed one. Move those one element earlier;
 3190 we'll use counts[fd->collapse - 1] for the first source/sink
 3191 iteration counter and so on, and counts[fd->ordered]
 3192 as the array holding the current counter values for
 3193 depend(source). */
3194 if (fd->collapse > 1)
3195 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3196 if (broken_loop)
3197 {
3198 int i;
3199 for (i = fd->collapse; i < fd->ordered; i++)
3200 {
3201 tree type = TREE_TYPE (fd->loops[i].v);
3202 tree this_cond
3203 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3204 fold_convert (type, fd->loops[i].n1),
3205 fold_convert (type, fd->loops[i].n2));
3206 if (!integer_onep (this_cond))
3207 break;
3208 }
3209 if (i < fd->ordered)
3210 {
3211 cont_bb
3212 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3213 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3214 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3215 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3216 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3217 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3218 make_edge (cont_bb, l1_bb, 0);
3219 l2_bb = create_empty_bb (cont_bb);
3220 broken_loop = false;
3221 }
3222 }
3223 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3224 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3225 ordered_lastprivate);
3226 if (counts[fd->collapse - 1])
3227 {
3228 gcc_assert (fd->collapse == 1);
3229 gsi = gsi_last_bb (l0_bb);
3230 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3231 istart0, true);
3232 gsi = gsi_last_bb (cont_bb);
3233 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3234 build_int_cst (fd->iter_type, 1));
3235 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3236 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3237 size_zero_node, NULL_TREE, NULL_TREE);
3238 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3239 t = counts[fd->collapse - 1];
3240 }
3241 else if (fd->collapse > 1)
3242 t = fd->loop.v;
3243 else
3244 {
3245 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3246 fd->loops[0].v, fd->loops[0].n1);
3247 t = fold_convert (fd->iter_type, t);
3248 }
3249 gsi = gsi_last_bb (l0_bb);
3250 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3251 size_zero_node, NULL_TREE, NULL_TREE);
3252 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3253 false, GSI_CONTINUE_LINKING);
3254 expand_omp_build_assign (&gsi, aref, t, true);
3255 }
3256
3257 if (!broken_loop)
3258 {
3259 /* Code to control the increment and predicate for the sequential
3260 loop goes in the CONT_BB. */
bce107d7 3261 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 3262 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3263 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3264 vmain = gimple_omp_continue_control_use (cont_stmt);
3265 vback = gimple_omp_continue_control_def (cont_stmt);
3266
384aea12 3267 if (cond_var)
3268 {
3269 tree itype = TREE_TYPE (cond_var);
3270 tree t2;
3271 if ((fd->ordered && fd->collapse == 1)
3272 || bias
3273 || POINTER_TYPE_P (type)
3274 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3275 || fd->loop.cond_code != LT_EXPR)
3276 t2 = build_int_cst (itype, 1);
3277 else
3278 t2 = fold_convert (itype, fd->loop.step);
3279 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3280 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3281 NULL_TREE, true, GSI_SAME_STMT);
3282 assign_stmt = gimple_build_assign (cond_var, t2);
3283 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3284 }
3285
4954efd4 3286 if (!gimple_omp_for_combined_p (fd->for_stmt))
3287 {
3288 if (POINTER_TYPE_P (type))
3289 t = fold_build_pointer_plus (vmain, fd->loop.step);
3290 else
3291 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3292 t = force_gimple_operand_gsi (&gsi, t,
3293 DECL_P (vback)
3294 && TREE_ADDRESSABLE (vback),
3295 NULL_TREE, true, GSI_SAME_STMT);
3296 assign_stmt = gimple_build_assign (vback, t);
3297 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3298
3299 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3300 {
095711ed 3301 tree tem;
4954efd4 3302 if (fd->collapse > 1)
095711ed 3303 tem = fd->loop.v;
4954efd4 3304 else
3305 {
095711ed 3306 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3307 fd->loops[0].v, fd->loops[0].n1);
3308 tem = fold_convert (fd->iter_type, tem);
4954efd4 3309 }
3310 tree aref = build4 (ARRAY_REF, fd->iter_type,
3311 counts[fd->ordered], size_zero_node,
3312 NULL_TREE, NULL_TREE);
095711ed 3313 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3314 true, GSI_SAME_STMT);
3315 expand_omp_build_assign (&gsi, aref, tem);
4954efd4 3316 }
3317
3318 t = build2 (fd->loop.cond_code, boolean_type_node,
3319 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3320 iend);
3321 gcond *cond_stmt = gimple_build_cond_empty (t);
3322 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3323 }
3324
3325 /* Remove GIMPLE_OMP_CONTINUE. */
3326 gsi_remove (&gsi, true);
3327
3328 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3329 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3330
3331 /* Emit code to get the next parallel iteration in L2_BB. */
3332 gsi = gsi_start_bb (l2_bb);
3333
3334 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3335 build_fold_addr_expr (istart0),
3336 build_fold_addr_expr (iend0));
3337 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3338 false, GSI_CONTINUE_LINKING);
3339 if (TREE_TYPE (t) != boolean_type_node)
3340 t = fold_build2 (NE_EXPR, boolean_type_node,
3341 t, build_int_cst (TREE_TYPE (t), 0));
3342 gcond *cond_stmt = gimple_build_cond_empty (t);
3343 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3344 }
3345
3346 /* Add the loop cleanup function. */
bce107d7 3347 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 3348 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3349 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3350 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3351 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3352 else
3353 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3354 gcall *call_stmt = gimple_build_call (t, 0);
4954efd4 3355 if (fd->ordered)
3356 {
3357 tree arr = counts[fd->ordered];
b83e94b8 3358 tree clobber = build_clobber (TREE_TYPE (arr));
4954efd4 3359 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3360 GSI_SAME_STMT);
3361 }
7e5a76c8 3362 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3363 {
3364 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3365 if (fd->have_reductemp)
3366 {
3367 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3368 gimple_call_lhs (call_stmt));
3369 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3370 }
3371 }
3372 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4954efd4 3373 gsi_remove (&gsi, true);
3374
3375 /* Connect the new blocks. */
3376 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3377 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3378
3379 if (!broken_loop)
3380 {
3381 gimple_seq phis;
3382
3383 e = find_edge (cont_bb, l3_bb);
3384 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3385
3386 phis = phi_nodes (l3_bb);
3387 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3388 {
3389 gimple *phi = gsi_stmt (gsi);
3390 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3391 PHI_ARG_DEF_FROM_EDGE (phi, e));
3392 }
3393 remove_edge (e);
3394
3395 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3396 e = find_edge (cont_bb, l1_bb);
3397 if (e == NULL)
3398 {
3399 e = BRANCH_EDGE (cont_bb);
3400 gcc_assert (single_succ (e->dest) == l1_bb);
3401 }
3402 if (gimple_omp_for_combined_p (fd->for_stmt))
3403 {
3404 remove_edge (e);
3405 e = NULL;
3406 }
3407 else if (fd->collapse > 1)
3408 {
3409 remove_edge (e);
3410 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3411 }
3412 else
3413 e->flags = EDGE_TRUE_VALUE;
3414 if (e)
3415 {
720cfc43 3416 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3417 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4954efd4 3418 }
3419 else
3420 {
3421 e = find_edge (cont_bb, l2_bb);
3422 e->flags = EDGE_FALLTHRU;
3423 }
3424 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3425
3426 if (gimple_in_ssa_p (cfun))
3427 {
3428 /* Add phis to the outer loop that connect to the phis in the inner,
3429 original loop, and move the loop entry value of the inner phi to
3430 the loop entry value of the outer phi. */
3431 gphi_iterator psi;
3432 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3433 {
be1e7283 3434 location_t locus;
4954efd4 3435 gphi *nphi;
3436 gphi *exit_phi = psi.phi ();
3437
3337868c 3438 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3439 continue;
3440
4954efd4 3441 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3442 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3443
3444 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3445 edge latch_to_l1 = find_edge (latch, l1_bb);
3446 gphi *inner_phi
3447 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3448
3449 tree t = gimple_phi_result (exit_phi);
3450 tree new_res = copy_ssa_name (t, NULL);
3451 nphi = create_phi_node (new_res, l0_bb);
3452
3453 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3454 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3455 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3456 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3457 add_phi_arg (nphi, t, entry_to_l0, locus);
3458
3459 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3460 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3461
3462 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3337868c 3463 }
4954efd4 3464 }
3465
3466 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3467 recompute_dominator (CDI_DOMINATORS, l2_bb));
3468 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3469 recompute_dominator (CDI_DOMINATORS, l3_bb));
3470 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3471 recompute_dominator (CDI_DOMINATORS, l0_bb));
3472 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3473 recompute_dominator (CDI_DOMINATORS, l1_bb));
3474
3475 /* We enter expand_omp_for_generic with a loop. This original loop may
3476 have its own loop struct, or it may be part of an outer loop struct
3477 (which may be the fake loop). */
2e966e2a 3478 class loop *outer_loop = entry_bb->loop_father;
4954efd4 3479 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3480
3481 add_bb_to_loop (l2_bb, outer_loop);
3482
3483 /* We've added a new loop around the original loop. Allocate the
3484 corresponding loop struct. */
2e966e2a 3485 class loop *new_loop = alloc_loop ();
4954efd4 3486 new_loop->header = l0_bb;
3487 new_loop->latch = l2_bb;
3488 add_loop (new_loop, outer_loop);
3489
3490 /* Allocate a loop structure for the original loop unless we already
3491 had one. */
3492 if (!orig_loop_has_loop_struct
3493 && !gimple_omp_for_combined_p (fd->for_stmt))
3494 {
2e966e2a 3495 class loop *orig_loop = alloc_loop ();
4954efd4 3496 orig_loop->header = l1_bb;
3497 /* The loop may have multiple latches. */
3498 add_loop (orig_loop, new_loop);
3499 }
3500 }
3501}
3502
7d26f131 3503/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
 3504   compute the needed allocation size: for !ALLOC that of the team
 3505   allocations, for ALLOC that of the thread allocations.  SZ is the
 3506   initial size needed for other purposes, ALLOC_ALIGN the guaranteed
 3507   alignment of the allocation in bytes, and CNT the number of elements
 3508   of each array: omp_get_num_threads () for !ALLOC, the number of
 3509   iterations handled by the current thread for ALLOC.  If PTR is non-NULL,
 3510   it is the start of the allocation, and this routine shall assign to
 3511   OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers to the corresponding arrays.  */
3512
3513static tree
3514expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3515 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3516 gimple_stmt_iterator *gsi, bool alloc)
3517{
3518 tree eltsz = NULL_TREE;
3519 unsigned HOST_WIDE_INT preval = 0;
3520 if (ptr && sz)
3521 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3522 ptr, size_int (sz));
3523 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3524 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3525 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3526 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3527 {
3528 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3529 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3530 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3531 {
3532 unsigned HOST_WIDE_INT szl
3533 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3534 szl = least_bit_hwi (szl);
3535 if (szl)
3536 al = MIN (al, szl);
3537 }
3538 if (ptr == NULL_TREE)
3539 {
3540 if (eltsz == NULL_TREE)
3541 eltsz = TYPE_SIZE_UNIT (pointee_type);
3542 else
3543 eltsz = size_binop (PLUS_EXPR, eltsz,
3544 TYPE_SIZE_UNIT (pointee_type));
3545 }
3546 if (preval == 0 && al <= alloc_align)
3547 {
3548 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3549 sz += diff;
3550 if (diff && ptr)
3551 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3552 ptr, size_int (diff));
3553 }
3554 else if (al > preval)
3555 {
3556 if (ptr)
3557 {
3558 ptr = fold_convert (pointer_sized_int_node, ptr);
3559 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3560 build_int_cst (pointer_sized_int_node,
3561 al - 1));
3562 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3563 build_int_cst (pointer_sized_int_node,
3564 -(HOST_WIDE_INT) al));
3565 ptr = fold_convert (ptr_type_node, ptr);
3566 }
3567 else
3568 sz += al - 1;
3569 }
3570 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3571 preval = al;
3572 else
3573 preval = 1;
3574 if (ptr)
3575 {
3576 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3577 ptr = OMP_CLAUSE_DECL (c);
3578 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3579 size_binop (MULT_EXPR, cnt,
3580 TYPE_SIZE_UNIT (pointee_type)));
3581 }
3582 }
3583
3584 if (ptr == NULL_TREE)
3585 {
3586 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3587 if (sz)
3588 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3589 return eltsz;
3590 }
3591 else
3592 return ptr;
3593}
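
/* For reference, the pointer adjustment built above (PLUS_EXPR with AL - 1,
   then BIT_AND_EXPR with -AL) is the usual align-up idiom.  A hypothetical
   standalone equivalent (illustrative only, not used by this pass),
   assuming AL is a power of two:  */

static inline unsigned HOST_WIDE_INT
omp_align_up_sketch (unsigned HOST_WIDE_INT addr, unsigned HOST_WIDE_INT al)
{
  /* Adding AL - 1 and masking with -AL rounds ADDR up to the next
     multiple of AL.  */
  return (addr + al - 1) & -al;
}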
3594
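/* A hypothetical plain-C restatement (illustrative only) of the iteration
   partitioning that expand_omp_for_static_nochunk below emits as GIMPLE:
   the N logical iterations are split into contiguous per-thread ranges of
   Q or Q + 1 iterations, with the first N % NTHREADS threads taking the
   larger share.  */

static void
static_nochunk_range_sketch (long n, long nthreads, long threadid,
			     long *s0, long *e0)
{
  long q = n / nthreads;	/* Base share of iterations per thread.  */
  long tt = n % nthreads;	/* Threads receiving one extra iteration.  */
  if (threadid < tt)
    {
      tt = 0;
      q = q + 1;
    }
  *s0 = q * threadid + tt;	/* First iteration of this thread.  */
  *e0 = *s0 + q;		/* One past its last iteration.  */
}
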
4954efd4 3595/* A subroutine of expand_omp_for. Generate code for a parallel
3596 loop with static schedule and no specified chunk size. Given
3597 parameters:
3598
3599 for (V = N1; V cond N2; V += STEP) BODY;
3600
3601 where COND is "<" or ">", we generate pseudocode
3602
3603 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3604 if (cond is <)
3605 adj = STEP - 1;
3606 else
3607 adj = STEP + 1;
3608 if ((__typeof (V)) -1 > 0 && cond is >)
3609 n = -(adj + N2 - N1) / -STEP;
3610 else
3611 n = (adj + N2 - N1) / STEP;
3612 q = n / nthreads;
3613 tt = n % nthreads;
3614 if (threadid < tt) goto L3; else goto L4;
3615 L3:
3616 tt = 0;
3617 q = q + 1;
3618 L4:
3619 s0 = q * threadid + tt;
3620 e0 = s0 + q;
3621 V = s0 * STEP + N1;
3622 if (s0 >= e0) goto L2; else goto L0;
3623 L0:
3624 e = e0 * STEP + N1;
3625 L1:
3626 BODY;
3627 V += STEP;
3628 if (V cond e) goto L1;
3629 L2:
3630*/
3631
3632static void
3633expand_omp_for_static_nochunk (struct omp_region *region,
3634 struct omp_for_data *fd,
3635 gimple *inner_stmt)
3636{
7d26f131 3637 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4954efd4 3638 tree type, itype, vmain, vback;
3639 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3640 basic_block body_bb, cont_bb, collapse_bb = NULL;
7d26f131 3641 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3642 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
9a1d892b 3643 gimple_stmt_iterator gsi, gsip;
4954efd4 3644 edge ep;
3645 bool broken_loop = region->cont == NULL;
3646 tree *counts = NULL;
3647 tree n1, n2, step;
7e5a76c8 3648 tree reductions = NULL_TREE;
48152aa2 3649 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4954efd4 3650
3651 itype = type = TREE_TYPE (fd->loop.v);
3652 if (POINTER_TYPE_P (type))
3653 itype = signed_type_for (type);
3654
3655 entry_bb = region->entry;
3656 cont_bb = region->cont;
3657 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3658 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3659 gcc_assert (broken_loop
3660 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3661 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3662 body_bb = single_succ (seq_start_bb);
3663 if (!broken_loop)
3664 {
3665 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3666 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3667 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3668 }
3669 exit_bb = region->exit;
3670
3671 /* Iteration space partitioning goes in ENTRY_BB. */
bce107d7 3672 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 3673 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9a1d892b 3674 gsip = gsi;
3675 gsi_prev (&gsip);
4954efd4 3676
3677 if (fd->collapse > 1)
3678 {
3679 int first_zero_iter = -1, dummy = -1;
3680 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3681
3682 counts = XALLOCAVEC (tree, fd->collapse);
3683 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3684 fin_bb, first_zero_iter,
3685 dummy_bb, dummy, l2_dom_bb);
3686 t = NULL_TREE;
3687 }
3688 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3689 t = integer_one_node;
3690 else
3691 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3692 fold_convert (type, fd->loop.n1),
3693 fold_convert (type, fd->loop.n2));
3694 if (fd->collapse == 1
3695 && TYPE_UNSIGNED (type)
3696 && (t == NULL_TREE || !integer_onep (t)))
3697 {
3698 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3699 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3700 true, GSI_SAME_STMT);
3701 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3702 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3703 true, GSI_SAME_STMT);
3704 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9a1d892b 3705 NULL_TREE, NULL_TREE);
4954efd4 3706 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3707 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3708 expand_omp_regimplify_p, NULL, NULL)
3709 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3710 expand_omp_regimplify_p, NULL, NULL))
3711 {
3712 gsi = gsi_for_stmt (cond_stmt);
3713 gimple_regimplify_operands (cond_stmt, &gsi);
3714 }
3715 ep = split_block (entry_bb, cond_stmt);
3716 ep->flags = EDGE_TRUE_VALUE;
3717 entry_bb = ep->dest;
720cfc43 3718 ep->probability = profile_probability::very_likely ();
4954efd4 3719 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
720cfc43 3720 ep->probability = profile_probability::very_unlikely ();
4954efd4 3721 if (gimple_in_ssa_p (cfun))
3722 {
3723 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3724 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3725 !gsi_end_p (gpi); gsi_next (&gpi))
3726 {
3727 gphi *phi = gpi.phi ();
3728 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3729 ep, UNKNOWN_LOCATION);
3730 }
3731 }
3732 gsi = gsi_last_bb (entry_bb);
3733 }
3734
48152aa2 3735 if (fd->lastprivate_conditional)
3736 {
3737 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3738 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3739 if (fd->have_pointer_condtemp)
3740 condtemp = OMP_CLAUSE_DECL (c);
3741 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3742 cond_var = OMP_CLAUSE_DECL (c);
3743 }
7d26f131 3744 if (fd->have_reductemp
5c77f6d6 3745 /* For scan, we don't want to reinitialize condtemp before the
3746 second loop. */
3747 || (fd->have_pointer_condtemp && !fd->have_scantemp)
7d26f131 3748 || fd->have_nonctrl_scantemp)
7e5a76c8 3749 {
3750 tree t1 = build_int_cst (long_integer_type_node, 0);
3751 tree t2 = build_int_cst (long_integer_type_node, 1);
3752 tree t3 = build_int_cstu (long_integer_type_node,
3753 (HOST_WIDE_INT_1U << 31) + 1);
3754 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
9a1d892b 3755 gimple_stmt_iterator gsi2 = gsi_none ();
3756 gimple *g = NULL;
3757 tree mem = null_pointer_node, memv = NULL_TREE;
7d26f131 3758 unsigned HOST_WIDE_INT condtemp_sz = 0;
3759 unsigned HOST_WIDE_INT alloc_align = 0;
9a1d892b 3760 if (fd->have_reductemp)
3761 {
7d26f131 3762 gcc_assert (!fd->have_nonctrl_scantemp);
9a1d892b 3763 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3764 reductions = OMP_CLAUSE_DECL (c);
3765 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3766 g = SSA_NAME_DEF_STMT (reductions);
3767 reductions = gimple_assign_rhs1 (g);
3768 OMP_CLAUSE_DECL (c) = reductions;
3769 gsi2 = gsi_for_stmt (g);
3770 }
3771 else
3772 {
3773 if (gsi_end_p (gsip))
3774 gsi2 = gsi_after_labels (region->entry);
3775 else
3776 gsi2 = gsip;
3777 reductions = null_pointer_node;
3778 }
7d26f131 3779 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
9a1d892b 3780 {
7d26f131 3781 tree type;
3782 if (fd->have_pointer_condtemp)
3783 type = TREE_TYPE (condtemp);
3784 else
3785 type = ptr_type_node;
9a1d892b 3786 memv = create_tmp_var (type);
3787 TREE_ADDRESSABLE (memv) = 1;
7d26f131 3788 unsigned HOST_WIDE_INT sz = 0;
3789 tree size = NULL_TREE;
3790 if (fd->have_pointer_condtemp)
3791 {
3792 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3793 sz *= fd->lastprivate_conditional;
3794 condtemp_sz = sz;
3795 }
3796 if (fd->have_nonctrl_scantemp)
3797 {
3798 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3799 gimple *g = gimple_build_call (nthreads, 0);
3800 nthreads = create_tmp_var (integer_type_node);
3801 gimple_call_set_lhs (g, nthreads);
3802 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3803 nthreads = fold_convert (sizetype, nthreads);
3804 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3805 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3806 alloc_align, nthreads, NULL,
3807 false);
3808 size = fold_convert (type, size);
3809 }
3810 else
3811 size = build_int_cst (type, sz);
3812 expand_omp_build_assign (&gsi2, memv, size, false);
9a1d892b 3813 mem = build_fold_addr_expr (memv);
3814 }
7e5a76c8 3815 tree t
3816 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3817 9, t1, t2, t2, t3, t1, null_pointer_node,
9a1d892b 3818 null_pointer_node, reductions, mem);
7e5a76c8 3819 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3820 true, GSI_SAME_STMT);
48152aa2 3821 if (fd->have_pointer_condtemp)
9a1d892b 3822 expand_omp_build_assign (&gsi2, condtemp, memv, false);
7d26f131 3823 if (fd->have_nonctrl_scantemp)
3824 {
3825 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3826 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3827 alloc_align, nthreads, &gsi2, false);
3828 }
9a1d892b 3829 if (fd->have_reductemp)
3830 {
3831 gsi_remove (&gsi2, true);
3832 release_ssa_name (gimple_assign_lhs (g));
3833 }
7e5a76c8 3834 }
4954efd4 3835 switch (gimple_omp_for_kind (fd->for_stmt))
3836 {
3837 case GF_OMP_FOR_KIND_FOR:
3838 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3839 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3840 break;
3841 case GF_OMP_FOR_KIND_DISTRIBUTE:
3842 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3843 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3844 break;
3845 default:
3846 gcc_unreachable ();
3847 }
3848 nthreads = build_call_expr (nthreads, 0);
3849 nthreads = fold_convert (itype, nthreads);
3850 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3851 true, GSI_SAME_STMT);
3852 threadid = build_call_expr (threadid, 0);
3853 threadid = fold_convert (itype, threadid);
3854 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3855 true, GSI_SAME_STMT);
3856
3857 n1 = fd->loop.n1;
3858 n2 = fd->loop.n2;
3859 step = fd->loop.step;
3860 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3861 {
3862 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3863 OMP_CLAUSE__LOOPTEMP_);
3864 gcc_assert (innerc);
3865 n1 = OMP_CLAUSE_DECL (innerc);
3866 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3867 OMP_CLAUSE__LOOPTEMP_);
3868 gcc_assert (innerc);
3869 n2 = OMP_CLAUSE_DECL (innerc);
3870 }
3871 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3872 true, NULL_TREE, true, GSI_SAME_STMT);
3873 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3874 true, NULL_TREE, true, GSI_SAME_STMT);
3875 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3876 true, NULL_TREE, true, GSI_SAME_STMT);
3877
3878 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3879 t = fold_build2 (PLUS_EXPR, itype, step, t);
3880 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3881 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3882 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3883 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3884 fold_build1 (NEGATE_EXPR, itype, t),
3885 fold_build1 (NEGATE_EXPR, itype, step));
3886 else
3887 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3888 t = fold_convert (itype, t);
3889 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3890
3891 q = create_tmp_reg (itype, "q");
3892 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3893 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3894 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3895
3896 tt = create_tmp_reg (itype, "tt");
3897 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3898 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3899 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3900
3901 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3902 gcond *cond_stmt = gimple_build_cond_empty (t);
3903 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3904
3905 second_bb = split_block (entry_bb, cond_stmt)->dest;
bce107d7 3906 gsi = gsi_last_nondebug_bb (second_bb);
4954efd4 3907 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3908
3909 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3910 GSI_SAME_STMT);
3911 gassign *assign_stmt
3912 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3913 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3914
3915 third_bb = split_block (second_bb, assign_stmt)->dest;
bce107d7 3916 gsi = gsi_last_nondebug_bb (third_bb);
4954efd4 3917 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3918
7d26f131 3919 if (fd->have_nonctrl_scantemp)
3920 {
3921 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3922 tree controlp = NULL_TREE, controlb = NULL_TREE;
3923 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3924 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3925 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3926 {
3927 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3928 controlb = OMP_CLAUSE_DECL (c);
3929 else
3930 controlp = OMP_CLAUSE_DECL (c);
3931 if (controlb && controlp)
3932 break;
3933 }
3934 gcc_assert (controlp && controlb);
3935 tree cnt = create_tmp_var (sizetype);
3936 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3937 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3938 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3939 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3940 alloc_align, cnt, NULL, true);
3941 tree size = create_tmp_var (sizetype);
3942 expand_omp_build_assign (&gsi, size, sz, false);
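      /* Heuristic: if the per-thread scan temporaries would need more
	 than 16 kB, allocate them with malloc below, otherwise carve
	 them out of the stack.  CONTROLB records the choice for the
	 matching cleanup in the exit code.  */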
3943 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3944 size, size_int (16384));
3945 expand_omp_build_assign (&gsi, controlb, cmp);
3946 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3947 NULL_TREE, NULL_TREE);
3948 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3949 fourth_bb = split_block (third_bb, g)->dest;
3950 gsi = gsi_last_nondebug_bb (fourth_bb);
 3951      /* FIXME: Once we have allocators, this should use the allocator.  */
3952 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3953 gimple_call_set_lhs (g, controlp);
3954 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3955 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3956 &gsi, true);
3957 gsi_prev (&gsi);
3958 g = gsi_stmt (gsi);
3959 fifth_bb = split_block (fourth_bb, g)->dest;
3960 gsi = gsi_last_nondebug_bb (fifth_bb);
3961
3962 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3963 gimple_call_set_lhs (g, controlp);
3964 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3965 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3966 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3967 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3968 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3969 {
3970 tree tmp = create_tmp_var (sizetype);
3971 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3972 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3973 TYPE_SIZE_UNIT (pointee_type));
3974 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3975 g = gimple_build_call (alloca_decl, 2, tmp,
3976 size_int (TYPE_ALIGN (pointee_type)));
3977 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3978 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3979 }
3980
3981 sixth_bb = split_block (fifth_bb, g)->dest;
3982 gsi = gsi_last_nondebug_bb (sixth_bb);
3983 }
3984
4954efd4 3985 t = build2 (MULT_EXPR, itype, q, threadid);
3986 t = build2 (PLUS_EXPR, itype, t, tt);
3987 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3988
3989 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3990 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3991
3992 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3993 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3994
3995 /* Remove the GIMPLE_OMP_FOR statement. */
3996 gsi_remove (&gsi, true);
3997
3998 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3999 gsi = gsi_start_bb (seq_start_bb);
4000
4001 tree startvar = fd->loop.v;
4002 tree endvar = NULL_TREE;
4003
4004 if (gimple_omp_for_combined_p (fd->for_stmt))
4005 {
4006 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4007 ? gimple_omp_parallel_clauses (inner_stmt)
4008 : gimple_omp_for_clauses (inner_stmt);
4009 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4010 gcc_assert (innerc);
4011 startvar = OMP_CLAUSE_DECL (innerc);
4012 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4013 OMP_CLAUSE__LOOPTEMP_);
4014 gcc_assert (innerc);
4015 endvar = OMP_CLAUSE_DECL (innerc);
4016 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4017 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4018 {
4019 int i;
4020 for (i = 1; i < fd->collapse; i++)
4021 {
4022 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4023 OMP_CLAUSE__LOOPTEMP_);
4024 gcc_assert (innerc);
4025 }
4026 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4027 OMP_CLAUSE__LOOPTEMP_);
4028 if (innerc)
4029 {
4030 /* If needed (distribute parallel for with lastprivate),
4031 propagate down the total number of iterations. */
4032 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4033 fd->loop.n2);
4034 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4035 GSI_CONTINUE_LINKING);
4036 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4038 }
4039 }
4040 }
4041 t = fold_convert (itype, s0);
4042 t = fold_build2 (MULT_EXPR, itype, t, step);
4043 if (POINTER_TYPE_P (type))
2480c191 4044 {
4045 t = fold_build_pointer_plus (n1, t);
4046 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4047 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4048 t = fold_convert (signed_type_for (type), t);
4049 }
4954efd4 4050 else
4051 t = fold_build2 (PLUS_EXPR, type, t, n1);
4052 t = fold_convert (TREE_TYPE (startvar), t);
4053 t = force_gimple_operand_gsi (&gsi, t,
4054 DECL_P (startvar)
4055 && TREE_ADDRESSABLE (startvar),
4056 NULL_TREE, false, GSI_CONTINUE_LINKING);
4057 assign_stmt = gimple_build_assign (startvar, t);
4058 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9a1d892b 4059 if (cond_var)
4060 {
4061 tree itype = TREE_TYPE (cond_var);
 4062	  /* For the lastprivate(conditional:) itervar, we need an iteration
 4063	     counter that starts at a non-zero unsigned value and increases.
 4064	     Prefer as few IVs as possible, so if we can use startvar
 4065	     itself, use that, or startvar + constant (those would be
 4066	     incremented with step), and as a last resort use s0 + 1,
 4067	     incremented by 1.  */
4068 if (POINTER_TYPE_P (type)
4069 || TREE_CODE (n1) != INTEGER_CST
4070 || fd->loop.cond_code != LT_EXPR)
4071 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4072 build_int_cst (itype, 1));
4073 else if (tree_int_cst_sgn (n1) == 1)
4074 t = fold_convert (itype, t);
4075 else
4076 {
4077 tree c = fold_convert (itype, n1);
4078 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4079 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4080 }
4081 t = force_gimple_operand_gsi (&gsi, t, false,
4082 NULL_TREE, false, GSI_CONTINUE_LINKING);
4083 assign_stmt = gimple_build_assign (cond_var, t);
4084 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4085 }
4954efd4 4086
4087 t = fold_convert (itype, e0);
4088 t = fold_build2 (MULT_EXPR, itype, t, step);
4089 if (POINTER_TYPE_P (type))
2480c191 4090 {
4091 t = fold_build_pointer_plus (n1, t);
4092 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4093 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4094 t = fold_convert (signed_type_for (type), t);
4095 }
4954efd4 4096 else
4097 t = fold_build2 (PLUS_EXPR, type, t, n1);
4098 t = fold_convert (TREE_TYPE (startvar), t);
4099 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4100 false, GSI_CONTINUE_LINKING);
4101 if (endvar)
4102 {
4103 assign_stmt = gimple_build_assign (endvar, e);
4104 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4105 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4106 assign_stmt = gimple_build_assign (fd->loop.v, e);
4107 else
4108 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4110 }
4111 /* Handle linear clause adjustments. */
4112 tree itercnt = NULL_TREE;
4113 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4114 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4115 c; c = OMP_CLAUSE_CHAIN (c))
4116 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4117 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4118 {
4119 tree d = OMP_CLAUSE_DECL (c);
4120 bool is_ref = omp_is_reference (d);
4121 tree t = d, a, dest;
4122 if (is_ref)
4123 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4124 if (itercnt == NULL_TREE)
4125 {
4126 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4127 {
4128 itercnt = fold_build2 (MINUS_EXPR, itype,
4129 fold_convert (itype, n1),
4130 fold_convert (itype, fd->loop.n1));
4131 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4132 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4133 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4134 NULL_TREE, false,
4135 GSI_CONTINUE_LINKING);
4136 }
4137 else
4138 itercnt = s0;
4139 }
4140 tree type = TREE_TYPE (t);
4141 if (POINTER_TYPE_P (type))
4142 type = sizetype;
4143 a = fold_build2 (MULT_EXPR, type,
4144 fold_convert (type, itercnt),
4145 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4146 dest = unshare_expr (t);
4147 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4148 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4149 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4150 false, GSI_CONTINUE_LINKING);
4151 assign_stmt = gimple_build_assign (dest, t);
4152 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4153 }
4154 if (fd->collapse > 1)
4155 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4156
4157 if (!broken_loop)
4158 {
4159 /* The code controlling the sequential loop replaces the
4160 GIMPLE_OMP_CONTINUE. */
bce107d7 4161 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 4162 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4163 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4164 vmain = gimple_omp_continue_control_use (cont_stmt);
4165 vback = gimple_omp_continue_control_def (cont_stmt);
4166
384aea12 4167 if (cond_var)
4168 {
4169 tree itype = TREE_TYPE (cond_var);
4170 tree t2;
4171 if (POINTER_TYPE_P (type)
4172 || TREE_CODE (n1) != INTEGER_CST
4173 || fd->loop.cond_code != LT_EXPR)
4174 t2 = build_int_cst (itype, 1);
4175 else
4176 t2 = fold_convert (itype, step);
4177 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4178 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4179 NULL_TREE, true, GSI_SAME_STMT);
4180 assign_stmt = gimple_build_assign (cond_var, t2);
4181 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4182 }
4183
4954efd4 4184 if (!gimple_omp_for_combined_p (fd->for_stmt))
4185 {
4186 if (POINTER_TYPE_P (type))
4187 t = fold_build_pointer_plus (vmain, step);
4188 else
4189 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4190 t = force_gimple_operand_gsi (&gsi, t,
4191 DECL_P (vback)
4192 && TREE_ADDRESSABLE (vback),
4193 NULL_TREE, true, GSI_SAME_STMT);
4194 assign_stmt = gimple_build_assign (vback, t);
4195 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4196
4197 t = build2 (fd->loop.cond_code, boolean_type_node,
4198 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4199 ? t : vback, e);
4200 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4201 }
4202
4203 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4204 gsi_remove (&gsi, true);
4205
4206 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4207 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4208 }
4209
4210 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
bce107d7 4211 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 4212 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4213 {
4214 t = gimple_omp_return_lhs (gsi_stmt (gsi));
7d26f131 4215 if (fd->have_reductemp
4216 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4217 && !fd->have_nonctrl_scantemp))
7e5a76c8 4218 {
4219 tree fn;
4220 if (t)
4221 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4222 else
4223 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4224 gcall *g = gimple_build_call (fn, 0);
4225 if (t)
4226 {
4227 gimple_call_set_lhs (g, t);
9a1d892b 4228 if (fd->have_reductemp)
4229 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4230 NOP_EXPR, t),
4231 GSI_SAME_STMT);
7e5a76c8 4232 }
4233 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4234 }
4235 else
4236 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4954efd4 4237 }
5c77f6d6 4238 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4239 && !fd->have_nonctrl_scantemp)
89c6fe66 4240 {
4241 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4242 gcall *g = gimple_build_call (fn, 0);
4243 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4244 }
7d26f131 4245 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4246 {
4247 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4248 tree controlp = NULL_TREE, controlb = NULL_TREE;
4249 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4250 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4251 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4252 {
4253 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4254 controlb = OMP_CLAUSE_DECL (c);
4255 else
4256 controlp = OMP_CLAUSE_DECL (c);
4257 if (controlb && controlp)
4258 break;
4259 }
4260 gcc_assert (controlp && controlb);
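      /* Mirror the allocation choice made at entry: if CONTROLB is set
	 the temporaries were malloced, so free them; otherwise restore
	 the stack pointer saved before the allocas.  */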
4261 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4262 NULL_TREE, NULL_TREE);
4263 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4264 exit1_bb = split_block (exit_bb, g)->dest;
4265 gsi = gsi_after_labels (exit1_bb);
4266 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4267 controlp);
4268 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4269 exit2_bb = split_block (exit1_bb, g)->dest;
4270 gsi = gsi_after_labels (exit2_bb);
4271 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4272 controlp);
4273 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4274 exit3_bb = split_block (exit2_bb, g)->dest;
4275 gsi = gsi_after_labels (exit3_bb);
4276 }
4954efd4 4277 gsi_remove (&gsi, true);
4278
4279 /* Connect all the blocks. */
4280 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
720cfc43 4281 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4954efd4 4282 ep = find_edge (entry_bb, second_bb);
4283 ep->flags = EDGE_TRUE_VALUE;
720cfc43 4284 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
7d26f131 4285 if (fourth_bb)
4286 {
4287 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4288 ep->probability
4289 = profile_probability::guessed_always ().apply_scale (1, 2);
4290 ep = find_edge (third_bb, fourth_bb);
4291 ep->flags = EDGE_TRUE_VALUE;
4292 ep->probability
4293 = profile_probability::guessed_always ().apply_scale (1, 2);
4294 ep = find_edge (fourth_bb, fifth_bb);
4295 redirect_edge_and_branch (ep, sixth_bb);
4296 }
4297 else
4298 sixth_bb = third_bb;
4299 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4300 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4301 if (exit1_bb)
4302 {
4303 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4304 ep->probability
4305 = profile_probability::guessed_always ().apply_scale (1, 2);
4306 ep = find_edge (exit_bb, exit1_bb);
4307 ep->flags = EDGE_TRUE_VALUE;
4308 ep->probability
4309 = profile_probability::guessed_always ().apply_scale (1, 2);
4310 ep = find_edge (exit1_bb, exit2_bb);
4311 redirect_edge_and_branch (ep, exit3_bb);
4312 }
4954efd4 4313
4314 if (!broken_loop)
4315 {
4316 ep = find_edge (cont_bb, body_bb);
4317 if (ep == NULL)
4318 {
4319 ep = BRANCH_EDGE (cont_bb);
4320 gcc_assert (single_succ (ep->dest) == body_bb);
4321 }
4322 if (gimple_omp_for_combined_p (fd->for_stmt))
4323 {
4324 remove_edge (ep);
4325 ep = NULL;
4326 }
4327 else if (fd->collapse > 1)
4328 {
4329 remove_edge (ep);
4330 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4331 }
4332 else
4333 ep->flags = EDGE_TRUE_VALUE;
4334 find_edge (cont_bb, fin_bb)->flags
4335 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4336 }
4337
4338 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4339 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
7d26f131 4340 if (fourth_bb)
4341 {
4342 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4343 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4344 }
4345 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4954efd4 4346
4347 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4348 recompute_dominator (CDI_DOMINATORS, body_bb));
4349 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4350 recompute_dominator (CDI_DOMINATORS, fin_bb));
7d26f131 4351 if (exit1_bb)
4352 {
4353 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4354 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4355 }
4954efd4 4356
2e966e2a 4357 class loop *loop = body_bb->loop_father;
4954efd4 4358 if (loop != entry_bb->loop_father)
4359 {
4360 gcc_assert (broken_loop || loop->header == body_bb);
4361 gcc_assert (broken_loop
4362 || loop->latch == region->cont
4363 || single_pred (loop->latch) == region->cont);
4364 return;
4365 }
4366
4367 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4368 {
4369 loop = alloc_loop ();
4370 loop->header = body_bb;
4371 if (collapse_bb == NULL)
4372 loop->latch = cont_bb;
4373 add_loop (loop, body_bb->loop_father);
4374 }
4375}
4376
4377/* Return phi in E->DEST with ARG on edge E. */
4378
4379static gphi *
4380find_phi_with_arg_on_edge (tree arg, edge e)
4381{
4382 basic_block bb = e->dest;
4383
4384 for (gphi_iterator gpi = gsi_start_phis (bb);
4385 !gsi_end_p (gpi);
4386 gsi_next (&gpi))
4387 {
4388 gphi *phi = gpi.phi ();
4389 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4390 return phi;
4391 }
4392
4393 return NULL;
4394}
4395
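/* A hypothetical plain-C restatement (illustrative only) of the cyclic
   chunk assignment that expand_omp_for_static_chunk below emits as GIMPLE:
   on trip TRIP, thread THREADID claims the half-open chunk [S0, E0) of the
   N logical iterations, or nothing once the chunks are exhausted.  */

static bool
static_chunk_range_sketch (long n, long nthreads, long threadid,
			   long chunk, long trip, long *s0, long *e0)
{
  long s = (trip * nthreads + threadid) * chunk;
  if (s >= n)
    return false;		/* No chunk left on this trip.  */
  *s0 = s;
  *e0 = s + chunk < n ? s + chunk : n;	/* min (s0 + chunk, n).  */
  return true;
}
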
4396/* A subroutine of expand_omp_for. Generate code for a parallel
4397 loop with static schedule and a specified chunk size. Given
4398 parameters:
4399
4400 for (V = N1; V cond N2; V += STEP) BODY;
4401
4402 where COND is "<" or ">", we generate pseudocode
4403
4404 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4405 if (cond is <)
4406 adj = STEP - 1;
4407 else
4408 adj = STEP + 1;
4409 if ((__typeof (V)) -1 > 0 && cond is >)
4410 n = -(adj + N2 - N1) / -STEP;
4411 else
4412 n = (adj + N2 - N1) / STEP;
4413 trip = 0;
4414 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4415 here so that V is defined
4416 if the loop is not entered
4417 L0:
4418 s0 = (trip * nthreads + threadid) * CHUNK;
7c6746c9 4419 e0 = min (s0 + CHUNK, n);
4954efd4 4420 if (s0 < n) goto L1; else goto L4;
4421 L1:
4422 V = s0 * STEP + N1;
4423 e = e0 * STEP + N1;
4424 L2:
4425 BODY;
4426 V += STEP;
4427 if (V cond e) goto L2; else goto L3;
4428 L3:
4429 trip += 1;
4430 goto L0;
4431 L4:
4432*/
4433
4434static void
4435expand_omp_for_static_chunk (struct omp_region *region,
4436 struct omp_for_data *fd, gimple *inner_stmt)
4437{
4438 tree n, s0, e0, e, t;
4439 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4440 tree type, itype, vmain, vback, vextra;
4441 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4442 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
9a1d892b 4443 gimple_stmt_iterator gsi, gsip;
4954efd4 4444 edge se;
4445 bool broken_loop = region->cont == NULL;
4446 tree *counts = NULL;
4447 tree n1, n2, step;
7e5a76c8 4448 tree reductions = NULL_TREE;
48152aa2 4449 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4954efd4 4450
4451 itype = type = TREE_TYPE (fd->loop.v);
4452 if (POINTER_TYPE_P (type))
4453 itype = signed_type_for (type);
4454
4455 entry_bb = region->entry;
4456 se = split_block (entry_bb, last_stmt (entry_bb));
4457 entry_bb = se->src;
4458 iter_part_bb = se->dest;
4459 cont_bb = region->cont;
4460 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4461 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4462 gcc_assert (broken_loop
4463 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4464 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4465 body_bb = single_succ (seq_start_bb);
4466 if (!broken_loop)
4467 {
4468 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4469 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4470 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4471 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4472 }
4473 exit_bb = region->exit;
4474
4475 /* Trip and adjustment setup goes in ENTRY_BB. */
bce107d7 4476 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 4477 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9a1d892b 4478 gsip = gsi;
4479 gsi_prev (&gsip);
4954efd4 4480
4481 if (fd->collapse > 1)
4482 {
4483 int first_zero_iter = -1, dummy = -1;
4484 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4485
4486 counts = XALLOCAVEC (tree, fd->collapse);
4487 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4488 fin_bb, first_zero_iter,
4489 dummy_bb, dummy, l2_dom_bb);
4490 t = NULL_TREE;
4491 }
4492 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4493 t = integer_one_node;
4494 else
4495 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4496 fold_convert (type, fd->loop.n1),
4497 fold_convert (type, fd->loop.n2));
4498 if (fd->collapse == 1
4499 && TYPE_UNSIGNED (type)
4500 && (t == NULL_TREE || !integer_onep (t)))
4501 {
4502 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4503 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4504 true, GSI_SAME_STMT);
4505 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4506 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4507 true, GSI_SAME_STMT);
4508 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4509 NULL_TREE, NULL_TREE);
4510 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4511 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4512 expand_omp_regimplify_p, NULL, NULL)
4513 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4514 expand_omp_regimplify_p, NULL, NULL))
4515 {
4516 gsi = gsi_for_stmt (cond_stmt);
4517 gimple_regimplify_operands (cond_stmt, &gsi);
4518 }
4519 se = split_block (entry_bb, cond_stmt);
4520 se->flags = EDGE_TRUE_VALUE;
4521 entry_bb = se->dest;
720cfc43 4522 se->probability = profile_probability::very_likely ();
4954efd4 4523 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
720cfc43 4524 se->probability = profile_probability::very_unlikely ();
4954efd4 4525 if (gimple_in_ssa_p (cfun))
4526 {
4527 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4528 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4529 !gsi_end_p (gpi); gsi_next (&gpi))
4530 {
4531 gphi *phi = gpi.phi ();
4532 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4533 se, UNKNOWN_LOCATION);
4534 }
4535 }
4536 gsi = gsi_last_bb (entry_bb);
4537 }
4538
48152aa2 4539 if (fd->lastprivate_conditional)
4540 {
4541 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4542 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4543 if (fd->have_pointer_condtemp)
4544 condtemp = OMP_CLAUSE_DECL (c);
4545 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4546 cond_var = OMP_CLAUSE_DECL (c);
4547 }
4548 if (fd->have_reductemp || fd->have_pointer_condtemp)
7e5a76c8 4549 {
4550 tree t1 = build_int_cst (long_integer_type_node, 0);
4551 tree t2 = build_int_cst (long_integer_type_node, 1);
4552 tree t3 = build_int_cstu (long_integer_type_node,
4553 (HOST_WIDE_INT_1U << 31) + 1);
4554 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
9a1d892b 4555 gimple_stmt_iterator gsi2 = gsi_none ();
4556 gimple *g = NULL;
4557 tree mem = null_pointer_node, memv = NULL_TREE;
9a1d892b 4558 if (fd->have_reductemp)
4559 {
4560 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4561 reductions = OMP_CLAUSE_DECL (c);
4562 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4563 g = SSA_NAME_DEF_STMT (reductions);
4564 reductions = gimple_assign_rhs1 (g);
4565 OMP_CLAUSE_DECL (c) = reductions;
4566 gsi2 = gsi_for_stmt (g);
4567 }
4568 else
4569 {
4570 if (gsi_end_p (gsip))
4571 gsi2 = gsi_after_labels (region->entry);
4572 else
4573 gsi2 = gsip;
4574 reductions = null_pointer_node;
4575 }
48152aa2 4576 if (fd->have_pointer_condtemp)
9a1d892b 4577 {
9a1d892b 4578 tree type = TREE_TYPE (condtemp);
4579 memv = create_tmp_var (type);
4580 TREE_ADDRESSABLE (memv) = 1;
4581 unsigned HOST_WIDE_INT sz
4582 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4583 sz *= fd->lastprivate_conditional;
4584 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4585 false);
4586 mem = build_fold_addr_expr (memv);
4587 }
7e5a76c8 4588 tree t
4589 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4590 9, t1, t2, t2, t3, t1, null_pointer_node,
9a1d892b 4591 null_pointer_node, reductions, mem);
7e5a76c8 4592 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4593 true, GSI_SAME_STMT);
48152aa2 4594 if (fd->have_pointer_condtemp)
9a1d892b 4595 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4596 if (fd->have_reductemp)
4597 {
4598 gsi_remove (&gsi2, true);
4599 release_ssa_name (gimple_assign_lhs (g));
4600 }
7e5a76c8 4601 }
4954efd4 4602 switch (gimple_omp_for_kind (fd->for_stmt))
4603 {
4604 case GF_OMP_FOR_KIND_FOR:
4605 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4606 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4607 break;
4608 case GF_OMP_FOR_KIND_DISTRIBUTE:
4609 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4610 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4611 break;
4612 default:
4613 gcc_unreachable ();
4614 }
4615 nthreads = build_call_expr (nthreads, 0);
4616 nthreads = fold_convert (itype, nthreads);
4617 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4618 true, GSI_SAME_STMT);
4619 threadid = build_call_expr (threadid, 0);
4620 threadid = fold_convert (itype, threadid);
4621 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4622 true, GSI_SAME_STMT);
4623
4624 n1 = fd->loop.n1;
4625 n2 = fd->loop.n2;
4626 step = fd->loop.step;
4627 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4628 {
4629 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4630 OMP_CLAUSE__LOOPTEMP_);
4631 gcc_assert (innerc);
4632 n1 = OMP_CLAUSE_DECL (innerc);
4633 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4634 OMP_CLAUSE__LOOPTEMP_);
4635 gcc_assert (innerc);
4636 n2 = OMP_CLAUSE_DECL (innerc);
4637 }
4638 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4639 true, NULL_TREE, true, GSI_SAME_STMT);
4640 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4641 true, NULL_TREE, true, GSI_SAME_STMT);
4642 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4643 true, NULL_TREE, true, GSI_SAME_STMT);
4644 tree chunk_size = fold_convert (itype, fd->chunk_size);
4645 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4646 chunk_size
4647 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4648 GSI_SAME_STMT);
4649
4650 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4651 t = fold_build2 (PLUS_EXPR, itype, step, t);
4652 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4653 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4654 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4655 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4656 fold_build1 (NEGATE_EXPR, itype, t),
4657 fold_build1 (NEGATE_EXPR, itype, step));
4658 else
4659 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4660 t = fold_convert (itype, t);
4661 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4662 true, GSI_SAME_STMT);
4663
4664 trip_var = create_tmp_reg (itype, ".trip");
4665 if (gimple_in_ssa_p (cfun))
4666 {
4667 trip_init = make_ssa_name (trip_var);
4668 trip_main = make_ssa_name (trip_var);
4669 trip_back = make_ssa_name (trip_var);
4670 }
4671 else
4672 {
4673 trip_init = trip_var;
4674 trip_main = trip_var;
4675 trip_back = trip_var;
4676 }
4677
4678 gassign *assign_stmt
4679 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4680 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4681
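  /* Compute VEXTRA, the value the IV would have at the start of this
     thread's first chunk; per the pseudocode above it exists so that V
     is defined even if the loop body is never entered.  */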
4682 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4683 t = fold_build2 (MULT_EXPR, itype, t, step);
4684 if (POINTER_TYPE_P (type))
4685 t = fold_build_pointer_plus (n1, t);
4686 else
4687 t = fold_build2 (PLUS_EXPR, type, t, n1);
4688 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4689 true, GSI_SAME_STMT);
4690
4691 /* Remove the GIMPLE_OMP_FOR. */
4692 gsi_remove (&gsi, true);
4693
4694 gimple_stmt_iterator gsif = gsi;
4695
4696 /* Iteration space partitioning goes in ITER_PART_BB. */
4697 gsi = gsi_last_bb (iter_part_bb);
4698
4699 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4700 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4701 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4702 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4703 false, GSI_CONTINUE_LINKING);
4704
4705 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4706 t = fold_build2 (MIN_EXPR, itype, t, n);
4707 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4708 false, GSI_CONTINUE_LINKING);
4709
4710 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4711 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4712
4713 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4714 gsi = gsi_start_bb (seq_start_bb);
4715
4716 tree startvar = fd->loop.v;
4717 tree endvar = NULL_TREE;
4718
4719 if (gimple_omp_for_combined_p (fd->for_stmt))
4720 {
4721 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4722 ? gimple_omp_parallel_clauses (inner_stmt)
4723 : gimple_omp_for_clauses (inner_stmt);
4724 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4725 gcc_assert (innerc);
4726 startvar = OMP_CLAUSE_DECL (innerc);
4727 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4728 OMP_CLAUSE__LOOPTEMP_);
4729 gcc_assert (innerc);
4730 endvar = OMP_CLAUSE_DECL (innerc);
4731 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4732 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4733 {
4734 int i;
4735 for (i = 1; i < fd->collapse; i++)
4736 {
4737 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4738 OMP_CLAUSE__LOOPTEMP_);
4739 gcc_assert (innerc);
4740 }
4741 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4742 OMP_CLAUSE__LOOPTEMP_);
4743 if (innerc)
4744 {
4745 /* If needed (distribute parallel for with lastprivate),
4746 propagate down the total number of iterations. */
4747 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4748 fd->loop.n2);
4749 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4750 GSI_CONTINUE_LINKING);
4751 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4752 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4753 }
4754 }
4755 }
4756
4757 t = fold_convert (itype, s0);
4758 t = fold_build2 (MULT_EXPR, itype, t, step);
4759 if (POINTER_TYPE_P (type))
2480c191 4760 {
4761 t = fold_build_pointer_plus (n1, t);
4762 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4763 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4764 t = fold_convert (signed_type_for (type), t);
4765 }
4954efd4 4766 else
4767 t = fold_build2 (PLUS_EXPR, type, t, n1);
4768 t = fold_convert (TREE_TYPE (startvar), t);
4769 t = force_gimple_operand_gsi (&gsi, t,
4770 DECL_P (startvar)
4771 && TREE_ADDRESSABLE (startvar),
4772 NULL_TREE, false, GSI_CONTINUE_LINKING);
4773 assign_stmt = gimple_build_assign (startvar, t);
4774 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9a1d892b 4775 if (cond_var)
4776 {
4777 tree itype = TREE_TYPE (cond_var);
 4778	  /* For the lastprivate(conditional:) itervar, we need an iteration
 4779	     counter that starts at a non-zero unsigned value and increases.
 4780	     Prefer as few IVs as possible, so if we can use startvar
 4781	     itself, use that, or startvar + constant (those would be
 4782	     incremented with step), and as a last resort use s0 + 1,
 4783	     incremented by 1.  */
4784 if (POINTER_TYPE_P (type)
4785 || TREE_CODE (n1) != INTEGER_CST
4786 || fd->loop.cond_code != LT_EXPR)
4787 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4788 build_int_cst (itype, 1));
4789 else if (tree_int_cst_sgn (n1) == 1)
4790 t = fold_convert (itype, t);
4791 else
4792 {
4793 tree c = fold_convert (itype, n1);
4794 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4795 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4796 }
4797 t = force_gimple_operand_gsi (&gsi, t, false,
4798 NULL_TREE, false, GSI_CONTINUE_LINKING);
4799 assign_stmt = gimple_build_assign (cond_var, t);
4800 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4801 }
4954efd4 4802
4803 t = fold_convert (itype, e0);
4804 t = fold_build2 (MULT_EXPR, itype, t, step);
4805 if (POINTER_TYPE_P (type))
2480c191 4806 {
4807 t = fold_build_pointer_plus (n1, t);
4808 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4809 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4810 t = fold_convert (signed_type_for (type), t);
4811 }
4954efd4 4812 else
4813 t = fold_build2 (PLUS_EXPR, type, t, n1);
4814 t = fold_convert (TREE_TYPE (startvar), t);
4815 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4816 false, GSI_CONTINUE_LINKING);
4817 if (endvar)
4818 {
4819 assign_stmt = gimple_build_assign (endvar, e);
4820 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4821 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4822 assign_stmt = gimple_build_assign (fd->loop.v, e);
4823 else
4824 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4825 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4826 }
4827 /* Handle linear clause adjustments. */
4828 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4829 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4830 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4831 c; c = OMP_CLAUSE_CHAIN (c))
4832 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4833 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4834 {
4835 tree d = OMP_CLAUSE_DECL (c);
4836 bool is_ref = omp_is_reference (d);
4837 tree t = d, a, dest;
4838 if (is_ref)
4839 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4840 tree type = TREE_TYPE (t);
4841 if (POINTER_TYPE_P (type))
4842 type = sizetype;
4843 dest = unshare_expr (t);
4844 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4845 expand_omp_build_assign (&gsif, v, t);
4846 if (itercnt == NULL_TREE)
4847 {
4848 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4849 {
4850 itercntbias
4851 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4852 fold_convert (itype, fd->loop.n1));
4853 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4854 itercntbias, step);
4855 itercntbias
4856 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4857 NULL_TREE, true,
4858 GSI_SAME_STMT);
4859 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4860 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4861 NULL_TREE, false,
4862 GSI_CONTINUE_LINKING);
4863 }
4864 else
4865 itercnt = s0;
4866 }
4867 a = fold_build2 (MULT_EXPR, type,
4868 fold_convert (type, itercnt),
4869 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4870 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4871 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4872 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4873 false, GSI_CONTINUE_LINKING);
4874 assign_stmt = gimple_build_assign (dest, t);
4875 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4876 }
4877 if (fd->collapse > 1)
4878 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4879
4880 if (!broken_loop)
4881 {
4882 /* The code controlling the sequential loop goes in CONT_BB,
4883 replacing the GIMPLE_OMP_CONTINUE. */
bce107d7 4884 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 4885 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4886 vmain = gimple_omp_continue_control_use (cont_stmt);
4887 vback = gimple_omp_continue_control_def (cont_stmt);
4888
384aea12 4889 if (cond_var)
4890 {
4891 tree itype = TREE_TYPE (cond_var);
4892 tree t2;
4893 if (POINTER_TYPE_P (type)
4894 || TREE_CODE (n1) != INTEGER_CST
4895 || fd->loop.cond_code != LT_EXPR)
4896 t2 = build_int_cst (itype, 1);
4897 else
4898 t2 = fold_convert (itype, step);
4899 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4900 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4901 NULL_TREE, true, GSI_SAME_STMT);
4902 assign_stmt = gimple_build_assign (cond_var, t2);
4903 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4904 }
4905
4954efd4 4906 if (!gimple_omp_for_combined_p (fd->for_stmt))
4907 {
4908 if (POINTER_TYPE_P (type))
4909 t = fold_build_pointer_plus (vmain, step);
4910 else
4911 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4912 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4913 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4914 true, GSI_SAME_STMT);
4915 assign_stmt = gimple_build_assign (vback, t);
4916 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4917
4918 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4919 t = build2 (EQ_EXPR, boolean_type_node,
4920 build_int_cst (itype, 0),
4921 build_int_cst (itype, 1));
4922 else
4923 t = build2 (fd->loop.cond_code, boolean_type_node,
4924 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4925 ? t : vback, e);
4926 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4927 }
4928
4929 /* Remove GIMPLE_OMP_CONTINUE. */
4930 gsi_remove (&gsi, true);
4931
4932 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4933 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4934
4935 /* Trip update code goes into TRIP_UPDATE_BB. */
4936 gsi = gsi_start_bb (trip_update_bb);
4937
4938 t = build_int_cst (itype, 1);
4939 t = build2 (PLUS_EXPR, itype, trip_main, t);
4940 assign_stmt = gimple_build_assign (trip_back, t);
4941 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4942 }
4943
 4944	  /* Replace the GIMPLE_OMP_RETURN with a barrier, a GOMP_loop_end call, or nothing. */
bce107d7 4945 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 4946 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4947 {
4948 t = gimple_omp_return_lhs (gsi_stmt (gsi));
48152aa2 4949 if (fd->have_reductemp || fd->have_pointer_condtemp)
7e5a76c8 4950 {
4951 tree fn;
4952 if (t)
4953 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4954 else
4955 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4956 gcall *g = gimple_build_call (fn, 0);
4957 if (t)
4958 {
4959 gimple_call_set_lhs (g, t);
9a1d892b 4960 if (fd->have_reductemp)
4961 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4962 NOP_EXPR, t),
4963 GSI_SAME_STMT);
7e5a76c8 4964 }
4965 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4966 }
4967 else
4968 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4954efd4 4969 }
89c6fe66 4970 else if (fd->have_pointer_condtemp)
4971 {
4972 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4973 gcall *g = gimple_build_call (fn, 0);
4974 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4975 }
4954efd4 4976 gsi_remove (&gsi, true);
4977
4978 /* Connect the new blocks. */
4979 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4980 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4981
4982 if (!broken_loop)
4983 {
4984 se = find_edge (cont_bb, body_bb);
4985 if (se == NULL)
4986 {
4987 se = BRANCH_EDGE (cont_bb);
4988 gcc_assert (single_succ (se->dest) == body_bb);
4989 }
4990 if (gimple_omp_for_combined_p (fd->for_stmt))
4991 {
4992 remove_edge (se);
4993 se = NULL;
4994 }
4995 else if (fd->collapse > 1)
4996 {
4997 remove_edge (se);
4998 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4999 }
5000 else
5001 se->flags = EDGE_TRUE_VALUE;
5002 find_edge (cont_bb, trip_update_bb)->flags
5003 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5004
7c6746c9 5005 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5006 iter_part_bb);
4954efd4 5007 }
5008
5009 if (gimple_in_ssa_p (cfun))
5010 {
5011 gphi_iterator psi;
5012 gphi *phi;
5013 edge re, ene;
5014 edge_var_map *vm;
5015 size_t i;
5016
5017 gcc_assert (fd->collapse == 1 && !broken_loop);
5018
5019 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5020 remove arguments of the phi nodes in fin_bb. We need to create
5021 appropriate phi nodes in iter_part_bb instead. */
5022 se = find_edge (iter_part_bb, fin_bb);
5023 re = single_succ_edge (trip_update_bb);
5024 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5025 ene = single_succ_edge (entry_bb);
5026
5027 psi = gsi_start_phis (fin_bb);
5028 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5029 gsi_next (&psi), ++i)
5030 {
5031 gphi *nphi;
be1e7283 5032 location_t locus;
4954efd4 5033
5034 phi = psi.phi ();
e502c72f 5035 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5036 redirect_edge_var_map_def (vm), 0))
5037 continue;
5038
4954efd4 5039 t = gimple_phi_result (phi);
5040 gcc_assert (t == redirect_edge_var_map_result (vm));
5041
5042 if (!single_pred_p (fin_bb))
5043 t = copy_ssa_name (t, phi);
5044
5045 nphi = create_phi_node (t, iter_part_bb);
5046
5047 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5048 locus = gimple_phi_arg_location_from_edge (phi, se);
5049
5050 /* A special case -- fd->loop.v is not yet computed in
 5051	     iter_part_bb; we need to use vextra instead.  */
5052 if (t == fd->loop.v)
5053 t = vextra;
5054 add_phi_arg (nphi, t, ene, locus);
5055 locus = redirect_edge_var_map_location (vm);
5056 tree back_arg = redirect_edge_var_map_def (vm);
5057 add_phi_arg (nphi, back_arg, re, locus);
5058 edge ce = find_edge (cont_bb, body_bb);
5059 if (ce == NULL)
5060 {
5061 ce = BRANCH_EDGE (cont_bb);
5062 gcc_assert (single_succ (ce->dest) == body_bb);
5063 ce = single_succ_edge (ce->dest);
5064 }
5065 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5066 gcc_assert (inner_loop_phi != NULL);
5067 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5068 find_edge (seq_start_bb, body_bb), locus);
5069
5070 if (!single_pred_p (fin_bb))
5071 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5072 }
5073 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5074 redirect_edge_var_map_clear (re);
5075 if (single_pred_p (fin_bb))
5076 while (1)
5077 {
5078 psi = gsi_start_phis (fin_bb);
5079 if (gsi_end_p (psi))
5080 break;
5081 remove_phi_node (&psi, false);
5082 }
5083
5084 /* Make phi node for trip. */
5085 phi = create_phi_node (trip_main, iter_part_bb);
5086 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5087 UNKNOWN_LOCATION);
5088 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5089 UNKNOWN_LOCATION);
5090 }
5091
5092 if (!broken_loop)
5093 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5094 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5095 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5096 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5097 recompute_dominator (CDI_DOMINATORS, fin_bb));
5098 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5099 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5100 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5101 recompute_dominator (CDI_DOMINATORS, body_bb));
5102
5103 if (!broken_loop)
5104 {
2e966e2a 5105 class loop *loop = body_bb->loop_father;
5106 class loop *trip_loop = alloc_loop ();
4954efd4 5107 trip_loop->header = iter_part_bb;
5108 trip_loop->latch = trip_update_bb;
5109 add_loop (trip_loop, iter_part_bb->loop_father);
5110
5111 if (loop != entry_bb->loop_father)
5112 {
5113 gcc_assert (loop->header == body_bb);
5114 gcc_assert (loop->latch == region->cont
5115 || single_pred (loop->latch) == region->cont);
5116 trip_loop->inner = loop;
5117 return;
5118 }
5119
5120 if (!gimple_omp_for_combined_p (fd->for_stmt))
5121 {
5122 loop = alloc_loop ();
5123 loop->header = body_bb;
5124 if (collapse_bb == NULL)
5125 loop->latch = cont_bb;
5126 add_loop (loop, trip_loop);
5127 }
5128 }
5129}
5130
4954efd4 5131/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5132 loop. Given parameters:
5133
5134 for (V = N1; V cond N2; V += STEP) BODY;
5135
5136 where COND is "<" or ">", we generate pseudocode
5137
5138 V = N1;
5139 goto L1;
5140 L0:
5141 BODY;
5142 V += STEP;
5143 L1:
5144 if (V cond N2) goto L0; else goto L2;
5145 L2:
5146
5147 For collapsed loops, given parameters:
5148 collapse(3)
5149 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5150 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5151 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5152 BODY;
5153
5154 we generate pseudocode
5155
5156 if (cond3 is <)
5157 adj = STEP3 - 1;
5158 else
5159 adj = STEP3 + 1;
5160 count3 = (adj + N32 - N31) / STEP3;
5161 if (cond2 is <)
5162 adj = STEP2 - 1;
5163 else
5164 adj = STEP2 + 1;
5165 count2 = (adj + N22 - N21) / STEP2;
5166 if (cond1 is <)
5167 adj = STEP1 - 1;
5168 else
5169 adj = STEP1 + 1;
5170 count1 = (adj + N12 - N11) / STEP1;
5171 count = count1 * count2 * count3;
5172 V = 0;
5173 V1 = N11;
5174 V2 = N21;
5175 V3 = N31;
5176 goto L1;
5177 L0:
5178 BODY;
5179 V += 1;
5180 V3 += STEP3;
5181 V2 += (V3 cond3 N32) ? 0 : STEP2;
5182 V3 = (V3 cond3 N32) ? V3 : N31;
5183 V1 += (V2 cond2 N22) ? 0 : STEP1;
5184 V2 = (V2 cond2 N22) ? V2 : N21;
5185 L1:
5186 if (V < count) goto L0; else goto L2;
5187 L2:
5188
5189 */
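
/* As an illustrative instance of the pseudocode above (a sketch, not code
   emitted verbatim): the collapse(2) loop

     for (V1 = 0; V1 < 4; V1++)
       for (V2 = 0; V2 < 3; V2++)
	 BODY;

   has count1 = 4, count2 = 3 and count = 12, so the expansion drives the
   single linear variable V from 0 to 11 while stepping and wrapping V1
   and V2 as shown.  */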
5190
5191static void
5192expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5193{
5194 tree type, t;
5195 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5196 gimple_stmt_iterator gsi;
5197 gimple *stmt;
5198 gcond *cond_stmt;
5199 bool broken_loop = region->cont == NULL;
5200 edge e, ne;
5201 tree *counts = NULL;
5202 int i;
5203 int safelen_int = INT_MAX;
9144258a 5204 bool dont_vectorize = false;
4954efd4 5205 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5206 OMP_CLAUSE_SAFELEN);
5207 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5208 OMP_CLAUSE__SIMDUID_);
9144258a 5209 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5210 OMP_CLAUSE_IF);
5211 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5212 OMP_CLAUSE_SIMDLEN);
4f4b92d8 5213 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5214 OMP_CLAUSE__CONDTEMP_);
4954efd4 5215 tree n1, n2;
4f4b92d8 5216 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
4954efd4 5217
5218 if (safelen)
5219 {
9d805ed8 5220 poly_uint64 val;
4954efd4 5221 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
9d805ed8 5222 if (!poly_int_tree_p (safelen, &val))
4954efd4 5223 safelen_int = 0;
9d805ed8 5224 else
5225 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4954efd4 5226 if (safelen_int == 1)
5227 safelen_int = 0;
5228 }
9144258a 5229 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5230 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5231 {
5232 safelen_int = 0;
5233 dont_vectorize = true;
5234 }
4954efd4 5235 type = TREE_TYPE (fd->loop.v);
5236 entry_bb = region->entry;
5237 cont_bb = region->cont;
5238 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5239 gcc_assert (broken_loop
5240 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5241 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5242 if (!broken_loop)
5243 {
5244 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5245 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5246 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5247 l2_bb = BRANCH_EDGE (entry_bb)->dest;
5248 }
5249 else
5250 {
5251 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5252 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5253 l2_bb = single_succ (l1_bb);
5254 }
5255 exit_bb = region->exit;
5256 l2_dom_bb = NULL;
5257
bce107d7 5258 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 5259
5260 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5261 /* Not needed in SSA form right now. */
5262 gcc_assert (!gimple_in_ssa_p (cfun));
5263 if (fd->collapse > 1)
5264 {
5265 int first_zero_iter = -1, dummy = -1;
5266 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5267
5268 counts = XALLOCAVEC (tree, fd->collapse);
5269 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5270 zero_iter_bb, first_zero_iter,
5271 dummy_bb, dummy, l2_dom_bb);
5272 }
5273 if (l2_dom_bb == NULL)
5274 l2_dom_bb = l1_bb;
5275
5276 n1 = fd->loop.n1;
5277 n2 = fd->loop.n2;
5278 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5279 {
5280 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5281 OMP_CLAUSE__LOOPTEMP_);
5282 gcc_assert (innerc);
5283 n1 = OMP_CLAUSE_DECL (innerc);
5284 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5285 OMP_CLAUSE__LOOPTEMP_);
5286 gcc_assert (innerc);
5287 n2 = OMP_CLAUSE_DECL (innerc);
5288 }
5289 tree step = fd->loop.step;
5290
d8b267ad 5291 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5292 OMP_CLAUSE__SIMT_);
4954efd4 5293 if (is_simt)
5294 {
5295 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
d8b267ad 5296 is_simt = safelen_int > 1;
5297 }
5298 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5299 if (is_simt)
5300 {
4954efd4 5301 simt_lane = create_tmp_var (unsigned_type_node);
5302 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5303 gimple_call_set_lhs (g, simt_lane);
5304 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5305 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5306 fold_convert (TREE_TYPE (step), simt_lane));
5307 n1 = fold_convert (type, n1);
5308 if (POINTER_TYPE_P (type))
5309 n1 = fold_build_pointer_plus (n1, offset);
5310 else
5311 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5312
5313 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5314 if (fd->collapse > 1)
5315 simt_maxlane = build_one_cst (unsigned_type_node);
5316 else if (safelen_int < omp_max_simt_vf ())
5317 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5318 tree vf
5319 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5320 unsigned_type_node, 0);
5321 if (simt_maxlane)
5322 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5323 vf = fold_convert (TREE_TYPE (step), vf);
5324 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5325 }
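      /* (Summary of the transformation above: each SIMT lane L now starts
	 at N1 + L * STEP and advances by VF * STEP, where VF is the
	 IFN_GOMP_SIMT_VF result, possibly clamped by SIMT_MAXLANE; the
	 final value of V is corrected after the loop, see below.)  */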
5326
5327 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5328 if (fd->collapse > 1)
5329 {
5330 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5331 {
5332 gsi_prev (&gsi);
5333 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5334 gsi_next (&gsi);
5335 }
5336 else
5337 for (i = 0; i < fd->collapse; i++)
5338 {
5339 tree itype = TREE_TYPE (fd->loops[i].v);
5340 if (POINTER_TYPE_P (itype))
5341 itype = signed_type_for (itype);
5342 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5343 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5344 }
5345 }
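  /* (COND_VAR comes from the _condtemp_ clause added for
     lastprivate(conditional) support; it carries a per-iteration counter
     that the lowered body compares against.  A descriptive note; the
     lowering itself is done in omp-low.c.)  */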
4f4b92d8 5346 if (cond_var)
5347 {
5348 if (POINTER_TYPE_P (type)
5349 || TREE_CODE (n1) != INTEGER_CST
5350 || fd->loop.cond_code != LT_EXPR
5351 || tree_int_cst_sgn (n1) != 1)
5352 expand_omp_build_assign (&gsi, cond_var,
5353 build_one_cst (TREE_TYPE (cond_var)));
5354 else
5355 expand_omp_build_assign (&gsi, cond_var,
5356 fold_convert (TREE_TYPE (cond_var), n1));
5357 }
4954efd4 5358
5359 /* Remove the GIMPLE_OMP_FOR statement. */
5360 gsi_remove (&gsi, true);
5361
5362 if (!broken_loop)
5363 {
5364 /* Code to control the increment goes in the CONT_BB. */
bce107d7 5365 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 5366 stmt = gsi_stmt (gsi);
5367 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5368
5369 if (POINTER_TYPE_P (type))
5370 t = fold_build_pointer_plus (fd->loop.v, step);
5371 else
5372 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5373 expand_omp_build_assign (&gsi, fd->loop.v, t);
5374
5375 if (fd->collapse > 1)
5376 {
5377 i = fd->collapse - 1;
5378 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5379 {
5380 t = fold_convert (sizetype, fd->loops[i].step);
5381 t = fold_build_pointer_plus (fd->loops[i].v, t);
5382 }
5383 else
5384 {
5385 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5386 fd->loops[i].step);
5387 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5388 fd->loops[i].v, t);
5389 }
5390 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5391
5392 for (i = fd->collapse - 1; i > 0; i--)
5393 {
5394 tree itype = TREE_TYPE (fd->loops[i].v);
5395 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5396 if (POINTER_TYPE_P (itype2))
5397 itype2 = signed_type_for (itype2);
87ae84ee 5398 t = fold_convert (itype2, fd->loops[i - 1].step);
5399 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5400 GSI_SAME_STMT);
4954efd4 5401 t = build3 (COND_EXPR, itype2,
5402 build2 (fd->loops[i].cond_code, boolean_type_node,
5403 fd->loops[i].v,
5404 fold_convert (itype, fd->loops[i].n2)),
87ae84ee 5405 build_int_cst (itype2, 0), t);
4954efd4 5406 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5407 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5408 else
5409 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5410 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5411
87ae84ee 5412 t = fold_convert (itype, fd->loops[i].n1);
5413 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5414 GSI_SAME_STMT);
4954efd4 5415 t = build3 (COND_EXPR, itype,
5416 build2 (fd->loops[i].cond_code, boolean_type_node,
5417 fd->loops[i].v,
5418 fold_convert (itype, fd->loops[i].n2)),
87ae84ee 5419 fd->loops[i].v, t);
4954efd4 5420 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5421 }
5422 }
4f4b92d8 5423 if (cond_var)
5424 {
5425 if (POINTER_TYPE_P (type)
5426 || TREE_CODE (n1) != INTEGER_CST
5427 || fd->loop.cond_code != LT_EXPR
5428 || tree_int_cst_sgn (n1) != 1)
5429 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5430 build_one_cst (TREE_TYPE (cond_var)));
5431 else
5432 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5433 fold_convert (TREE_TYPE (cond_var), step));
5434 expand_omp_build_assign (&gsi, cond_var, t);
5435 }
4954efd4 5436
5437 /* Remove GIMPLE_OMP_CONTINUE. */
5438 gsi_remove (&gsi, true);
5439 }
5440
5441 /* Emit the condition in L1_BB. */
5442 gsi = gsi_start_bb (l1_bb);
5443
5444 t = fold_convert (type, n2);
5445 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5446 false, GSI_CONTINUE_LINKING);
5447 tree v = fd->loop.v;
5448 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5449 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5450 false, GSI_CONTINUE_LINKING);
5451 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5452 cond_stmt = gimple_build_cond_empty (t);
5453 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5454 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5455 NULL, NULL)
5456 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5457 NULL, NULL))
5458 {
5459 gsi = gsi_for_stmt (cond_stmt);
5460 gimple_regimplify_operands (cond_stmt, &gsi);
5461 }
5462
5463 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5464 if (is_simt)
5465 {
5466 gsi = gsi_start_bb (l2_bb);
5467 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5468 if (POINTER_TYPE_P (type))
5469 t = fold_build_pointer_plus (fd->loop.v, step);
5470 else
5471 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5472 expand_omp_build_assign (&gsi, fd->loop.v, t);
5473 }
5474
5475 /* Remove GIMPLE_OMP_RETURN. */
bce107d7 5476 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 5477 gsi_remove (&gsi, true);
5478
5479 /* Connect the new blocks. */
5480 remove_edge (FALLTHRU_EDGE (entry_bb));
5481
5482 if (!broken_loop)
5483 {
5484 remove_edge (BRANCH_EDGE (entry_bb));
5485 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5486
5487 e = BRANCH_EDGE (l1_bb);
5488 ne = FALLTHRU_EDGE (l1_bb);
5489 e->flags = EDGE_TRUE_VALUE;
5490 }
5491 else
5492 {
5493 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5494
5495 ne = single_succ_edge (l1_bb);
5496 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5497
5498 }
5499 ne->flags = EDGE_FALSE_VALUE;
720cfc43 5500 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5501 ne->probability = e->probability.invert ();
4954efd4 5502
5503 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5504 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5505
5506 if (simt_maxlane)
5507 {
5508 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5509 NULL_TREE, NULL_TREE);
5510 gsi = gsi_last_bb (entry_bb);
5511 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5512 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5513 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
720cfc43 5514 FALLTHRU_EDGE (entry_bb)->probability
5515 = profile_probability::guessed_always ().apply_scale (7, 8);
5516 BRANCH_EDGE (entry_bb)->probability
5517 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4954efd4 5518 l2_dom_bb = entry_bb;
5519 }
5520 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5521
5522 if (!broken_loop)
5523 {
2e966e2a 5524 class loop *loop = alloc_loop ();
4954efd4 5525 loop->header = l1_bb;
5526 loop->latch = cont_bb;
5527 add_loop (loop, l1_bb->loop_father);
5528 loop->safelen = safelen_int;
5529 if (simduid)
5530 {
5531 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5532 cfun->has_simduid_loops = true;
5533 }
5534 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5535 the loop. */
5536 if ((flag_tree_loop_vectorize
52e94bf8 5537 || !global_options_set.x_flag_tree_loop_vectorize)
4954efd4 5538 && flag_tree_loop_optimize
5539 && loop->safelen > 1)
5540 {
5541 loop->force_vectorize = true;
e7419472 5542 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5543 {
5544 unsigned HOST_WIDE_INT v
5545 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5546 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5547 loop->simdlen = v;
5548 }
4954efd4 5549 cfun->has_force_vectorize_loops = true;
5550 }
9144258a 5551 else if (dont_vectorize)
5552 loop->dont_vectorize = true;
4954efd4 5553 }
5554 else if (simduid)
5555 cfun->has_simduid_loops = true;
5556}
5557
 5558/* The taskloop construct is represented after gimplification as two
 5559   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched between
 5560   them.  This routine expands the outer GIMPLE_OMP_FOR,
5561 which should just compute all the needed loop temporaries
5562 for GIMPLE_OMP_TASK. */
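
/* Schematically (a sketch of the nesting, not literal GIMPLE):

     GIMPLE_OMP_FOR (taskloop, outer)	   <-- expanded by this routine
       GIMPLE_OMP_TASK (taskloop)	   <-- lowered to the GOMP_taskloop{,_ull} call
	 GIMPLE_OMP_FOR (taskloop, inner)  <-- see expand_omp_taskloop_for_inner
	   BODY  */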
5563
5564static void
5565expand_omp_taskloop_for_outer (struct omp_region *region,
5566 struct omp_for_data *fd,
5567 gimple *inner_stmt)
5568{
5569 tree type, bias = NULL_TREE;
5570 basic_block entry_bb, cont_bb, exit_bb;
5571 gimple_stmt_iterator gsi;
5572 gassign *assign_stmt;
5573 tree *counts = NULL;
5574 int i;
5575
5576 gcc_assert (inner_stmt);
5577 gcc_assert (region->cont);
5578 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5579 && gimple_omp_task_taskloop_p (inner_stmt));
5580 type = TREE_TYPE (fd->loop.v);
5581
5582 /* See if we need to bias by LLONG_MIN. */
5583 if (fd->iter_type == long_long_unsigned_type_node
5584 && TREE_CODE (type) == INTEGER_TYPE
5585 && !TYPE_UNSIGNED (type))
5586 {
5587 tree n1, n2;
5588
5589 if (fd->loop.cond_code == LT_EXPR)
5590 {
5591 n1 = fd->loop.n1;
5592 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5593 }
5594 else
5595 {
5596 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5597 n2 = fd->loop.n1;
5598 }
5599 if (TREE_CODE (n1) != INTEGER_CST
5600 || TREE_CODE (n2) != INTEGER_CST
5601 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5602 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5603 }
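  /* (Rationale for the bias, as a note: adding LLONG_MIN maps the signed
     iteration values onto unsigned long long while preserving their
     order, so the unsigned comparisons done by the GOMP_taskloop_ull
     runtime still see a correctly ordered iteration space.)  */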
5604
5605 entry_bb = region->entry;
5606 cont_bb = region->cont;
5607 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5608 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5609 exit_bb = region->exit;
5610
bce107d7 5611 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 5612 gimple *for_stmt = gsi_stmt (gsi);
5613 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5614 if (fd->collapse > 1)
5615 {
5616 int first_zero_iter = -1, dummy = -1;
5617 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5618
5619 counts = XALLOCAVEC (tree, fd->collapse);
5620 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5621 zero_iter_bb, first_zero_iter,
5622 dummy_bb, dummy, l2_dom_bb);
5623
5624 if (zero_iter_bb)
5625 {
5626 /* Some counts[i] vars might be uninitialized if
5627 some loop has zero iterations. But the body shouldn't
5628 be executed in that case, so just avoid uninit warnings. */
5629 for (i = first_zero_iter; i < fd->collapse; i++)
5630 if (SSA_VAR_P (counts[i]))
5631 TREE_NO_WARNING (counts[i]) = 1;
5632 gsi_prev (&gsi);
5633 edge e = split_block (entry_bb, gsi_stmt (gsi));
5634 entry_bb = e->dest;
5635 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5636 gsi = gsi_last_bb (entry_bb);
5637 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5638 get_immediate_dominator (CDI_DOMINATORS,
5639 zero_iter_bb));
5640 }
5641 }
5642
5643 tree t0, t1;
5644 t1 = fd->loop.n2;
5645 t0 = fd->loop.n1;
5646 if (POINTER_TYPE_P (TREE_TYPE (t0))
5647 && TYPE_PRECISION (TREE_TYPE (t0))
5648 != TYPE_PRECISION (fd->iter_type))
5649 {
 5650	      /* Avoid casting pointers to an integer of a different size.  */
5651 tree itype = signed_type_for (type);
5652 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5653 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5654 }
5655 else
5656 {
5657 t1 = fold_convert (fd->iter_type, t1);
5658 t0 = fold_convert (fd->iter_type, t0);
5659 }
5660 if (bias)
5661 {
5662 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5663 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5664 }
5665
5666 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5667 OMP_CLAUSE__LOOPTEMP_);
5668 gcc_assert (innerc);
5669 tree startvar = OMP_CLAUSE_DECL (innerc);
5670 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5671 gcc_assert (innerc);
5672 tree endvar = OMP_CLAUSE_DECL (innerc);
5673 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5674 {
5675 gcc_assert (innerc);
5676 for (i = 1; i < fd->collapse; i++)
5677 {
5678 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5679 OMP_CLAUSE__LOOPTEMP_);
5680 gcc_assert (innerc);
5681 }
5682 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5683 OMP_CLAUSE__LOOPTEMP_);
5684 if (innerc)
5685 {
5686 /* If needed (inner taskloop has lastprivate clause), propagate
5687 down the total number of iterations. */
5688 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5689 NULL_TREE, false,
5690 GSI_CONTINUE_LINKING);
5691 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5692 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5693 }
5694 }
5695
5696 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5697 GSI_CONTINUE_LINKING);
5698 assign_stmt = gimple_build_assign (startvar, t0);
5699 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5700
5701 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5702 GSI_CONTINUE_LINKING);
5703 assign_stmt = gimple_build_assign (endvar, t1);
5704 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5705 if (fd->collapse > 1)
5706 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5707
5708 /* Remove the GIMPLE_OMP_FOR statement. */
5709 gsi = gsi_for_stmt (for_stmt);
5710 gsi_remove (&gsi, true);
5711
bce107d7 5712 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 5713 gsi_remove (&gsi, true);
5714
bce107d7 5715 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 5716 gsi_remove (&gsi, true);
5717
720cfc43 5718 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4954efd4 5719 remove_edge (BRANCH_EDGE (entry_bb));
720cfc43 5720 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4954efd4 5721 remove_edge (BRANCH_EDGE (cont_bb));
5722 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5723 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5724 recompute_dominator (CDI_DOMINATORS, region->entry));
5725}
5726
 5727/* The taskloop construct is represented after gimplification as two
 5728   GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched between
 5729   them.  This routine expands the inner GIMPLE_OMP_FOR.  The
 5730   GOMP_taskloop{,_ull} function arranges for each task to be given just
 5731   a single range of iterations.  */
5732
5733static void
5734expand_omp_taskloop_for_inner (struct omp_region *region,
5735 struct omp_for_data *fd,
5736 gimple *inner_stmt)
5737{
5738 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5739 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5740 basic_block fin_bb;
5741 gimple_stmt_iterator gsi;
5742 edge ep;
5743 bool broken_loop = region->cont == NULL;
5744 tree *counts = NULL;
5745 tree n1, n2, step;
5746
5747 itype = type = TREE_TYPE (fd->loop.v);
5748 if (POINTER_TYPE_P (type))
5749 itype = signed_type_for (type);
5750
5751 /* See if we need to bias by LLONG_MIN. */
5752 if (fd->iter_type == long_long_unsigned_type_node
5753 && TREE_CODE (type) == INTEGER_TYPE
5754 && !TYPE_UNSIGNED (type))
5755 {
5756 tree n1, n2;
5757
5758 if (fd->loop.cond_code == LT_EXPR)
5759 {
5760 n1 = fd->loop.n1;
5761 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5762 }
5763 else
5764 {
5765 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5766 n2 = fd->loop.n1;
5767 }
5768 if (TREE_CODE (n1) != INTEGER_CST
5769 || TREE_CODE (n2) != INTEGER_CST
5770 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5771 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5772 }
5773
5774 entry_bb = region->entry;
5775 cont_bb = region->cont;
5776 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5777 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5778 gcc_assert (broken_loop
5779 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5780 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5781 if (!broken_loop)
5782 {
5783 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5784 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5785 }
5786 exit_bb = region->exit;
5787
5788 /* Iteration space partitioning goes in ENTRY_BB. */
bce107d7 5789 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 5790 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5791
5792 if (fd->collapse > 1)
5793 {
5794 int first_zero_iter = -1, dummy = -1;
5795 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5796
5797 counts = XALLOCAVEC (tree, fd->collapse);
5798 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5799 fin_bb, first_zero_iter,
5800 dummy_bb, dummy, l2_dom_bb);
5801 t = NULL_TREE;
5802 }
5803 else
5804 t = integer_one_node;
5805
5806 step = fd->loop.step;
5807 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5808 OMP_CLAUSE__LOOPTEMP_);
5809 gcc_assert (innerc);
5810 n1 = OMP_CLAUSE_DECL (innerc);
5811 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5812 gcc_assert (innerc);
5813 n2 = OMP_CLAUSE_DECL (innerc);
5814 if (bias)
5815 {
5816 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5817 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5818 }
5819 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5820 true, NULL_TREE, true, GSI_SAME_STMT);
5821 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5822 true, NULL_TREE, true, GSI_SAME_STMT);
5823 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5824 true, NULL_TREE, true, GSI_SAME_STMT);
5825
5826 tree startvar = fd->loop.v;
5827 tree endvar = NULL_TREE;
5828
5829 if (gimple_omp_for_combined_p (fd->for_stmt))
5830 {
5831 tree clauses = gimple_omp_for_clauses (inner_stmt);
5832 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5833 gcc_assert (innerc);
5834 startvar = OMP_CLAUSE_DECL (innerc);
5835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5836 OMP_CLAUSE__LOOPTEMP_);
5837 gcc_assert (innerc);
5838 endvar = OMP_CLAUSE_DECL (innerc);
5839 }
5840 t = fold_convert (TREE_TYPE (startvar), n1);
5841 t = force_gimple_operand_gsi (&gsi, t,
5842 DECL_P (startvar)
5843 && TREE_ADDRESSABLE (startvar),
5844 NULL_TREE, false, GSI_CONTINUE_LINKING);
5845 gimple *assign_stmt = gimple_build_assign (startvar, t);
5846 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5847
5848 t = fold_convert (TREE_TYPE (startvar), n2);
5849 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5850 false, GSI_CONTINUE_LINKING);
5851 if (endvar)
5852 {
5853 assign_stmt = gimple_build_assign (endvar, e);
5854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5855 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5856 assign_stmt = gimple_build_assign (fd->loop.v, e);
5857 else
5858 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5860 }
5861 if (fd->collapse > 1)
5862 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5863
5864 if (!broken_loop)
5865 {
5866 /* The code controlling the sequential loop replaces the
5867 GIMPLE_OMP_CONTINUE. */
bce107d7 5868 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 5869 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5870 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5871 vmain = gimple_omp_continue_control_use (cont_stmt);
5872 vback = gimple_omp_continue_control_def (cont_stmt);
5873
5874 if (!gimple_omp_for_combined_p (fd->for_stmt))
5875 {
5876 if (POINTER_TYPE_P (type))
5877 t = fold_build_pointer_plus (vmain, step);
5878 else
5879 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5880 t = force_gimple_operand_gsi (&gsi, t,
5881 DECL_P (vback)
5882 && TREE_ADDRESSABLE (vback),
5883 NULL_TREE, true, GSI_SAME_STMT);
5884 assign_stmt = gimple_build_assign (vback, t);
5885 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5886
5887 t = build2 (fd->loop.cond_code, boolean_type_node,
5888 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5889 ? t : vback, e);
5890 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5891 }
5892
5893 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5894 gsi_remove (&gsi, true);
5895
5896 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5897 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5898 }
5899
5900 /* Remove the GIMPLE_OMP_FOR statement. */
5901 gsi = gsi_for_stmt (fd->for_stmt);
5902 gsi_remove (&gsi, true);
5903
5904 /* Remove the GIMPLE_OMP_RETURN statement. */
bce107d7 5905 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 5906 gsi_remove (&gsi, true);
5907
720cfc43 5908 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4954efd4 5909 if (!broken_loop)
5910 remove_edge (BRANCH_EDGE (entry_bb));
5911 else
5912 {
5913 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5914 region->outer->cont = NULL;
5915 }
5916
5917 /* Connect all the blocks. */
5918 if (!broken_loop)
5919 {
5920 ep = find_edge (cont_bb, body_bb);
5921 if (gimple_omp_for_combined_p (fd->for_stmt))
5922 {
5923 remove_edge (ep);
5924 ep = NULL;
5925 }
5926 else if (fd->collapse > 1)
5927 {
5928 remove_edge (ep);
5929 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5930 }
5931 else
5932 ep->flags = EDGE_TRUE_VALUE;
5933 find_edge (cont_bb, fin_bb)->flags
5934 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5935 }
5936
5937 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5938 recompute_dominator (CDI_DOMINATORS, body_bb));
5939 if (!broken_loop)
5940 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5941 recompute_dominator (CDI_DOMINATORS, fin_bb));
5942
5943 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5944 {
2e966e2a 5945 class loop *loop = alloc_loop ();
4954efd4 5946 loop->header = body_bb;
5947 if (collapse_bb == NULL)
5948 loop->latch = cont_bb;
5949 add_loop (loop, body_bb->loop_father);
5950 }
5951}
5952
5953/* A subroutine of expand_omp_for. Generate code for an OpenACC
5954 partitioned loop. The lowering here is abstracted, in that the
5955 loop parameters are passed through internal functions, which are
5956 further lowered by oacc_device_lower, once we get to the target
5957 compiler. The loop is of the form:
5958
5959 for (V = B; V LTGT E; V += S) {BODY}
5960
 5961   where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
 5962   (constant 0 for no chunking) and we will have a GWV partitioning
5963 mask, specifying dimensions over which the loop is to be
719a7570 5964 partitioned (see note below). We generate code that looks like
5965 (this ignores tiling):
4954efd4 5966
5967 <entry_bb> [incoming FALL->body, BRANCH->exit]
5968 typedef signedintify (typeof (V)) T; // underlying signed integral type
5969 T range = E - B;
5970 T chunk_no = 0;
5971 T DIR = LTGT == '<' ? +1 : -1;
 5972   T chunk_max = GOACC_LOOP_CHUNK (DIR, range, S, CHUNK_SIZE, GWV);
 5973   T step = GOACC_LOOP_STEP (DIR, range, S, CHUNK_SIZE, GWV);
5974
5975 <head_bb> [created by splitting end of entry_bb]
 5976   T offset = GOACC_LOOP_OFFSET (DIR, range, S, CHUNK_SIZE, GWV, chunk_no);
 5977   T bound = GOACC_LOOP_BOUND (DIR, range, S, CHUNK_SIZE, GWV, offset);
5978 if (!(offset LTGT bound)) goto bottom_bb;
5979
5980 <body_bb> [incoming]
5981 V = B + offset;
5982 {BODY}
5983
5984 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5985 offset += step;
5986 if (offset LTGT bound) goto body_bb; [*]
5987
5988 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5989 chunk_no++;
 5990   if (chunk_no < chunk_max) goto head_bb;
5991
5992 <exit_bb> [incoming]
5993 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5994
719a7570 5995 [*] Needed if V live at end of loop. */
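
/* For example -- an illustrative sketch only, since the real lowering is
   target-specific and happens in the oacc_device_lower pass -- with
   CHUNK_SIZE 0 and GWV selecting vector partitioning, a target such as
   nvptx can lower GOACC_LOOP_OFFSET to "lane * S" and GOACC_LOOP_STEP to
   "vector_length * S", so that lane L executes iterations L, L + VL,
   L + 2*VL, ... of the original loop.  */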
4954efd4 5996
5997static void
5998expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5999{
6000 tree v = fd->loop.v;
6001 enum tree_code cond_code = fd->loop.cond_code;
6002 enum tree_code plus_code = PLUS_EXPR;
6003
6004 tree chunk_size = integer_minus_one_node;
6005 tree gwv = integer_zero_node;
6006 tree iter_type = TREE_TYPE (v);
6007 tree diff_type = iter_type;
6008 tree plus_type = iter_type;
6009 struct oacc_collapse *counts = NULL;
6010
6011 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6012 == GF_OMP_FOR_KIND_OACC_LOOP);
6013 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6014 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6015
6016 if (POINTER_TYPE_P (iter_type))
6017 {
6018 plus_code = POINTER_PLUS_EXPR;
6019 plus_type = sizetype;
6020 }
6021 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6022 diff_type = signed_type_for (diff_type);
ff88bae7 6023 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6024 diff_type = integer_type_node;
4954efd4 6025
6026 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6027 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6028 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6029 basic_block bottom_bb = NULL;
6030
 6031  /* entry_bb has two successors; the branch edge is to the exit
6032 block, fallthrough edge to body. */
6033 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6034 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6035
 6036  /* If cont_bb is non-NULL, it has 2 successors.  The branch successor
 6037     goes to body_bb, or to a block whose only successor is body_bb.  Its
6038 fallthrough successor is the final block (same as the branch
6039 successor of the entry_bb). */
6040 if (cont_bb)
6041 {
6042 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6043 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6044
6045 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6046 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6047 }
6048 else
6049 gcc_assert (!gimple_in_ssa_p (cfun));
6050
6051 /* The exit block only has entry_bb and cont_bb as predecessors. */
6052 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6053
6054 tree chunk_no;
6055 tree chunk_max = NULL_TREE;
6056 tree bound, offset;
6057 tree step = create_tmp_var (diff_type, ".step");
6058 bool up = cond_code == LT_EXPR;
6059 tree dir = build_int_cst (diff_type, up ? +1 : -1);
719a7570 6060 bool chunking = !gimple_in_ssa_p (cfun);
4954efd4 6061 bool negating;
6062
719a7570 6063 /* Tiling vars. */
6064 tree tile_size = NULL_TREE;
6065 tree element_s = NULL_TREE;
6066 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6067 basic_block elem_body_bb = NULL;
6068 basic_block elem_cont_bb = NULL;
6069
4954efd4 6070 /* SSA instances. */
6071 tree offset_incr = NULL_TREE;
6072 tree offset_init = NULL_TREE;
6073
6074 gimple_stmt_iterator gsi;
6075 gassign *ass;
6076 gcall *call;
6077 gimple *stmt;
6078 tree expr;
6079 location_t loc;
6080 edge split, be, fte;
6081
6082 /* Split the end of entry_bb to create head_bb. */
6083 split = split_block (entry_bb, last_stmt (entry_bb));
6084 basic_block head_bb = split->dest;
6085 entry_bb = split->src;
6086
6087 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
bce107d7 6088 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 6089 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6090 loc = gimple_location (for_stmt);
6091
6092 if (gimple_in_ssa_p (cfun))
6093 {
6094 offset_init = gimple_omp_for_index (for_stmt, 0);
6095 gcc_assert (integer_zerop (fd->loop.n1));
6096 /* The SSA parallelizer does gang parallelism. */
6097 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6098 }
6099
719a7570 6100 if (fd->collapse > 1 || fd->tiling)
4954efd4 6101 {
719a7570 6102 gcc_assert (!gimple_in_ssa_p (cfun) && up);
4954efd4 6103 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6104 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
719a7570 6105 TREE_TYPE (fd->loop.n2), loc);
4954efd4 6106
6107 if (SSA_VAR_P (fd->loop.n2))
6108 {
6109 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6110 true, GSI_SAME_STMT);
6111 ass = gimple_build_assign (fd->loop.n2, total);
6112 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6113 }
4954efd4 6114 }
6115
6116 tree b = fd->loop.n1;
6117 tree e = fd->loop.n2;
6118 tree s = fd->loop.step;
6119
6120 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6121 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6122
7c6746c9 6123 /* Convert the step, avoiding possible unsigned->signed overflow. */
4954efd4 6124 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6125 if (negating)
6126 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6127 s = fold_convert (diff_type, s);
6128 if (negating)
6129 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6130 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6131
6132 if (!chunking)
6133 chunk_size = integer_zero_node;
6134 expr = fold_convert (diff_type, chunk_size);
6135 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6136 NULL_TREE, true, GSI_SAME_STMT);
719a7570 6137
6138 if (fd->tiling)
6139 {
6140 /* Determine the tile size and element step,
6141 modify the outer loop step size. */
6142 tile_size = create_tmp_var (diff_type, ".tile_size");
6143 expr = build_int_cst (diff_type, 1);
6144 for (int ix = 0; ix < fd->collapse; ix++)
6145 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6146 expr = force_gimple_operand_gsi (&gsi, expr, true,
6147 NULL_TREE, true, GSI_SAME_STMT);
6148 ass = gimple_build_assign (tile_size, expr);
6149 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6150
6151 element_s = create_tmp_var (diff_type, ".element_s");
6152 ass = gimple_build_assign (element_s, s);
6153 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6154
6155 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6156 s = force_gimple_operand_gsi (&gsi, expr, true,
6157 NULL_TREE, true, GSI_SAME_STMT);
6158 }
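      /* (E.g. for tile(2,3), an illustrative note: tile_size becomes 6,
	 ELEMENT_S keeps the original step, and the outer loop now advances
	 by 6 * S per iteration, i.e. one whole tile at a time.)  */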
6159
7c6746c9 6160 /* Determine the range, avoiding possible unsigned->signed overflow. */
4954efd4 6161 negating = !up && TYPE_UNSIGNED (iter_type);
6162 expr = fold_build2 (MINUS_EXPR, plus_type,
6163 fold_convert (plus_type, negating ? b : e),
6164 fold_convert (plus_type, negating ? e : b));
6165 expr = fold_convert (diff_type, expr);
6166 if (negating)
6167 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6168 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6169 NULL_TREE, true, GSI_SAME_STMT);
6170
6171 chunk_no = build_int_cst (diff_type, 0);
6172 if (chunking)
6173 {
6174 gcc_assert (!gimple_in_ssa_p (cfun));
6175
6176 expr = chunk_no;
6177 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6178 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6179
6180 ass = gimple_build_assign (chunk_no, expr);
6181 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6182
6183 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6184 build_int_cst (integer_type_node,
6185 IFN_GOACC_LOOP_CHUNKS),
6186 dir, range, s, chunk_size, gwv);
6187 gimple_call_set_lhs (call, chunk_max);
6188 gimple_set_location (call, loc);
6189 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6190 }
6191 else
6192 chunk_size = chunk_no;
6193
6194 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6195 build_int_cst (integer_type_node,
6196 IFN_GOACC_LOOP_STEP),
6197 dir, range, s, chunk_size, gwv);
6198 gimple_call_set_lhs (call, step);
6199 gimple_set_location (call, loc);
6200 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6201
6202 /* Remove the GIMPLE_OMP_FOR. */
6203 gsi_remove (&gsi, true);
6204
7c6746c9 6205 /* Fixup edges from head_bb. */
4954efd4 6206 be = BRANCH_EDGE (head_bb);
6207 fte = FALLTHRU_EDGE (head_bb);
6208 be->flags |= EDGE_FALSE_VALUE;
6209 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6210
6211 basic_block body_bb = fte->dest;
6212
6213 if (gimple_in_ssa_p (cfun))
6214 {
bce107d7 6215 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 6216 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6217
6218 offset = gimple_omp_continue_control_use (cont_stmt);
6219 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6220 }
6221 else
6222 {
6223 offset = create_tmp_var (diff_type, ".offset");
6224 offset_init = offset_incr = offset;
6225 }
6226 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6227
6228 /* Loop offset & bound go into head_bb. */
6229 gsi = gsi_start_bb (head_bb);
6230
6231 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6232 build_int_cst (integer_type_node,
6233 IFN_GOACC_LOOP_OFFSET),
6234 dir, range, s,
6235 chunk_size, gwv, chunk_no);
6236 gimple_call_set_lhs (call, offset_init);
6237 gimple_set_location (call, loc);
6238 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6239
6240 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6241 build_int_cst (integer_type_node,
6242 IFN_GOACC_LOOP_BOUND),
6243 dir, range, s,
6244 chunk_size, gwv, offset_init);
6245 gimple_call_set_lhs (call, bound);
6246 gimple_set_location (call, loc);
6247 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6248
6249 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6250 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6251 GSI_CONTINUE_LINKING);
6252
6253 /* V assignment goes into body_bb. */
6254 if (!gimple_in_ssa_p (cfun))
6255 {
6256 gsi = gsi_start_bb (body_bb);
6257
6258 expr = build2 (plus_code, iter_type, b,
6259 fold_convert (plus_type, offset));
6260 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6261 true, GSI_SAME_STMT);
6262 ass = gimple_build_assign (v, expr);
6263 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
719a7570 6264
6265 if (fd->collapse > 1 || fd->tiling)
6266 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
6267
6268 if (fd->tiling)
6269 {
6270 /* Determine the range of the element loop -- usually simply
6271 the tile_size, but could be smaller if the final
6272 iteration of the outer loop is a partial tile. */
6273 tree e_range = create_tmp_var (diff_type, ".e_range");
6274
6275 expr = build2 (MIN_EXPR, diff_type,
6276 build2 (MINUS_EXPR, diff_type, bound, offset),
6277 build2 (MULT_EXPR, diff_type, tile_size,
6278 element_s));
6279 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6280 true, GSI_SAME_STMT);
6281 ass = gimple_build_assign (e_range, expr);
6282 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6283
6284 /* Determine bound, offset & step of inner loop. */
6285 e_bound = create_tmp_var (diff_type, ".e_bound");
6286 e_offset = create_tmp_var (diff_type, ".e_offset");
6287 e_step = create_tmp_var (diff_type, ".e_step");
6288
6289 /* Mark these as element loops. */
6290 tree t, e_gwv = integer_minus_one_node;
6291 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6292
6293 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6294 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6295 element_s, chunk, e_gwv, chunk);
6296 gimple_call_set_lhs (call, e_offset);
6297 gimple_set_location (call, loc);
6298 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6299
6300 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6301 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6302 element_s, chunk, e_gwv, e_offset);
6303 gimple_call_set_lhs (call, e_bound);
6304 gimple_set_location (call, loc);
6305 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6306
6307 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6308 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6309 element_s, chunk, e_gwv);
6310 gimple_call_set_lhs (call, e_step);
6311 gimple_set_location (call, loc);
6312 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6313
6314 /* Add test and split block. */
6315 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6316 stmt = gimple_build_cond_empty (expr);
6317 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6318 split = split_block (body_bb, stmt);
6319 elem_body_bb = split->dest;
6320 if (cont_bb == body_bb)
6321 cont_bb = elem_body_bb;
6322 body_bb = split->src;
6323
6324 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6325
44d4153a 6326 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6327 if (cont_bb == NULL)
6328 {
6329 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6330 e->probability = profile_probability::even ();
6331 split->probability = profile_probability::even ();
6332 }
6333
719a7570 6334 /* Initialize the user's loop vars. */
6335 gsi = gsi_start_bb (elem_body_bb);
6336 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6337 }
4954efd4 6338 }
6339
6340 /* Loop increment goes into cont_bb. If this is not a loop, we
6341 will have spawned threads as if it was, and each one will
6342 execute one iteration. The specification is not explicit about
6343 whether such constructs are ill-formed or not, and they can
6344 occur, especially when noreturn routines are involved. */
6345 if (cont_bb)
6346 {
bce107d7 6347 gsi = gsi_last_nondebug_bb (cont_bb);
4954efd4 6348 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6349 loc = gimple_location (cont_stmt);
6350
719a7570 6351 if (fd->tiling)
6352 {
6353 /* Insert element loop increment and test. */
6354 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6355 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6356 true, GSI_SAME_STMT);
6357 ass = gimple_build_assign (e_offset, expr);
6358 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6359 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6360
6361 stmt = gimple_build_cond_empty (expr);
6362 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6363 split = split_block (cont_bb, stmt);
6364 elem_cont_bb = split->src;
6365 cont_bb = split->dest;
6366
6367 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
70b22dd9 6368 split->probability = profile_probability::unlikely ().guessed ();
6369 edge latch_edge
6370 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6371 latch_edge->probability = profile_probability::likely ().guessed ();
6372
6373 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6374 skip_edge->probability = profile_probability::unlikely ().guessed ();
6375 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6376 loop_entry_edge->probability
6377 = profile_probability::likely ().guessed ();
719a7570 6378
6379 gsi = gsi_for_stmt (cont_stmt);
6380 }
6381
4954efd4 6382 /* Increment offset. */
6383 if (gimple_in_ssa_p (cfun))
719a7570 6384 expr = build2 (plus_code, iter_type, offset,
6385 fold_convert (plus_type, step));
4954efd4 6386 else
6387 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6388 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6389 true, GSI_SAME_STMT);
6390 ass = gimple_build_assign (offset_incr, expr);
6391 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6392 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6393 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6394
6395 /* Remove the GIMPLE_OMP_CONTINUE. */
6396 gsi_remove (&gsi, true);
6397
7c6746c9 6398 /* Fixup edges from cont_bb. */
4954efd4 6399 be = BRANCH_EDGE (cont_bb);
6400 fte = FALLTHRU_EDGE (cont_bb);
6401 be->flags |= EDGE_TRUE_VALUE;
6402 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6403
6404 if (chunking)
6405 {
6406 /* Split the beginning of exit_bb to make bottom_bb. We
6407 need to insert a nop at the start, because splitting is
7c6746c9 6408 after a stmt, not before. */
4954efd4 6409 gsi = gsi_start_bb (exit_bb);
6410 stmt = gimple_build_nop ();
6411 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6412 split = split_block (exit_bb, stmt);
6413 bottom_bb = split->src;
6414 exit_bb = split->dest;
6415 gsi = gsi_last_bb (bottom_bb);
6416
6417 /* Chunk increment and test goes into bottom_bb. */
6418 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6419 build_int_cst (diff_type, 1));
6420 ass = gimple_build_assign (chunk_no, expr);
6421 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6422
6423 /* Chunk test at end of bottom_bb. */
6424 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6425 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6426 GSI_CONTINUE_LINKING);
6427
7c6746c9 6428 /* Fixup edges from bottom_bb. */
4954efd4 6429 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
a2054420 6430 split->probability = profile_probability::unlikely ().guessed ();
6431 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6432 latch_edge->probability = profile_probability::likely ().guessed ();
4954efd4 6433 }
6434 }
6435
bce107d7 6436 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 6437 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6438 loc = gimple_location (gsi_stmt (gsi));
6439
6440 if (!gimple_in_ssa_p (cfun))
6441 {
6442 /* Insert the final value of V, in case it is live. This is the
6443 value for the only thread that survives past the join. */
6444 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6445 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6446 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6447 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6448 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6449 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6450 true, GSI_SAME_STMT);
6451 ass = gimple_build_assign (v, expr);
6452 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6453 }
6454
7c6746c9 6455 /* Remove the OMP_RETURN. */
4954efd4 6456 gsi_remove (&gsi, true);
6457
6458 if (cont_bb)
6459 {
719a7570 6460 /* We now have one, two or three nested loops. Update the loop
4954efd4 6461 structures. */
2e966e2a 6462 class loop *parent = entry_bb->loop_father;
6463 class loop *body = body_bb->loop_father;
4954efd4 6464
6465 if (chunking)
6466 {
2e966e2a 6467 class loop *chunk_loop = alloc_loop ();
4954efd4 6468 chunk_loop->header = head_bb;
6469 chunk_loop->latch = bottom_bb;
6470 add_loop (chunk_loop, parent);
6471 parent = chunk_loop;
6472 }
6473 else if (parent != body)
6474 {
6475 gcc_assert (body->header == body_bb);
6476 gcc_assert (body->latch == cont_bb
6477 || single_pred (body->latch) == cont_bb);
6478 parent = NULL;
6479 }
6480
6481 if (parent)
6482 {
2e966e2a 6483 class loop *body_loop = alloc_loop ();
4954efd4 6484 body_loop->header = body_bb;
6485 body_loop->latch = cont_bb;
6486 add_loop (body_loop, parent);
719a7570 6487
6488 if (fd->tiling)
6489 {
6490 /* Insert tiling's element loop. */
2e966e2a 6491 class loop *inner_loop = alloc_loop ();
719a7570 6492 inner_loop->header = elem_body_bb;
6493 inner_loop->latch = elem_cont_bb;
6494 add_loop (inner_loop, body_loop);
6495 }
4954efd4 6496 }
6497 }
6498}
6499
6500/* Expand the OMP loop defined by REGION. */
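/* (Dispatch overview, for orientation: simd -> expand_omp_simd, OpenACC
   loops -> expand_oacc_for, taskloop -> expand_omp_taskloop_for_{outer,
   inner}, static schedules without ordered -> expand_omp_for_static_
   {nochunk,chunk}; everything else falls through to the runtime-schedule
   path below, which picks the GOMP_loop_*_start/next builtins used by the
   generic expansion.)  */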
6501
6502static void
6503expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6504{
6505 struct omp_for_data fd;
6506 struct omp_for_data_loop *loops;
6507
6508 loops
6509 = (struct omp_for_data_loop *)
6510 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6511 * sizeof (struct omp_for_data_loop));
6512 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6513 &fd, loops);
6514 region->sched_kind = fd.sched_kind;
6515 region->sched_modifiers = fd.sched_modifiers;
0365070c 6516 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
4954efd4 6517
6518 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6519 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6520 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6521 if (region->cont)
6522 {
6523 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6524 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6525 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6526 }
6527 else
 6528    /* If there isn't a continue then this is a degenerate case where
6529 the introduction of abnormal edges during lowering will prevent
6530 original loops from being detected. Fix that up. */
6531 loops_state_set (LOOPS_NEED_FIXUP);
6532
0076df39 6533 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
4954efd4 6534 expand_omp_simd (region, &fd);
4954efd4 6535 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6536 {
6537 gcc_assert (!inner_stmt);
6538 expand_oacc_for (region, &fd);
6539 }
6540 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6541 {
6542 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6543 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6544 else
6545 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6546 }
6547 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6548 && !fd.have_ordered)
6549 {
6550 if (fd.chunk_size == NULL)
6551 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6552 else
6553 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6554 }
6555 else
6556 {
6557 int fn_index, start_ix, next_ix;
7e5a76c8 6558 unsigned HOST_WIDE_INT sched = 0;
6559 tree sched_arg = NULL_TREE;
4954efd4 6560
6561 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6562 == GF_OMP_FOR_KIND_FOR);
6563 if (fd.chunk_size == NULL
6564 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6565 fd.chunk_size = integer_zero_node;
4954efd4 6566 switch (fd.sched_kind)
6567 {
6568 case OMP_CLAUSE_SCHEDULE_RUNTIME:
0365070c 6569 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6570 && fd.lastprivate_conditional == 0)
7e5a76c8 6571 {
6572 gcc_assert (!fd.have_ordered);
6573 fn_index = 6;
6574 sched = 4;
6575 }
6576 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
0365070c 6577 && !fd.have_ordered
6578 && fd.lastprivate_conditional == 0)
7e5a76c8 6579 fn_index = 7;
6580 else
6581 {
6582 fn_index = 3;
6583 sched = (HOST_WIDE_INT_1U << 31);
6584 }
4954efd4 6585 break;
6586 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6587 case OMP_CLAUSE_SCHEDULE_GUIDED:
7e5a76c8 6588 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
0365070c 6589 && !fd.have_ordered
6590 && fd.lastprivate_conditional == 0)
4954efd4 6591 {
6592 fn_index = 3 + fd.sched_kind;
7e5a76c8 6593 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
4954efd4 6594 break;
6595 }
4954efd4 6596 fn_index = fd.sched_kind;
7e5a76c8 6597 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6598 sched += (HOST_WIDE_INT_1U << 31);
4954efd4 6599 break;
7e5a76c8 6600 case OMP_CLAUSE_SCHEDULE_STATIC:
6601 gcc_assert (fd.have_ordered);
6602 fn_index = 0;
6603 sched = (HOST_WIDE_INT_1U << 31) + 1;
6604 break;
6605 default:
6606 gcc_unreachable ();
4954efd4 6607 }
6608 if (!fd.ordered)
7e5a76c8 6609 fn_index += fd.have_ordered * 8;
4954efd4 6610 if (fd.ordered)
6611 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6612 else
6613 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6614 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
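      /* An illustrative example (a sketch, relying on the GOMP_loop_*
	 builtin enumerators being laid out in the order the arithmetic
	 above assumes): schedule(monotonic: dynamic) without ordered
	 gives fn_index == 1, so, absent the reductemp/condtemp
	 adjustments below, start_ix resolves to
	 BUILT_IN_GOMP_LOOP_DYNAMIC_START and next_ix to
	 BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT.  */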
48152aa2 6615 if (fd.have_reductemp || fd.have_pointer_condtemp)
7e5a76c8 6616 {
6617 if (fd.ordered)
6618 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6619 else if (fd.have_ordered)
6620 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6621 else
6622 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6623 sched_arg = build_int_cstu (long_integer_type_node, sched);
6624 if (!fd.chunk_size)
6625 fd.chunk_size = integer_zero_node;
6626 }
4954efd4 6627 if (fd.iter_type == long_long_unsigned_type_node)
6628 {
6629 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6630 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6631 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6632 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6633 }
6634 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
7e5a76c8 6635 (enum built_in_function) next_ix, sched_arg,
6636 inner_stmt);
4954efd4 6637 }
6638
6639 if (gimple_in_ssa_p (cfun))
6640 update_ssa (TODO_update_ssa_only_virtuals);
6641}
6642
6643/* Expand code for an OpenMP sections directive. In pseudo code, we generate
6644
6645 v = GOMP_sections_start (n);
6646 L0:
6647 switch (v)
6648 {
6649 case 0:
6650 goto L2;
6651 case 1:
6652 section 1;
6653 goto L1;
6654 case 2:
6655 ...
6656 case n:
6657 ...
6658 default:
6659 abort ();
6660 }
6661 L1:
6662 v = GOMP_sections_next ();
6663 goto L0;
6664 L2:
6665 reduction;
6666
 6667   If this is a combined parallel+sections region, replace the call to
 6668   GOMP_sections_start with a call to GOMP_sections_next.  */
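
/* As a concrete illustration (a sketch, not taken from these sources),

     #pragma omp sections
     {
       #pragma omp section
	 foo ();
       #pragma omp section
	 bar ();
     }

   is dispatched with v == 1 running foo (), v == 2 running bar (),
   and v == 0 branching to L2 once no more work remains.  */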
6669
6670static void
6671expand_omp_sections (struct omp_region *region)
6672{
6673 tree t, u, vin = NULL, vmain, vnext, l2;
6674 unsigned len;
6675 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6676 gimple_stmt_iterator si, switch_si;
6677 gomp_sections *sections_stmt;
6678 gimple *stmt;
6679 gomp_continue *cont;
6680 edge_iterator ei;
6681 edge e;
6682 struct omp_region *inner;
6683 unsigned i, casei;
6684 bool exit_reachable = region->cont != NULL;
6685
6686 gcc_assert (region->exit != NULL);
6687 entry_bb = region->entry;
6688 l0_bb = single_succ (entry_bb);
6689 l1_bb = region->cont;
6690 l2_bb = region->exit;
6691 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6692 l2 = gimple_block_label (l2_bb);
6693 else
6694 {
6695 /* This can happen if there are reductions. */
6696 len = EDGE_COUNT (l0_bb->succs);
6697 gcc_assert (len > 0);
6698 e = EDGE_SUCC (l0_bb, len - 1);
bce107d7 6699 si = gsi_last_nondebug_bb (e->dest);
4954efd4 6700 l2 = NULL_TREE;
6701 if (gsi_end_p (si)
7c6746c9 6702 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
4954efd4 6703 l2 = gimple_block_label (e->dest);
6704 else
6705 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6706 {
bce107d7 6707 si = gsi_last_nondebug_bb (e->dest);
4954efd4 6708 if (gsi_end_p (si)
6709 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6710 {
6711 l2 = gimple_block_label (e->dest);
6712 break;
6713 }
6714 }
6715 }
6716 if (exit_reachable)
6717 default_bb = create_empty_bb (l1_bb->prev_bb);
6718 else
6719 default_bb = create_empty_bb (l0_bb);
6720
6721 /* We will build a switch() with enough cases for all the
 6722     GIMPLE_OMP_SECTION regions, a '0' case taken when no more work remains,
6723 and a default case to abort if something goes wrong. */
6724 len = EDGE_COUNT (l0_bb->succs);
6725
6726 /* Use vec::quick_push on label_vec throughout, since we know the size
6727 in advance. */
6728 auto_vec<tree> label_vec (len);
6729
6730 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6731 GIMPLE_OMP_SECTIONS statement. */
bce107d7 6732 si = gsi_last_nondebug_bb (entry_bb);
4954efd4 6733 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6734 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6735 vin = gimple_omp_sections_control (sections_stmt);
7e5a76c8 6736 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6737 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
eb7a699d 6738 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6739 tree cond_var = NULL_TREE;
6740 if (reductmp || condtmp)
6741 {
6742 tree reductions = null_pointer_node, mem = null_pointer_node;
6743 tree memv = NULL_TREE, condtemp = NULL_TREE;
6744 gimple_stmt_iterator gsi = gsi_none ();
6745 gimple *g = NULL;
6746 if (reductmp)
6747 {
6748 reductions = OMP_CLAUSE_DECL (reductmp);
6749 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6750 g = SSA_NAME_DEF_STMT (reductions);
6751 reductions = gimple_assign_rhs1 (g);
6752 OMP_CLAUSE_DECL (reductmp) = reductions;
6753 gsi = gsi_for_stmt (g);
6754 }
6755 else
6756 gsi = si;
6757 if (condtmp)
6758 {
6759 condtemp = OMP_CLAUSE_DECL (condtmp);
6760 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6761 OMP_CLAUSE__CONDTEMP_);
6762 cond_var = OMP_CLAUSE_DECL (c);
6763 tree type = TREE_TYPE (condtemp);
6764 memv = create_tmp_var (type);
6765 TREE_ADDRESSABLE (memv) = 1;
6766 unsigned cnt = 0;
6767 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6768 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6769 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6770 ++cnt;
6771 unsigned HOST_WIDE_INT sz
6772 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6773 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6774 false);
6775 mem = build_fold_addr_expr (memv);
6776 }
7e5a76c8 6777 t = build_int_cst (unsigned_type_node, len - 1);
6778 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
eb7a699d 6779 stmt = gimple_build_call (u, 3, t, reductions, mem);
7e5a76c8 6780 gimple_call_set_lhs (stmt, vin);
6781 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
eb7a699d 6782 if (condtmp)
6783 {
6784 expand_omp_build_assign (&gsi, condtemp, memv, false);
6785 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6786 vin, build_one_cst (TREE_TYPE (cond_var)));
6787 expand_omp_build_assign (&gsi, cond_var, t, false);
6788 }
6789 if (reductmp)
6790 {
6791 gsi_remove (&gsi, true);
6792 release_ssa_name (gimple_assign_lhs (g));
6793 }
7e5a76c8 6794 }
6795 else if (!is_combined_parallel (region))
4954efd4 6796 {
6797 /* If we are not inside a combined parallel+sections region,
6798 call GOMP_sections_start. */
6799 t = build_int_cst (unsigned_type_node, len - 1);
6800 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6801 stmt = gimple_build_call (u, 1, t);
6802 }
6803 else
6804 {
6805 /* Otherwise, call GOMP_sections_next. */
6806 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6807 stmt = gimple_build_call (u, 0);
6808 }
eb7a699d 6809 if (!reductmp && !condtmp)
7e5a76c8 6810 {
6811 gimple_call_set_lhs (stmt, vin);
6812 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6813 }
4954efd4 6814 gsi_remove (&si, true);
6815
6816 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6817 L0_BB. */
bce107d7 6818 switch_si = gsi_last_nondebug_bb (l0_bb);
4954efd4 6819 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6820 if (exit_reachable)
6821 {
6822 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6823 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6824 vmain = gimple_omp_continue_control_use (cont);
6825 vnext = gimple_omp_continue_control_def (cont);
6826 }
6827 else
6828 {
6829 vmain = vin;
6830 vnext = NULL_TREE;
6831 }
6832
6833 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6834 label_vec.quick_push (t);
6835 i = 1;
6836
6837 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6838 for (inner = region->inner, casei = 1;
6839 inner;
6840 inner = inner->next, i++, casei++)
6841 {
6842 basic_block s_entry_bb, s_exit_bb;
6843
6844 /* Skip optional reduction region. */
6845 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6846 {
6847 --i;
6848 --casei;
6849 continue;
6850 }
6851
6852 s_entry_bb = inner->entry;
6853 s_exit_bb = inner->exit;
6854
6855 t = gimple_block_label (s_entry_bb);
6856 u = build_int_cst (unsigned_type_node, casei);
6857 u = build_case_label (u, NULL, t);
6858 label_vec.quick_push (u);
6859
bce107d7 6860 si = gsi_last_nondebug_bb (s_entry_bb);
4954efd4 6861 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6862 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6863 gsi_remove (&si, true);
6864 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6865
6866 if (s_exit_bb == NULL)
6867 continue;
6868
bce107d7 6869 si = gsi_last_nondebug_bb (s_exit_bb);
4954efd4 6870 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6871 gsi_remove (&si, true);
6872
6873 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6874 }
6875
6876 /* Error handling code goes in DEFAULT_BB. */
6877 t = gimple_block_label (default_bb);
6878 u = build_case_label (NULL, NULL, t);
6879 make_edge (l0_bb, default_bb, 0);
6880 add_bb_to_loop (default_bb, current_loops->tree_root);
6881
6882 stmt = gimple_build_switch (vmain, u, label_vec);
6883 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6884 gsi_remove (&switch_si, true);
6885
6886 si = gsi_start_bb (default_bb);
6887 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6888 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6889
6890 if (exit_reachable)
6891 {
6892 tree bfn_decl;
6893
6894 /* Code to get the next section goes in L1_BB. */
bce107d7 6895 si = gsi_last_nondebug_bb (l1_bb);
4954efd4 6896 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6897
6898 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6899 stmt = gimple_build_call (bfn_decl, 0);
6900 gimple_call_set_lhs (stmt, vnext);
eb7a699d 6901 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6902 if (cond_var)
6903 {
6904 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6905 vnext, build_one_cst (TREE_TYPE (cond_var)));
6906 expand_omp_build_assign (&si, cond_var, t, false);
6907 }
4954efd4 6908 gsi_remove (&si, true);
6909
6910 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6911 }
6912
6913 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
bce107d7 6914 si = gsi_last_nondebug_bb (l2_bb);
4954efd4 6915 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6916 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6917 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6918 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6919 else
6920 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6921 stmt = gimple_build_call (t, 0);
6922 if (gimple_omp_return_lhs (gsi_stmt (si)))
6923 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6924 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6925 gsi_remove (&si, true);
6926
6927 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6928}
6929
6930/* Expand code for an OpenMP single directive. We've already expanded
 6931   much of the code; here we simply place the GOMP_barrier call.  */
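
/* E.g. (an illustrative sketch):

     #pragma omp single
       foo ();

   leaves only the entry/exit markers to be removed here, plus a
   GOMP_barrier call after the region unless a nowait clause was
   given.  */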
6932
6933static void
6934expand_omp_single (struct omp_region *region)
6935{
6936 basic_block entry_bb, exit_bb;
6937 gimple_stmt_iterator si;
6938
6939 entry_bb = region->entry;
6940 exit_bb = region->exit;
6941
bce107d7 6942 si = gsi_last_nondebug_bb (entry_bb);
4954efd4 6943 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6944 gsi_remove (&si, true);
6945 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6946
bce107d7 6947 si = gsi_last_nondebug_bb (exit_bb);
4954efd4 6948 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6949 {
6950 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6951 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6952 }
6953 gsi_remove (&si, true);
6954 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6955}
6956
 6957/* Generic expansion for OpenMP synchronization directives: master,
 6958   taskgroup, ordered and critical; a host teams construct is instead
 6959   expanded via expand_omp_taskreg.  All we need to do here is remove
        the entry and exit markers for REGION.  */
6960
6961static void
6962expand_omp_synch (struct omp_region *region)
6963{
6964 basic_block entry_bb, exit_bb;
6965 gimple_stmt_iterator si;
6966
6967 entry_bb = region->entry;
6968 exit_bb = region->exit;
6969
bce107d7 6970 si = gsi_last_nondebug_bb (entry_bb);
4954efd4 6971 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6972 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6973 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
7e5a76c8 6977 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6978 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6979 {
6980 expand_omp_taskreg (region);
6981 return;
6982 }
4954efd4 6983 gsi_remove (&si, true);
6984 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6985
6986 if (exit_bb)
6987 {
bce107d7 6988 si = gsi_last_nondebug_bb (exit_bb);
4954efd4 6989 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6990 gsi_remove (&si, true);
6991 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6992 }
6993}
6994
7e5a76c8 6995/* Translate enum omp_memory_order to enum memmodel. The two enums
6996 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6997 is 0. */
6998
6999static enum memmodel
7000omp_memory_order_to_memmodel (enum omp_memory_order mo)
7001{
7002 switch (mo)
7003 {
7004 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7005 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7006 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7007 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7008 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7009 default: gcc_unreachable ();
7010 }
7011}
7012
4954efd4 7013/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7014 operation as a normal volatile load. */
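
/* For example (a sketch, assuming a 4-byte type, hence INDEX == 2):

     #pragma omp atomic read
       v = *addr;

   is expanded to the equivalent of

     v = __atomic_load_4 (addr, mo);

   with MO derived from the directive's memory order.  */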
7015
7016static bool
7017expand_omp_atomic_load (basic_block load_bb, tree addr,
7018 tree loaded_val, int index)
7019{
7020 enum built_in_function tmpbase;
7021 gimple_stmt_iterator gsi;
7022 basic_block store_bb;
7023 location_t loc;
7024 gimple *stmt;
7025 tree decl, call, type, itype;
7026
bce107d7 7027 gsi = gsi_last_nondebug_bb (load_bb);
4954efd4 7028 stmt = gsi_stmt (gsi);
7029 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7030 loc = gimple_location (stmt);
7031
7032 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7033 is smaller than word size, then expand_atomic_load assumes that the load
7034 is atomic. We could avoid the builtin entirely in this case. */
7035
7036 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7037 decl = builtin_decl_explicit (tmpbase);
7038 if (decl == NULL_TREE)
7039 return false;
7040
7041 type = TREE_TYPE (loaded_val);
7042 itype = TREE_TYPE (TREE_TYPE (decl));
7043
7e5a76c8 7044 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7045 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7046 call = build_call_expr_loc (loc, decl, 2, addr, mo);
4954efd4 7047 if (!useless_type_conversion_p (type, itype))
7048 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7049 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7050
7051 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7052 gsi_remove (&gsi, true);
7053
7054 store_bb = single_succ (load_bb);
bce107d7 7055 gsi = gsi_last_nondebug_bb (store_bb);
4954efd4 7056 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7057 gsi_remove (&gsi, true);
7058
7059 if (gimple_in_ssa_p (cfun))
7060 update_ssa (TODO_update_ssa_no_phi);
7061
7062 return true;
7063}
7064
7065/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7066 operation as a normal volatile store. */
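
/* For example (a sketch, assuming a 4-byte type, hence INDEX == 2):

     #pragma omp atomic write
       *addr = x;

   becomes the equivalent of __atomic_store_4 (addr, x, mo); when the
   old value is needed as well, __atomic_exchange_4 is used instead.  */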
7067
7068static bool
7069expand_omp_atomic_store (basic_block load_bb, tree addr,
7070 tree loaded_val, tree stored_val, int index)
7071{
7072 enum built_in_function tmpbase;
7073 gimple_stmt_iterator gsi;
7074 basic_block store_bb = single_succ (load_bb);
7075 location_t loc;
7076 gimple *stmt;
7077 tree decl, call, type, itype;
7078 machine_mode imode;
7079 bool exchange;
7080
bce107d7 7081 gsi = gsi_last_nondebug_bb (load_bb);
4954efd4 7082 stmt = gsi_stmt (gsi);
7083 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7084
7085 /* If the load value is needed, then this isn't a store but an exchange. */
7086 exchange = gimple_omp_atomic_need_value_p (stmt);
7087
bce107d7 7088 gsi = gsi_last_nondebug_bb (store_bb);
4954efd4 7089 stmt = gsi_stmt (gsi);
7090 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7091 loc = gimple_location (stmt);
7092
7093 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7094 is smaller than word size, then expand_atomic_store assumes that the store
7095 is atomic. We could avoid the builtin entirely in this case. */
7096
7097 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7098 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7099 decl = builtin_decl_explicit (tmpbase);
7100 if (decl == NULL_TREE)
7101 return false;
7102
7103 type = TREE_TYPE (stored_val);
7104
7105 /* Dig out the type of the function's second argument. */
7106 itype = TREE_TYPE (decl);
7107 itype = TYPE_ARG_TYPES (itype);
7108 itype = TREE_CHAIN (itype);
7109 itype = TREE_VALUE (itype);
7110 imode = TYPE_MODE (itype);
7111
7112 if (exchange && !can_atomic_exchange_p (imode, true))
7113 return false;
7114
7115 if (!useless_type_conversion_p (itype, type))
7116 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7e5a76c8 7117 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7118 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7119 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
4954efd4 7120 if (exchange)
7121 {
7122 if (!useless_type_conversion_p (type, itype))
7123 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7124 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7125 }
7126
7127 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7128 gsi_remove (&gsi, true);
7129
7130 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
bce107d7 7131 gsi = gsi_last_nondebug_bb (load_bb);
4954efd4 7132 gsi_remove (&gsi, true);
7133
7134 if (gimple_in_ssa_p (cfun))
7135 update_ssa (TODO_update_ssa_no_phi);
7136
7137 return true;
7138}
7139
7140/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7141 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
7142 size of the data type, and thus usable to find the index of the builtin
7143 decl. Returns false if the expression is not of the proper form. */
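
/* For example (a sketch, assuming a 4-byte type, hence INDEX == 2):

     #pragma omp atomic
       *addr += x;

   maps to the equivalent of __atomic_fetch_add_4 (addr, x, mo), or to
   __atomic_add_fetch_4 when the updated value is needed.  */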
7144
7145static bool
7146expand_omp_atomic_fetch_op (basic_block load_bb,
7147 tree addr, tree loaded_val,
7148 tree stored_val, int index)
7149{
7150 enum built_in_function oldbase, newbase, tmpbase;
7151 tree decl, itype, call;
7152 tree lhs, rhs;
7153 basic_block store_bb = single_succ (load_bb);
7154 gimple_stmt_iterator gsi;
7155 gimple *stmt;
7156 location_t loc;
7157 enum tree_code code;
7158 bool need_old, need_new;
7159 machine_mode imode;
4954efd4 7160
7161 /* We expect to find the following sequences:
7162
7163 load_bb:
7164 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7165
7166 store_bb:
7167 val = tmp OP something; (or: something OP tmp)
7168 GIMPLE_OMP_STORE (val)
7169
7170 ???FIXME: Allow a more flexible sequence.
7171 Perhaps use data flow to pick the statements.
7172
7173 */
7174
7175 gsi = gsi_after_labels (store_bb);
7176 stmt = gsi_stmt (gsi);
bce107d7 7177 if (is_gimple_debug (stmt))
7178 {
7179 gsi_next_nondebug (&gsi);
7180 if (gsi_end_p (gsi))
7181 return false;
7182 stmt = gsi_stmt (gsi);
7183 }
4954efd4 7184 loc = gimple_location (stmt);
7185 if (!is_gimple_assign (stmt))
7186 return false;
bce107d7 7187 gsi_next_nondebug (&gsi);
4954efd4 7188 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7189 return false;
7190 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7191 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7e5a76c8 7192 enum omp_memory_order omo
7193 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7194 enum memmodel mo = omp_memory_order_to_memmodel (omo);
4954efd4 7195 gcc_checking_assert (!need_old || !need_new);
7196
7197 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7198 return false;
7199
7200 /* Check for one of the supported fetch-op operations. */
7201 code = gimple_assign_rhs_code (stmt);
7202 switch (code)
7203 {
7204 case PLUS_EXPR:
7205 case POINTER_PLUS_EXPR:
7206 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7207 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7208 break;
7209 case MINUS_EXPR:
7210 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7211 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7212 break;
7213 case BIT_AND_EXPR:
7214 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7215 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7216 break;
7217 case BIT_IOR_EXPR:
7218 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7219 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7220 break;
7221 case BIT_XOR_EXPR:
7222 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7223 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7224 break;
7225 default:
7226 return false;
7227 }
7228
7229 /* Make sure the expression is of the proper form. */
7230 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7231 rhs = gimple_assign_rhs2 (stmt);
7232 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7233 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7234 rhs = gimple_assign_rhs1 (stmt);
7235 else
7236 return false;
7237
7238 tmpbase = ((enum built_in_function)
7239 ((need_new ? newbase : oldbase) + index + 1));
7240 decl = builtin_decl_explicit (tmpbase);
7241 if (decl == NULL_TREE)
7242 return false;
7243 itype = TREE_TYPE (TREE_TYPE (decl));
7244 imode = TYPE_MODE (itype);
7245
7246 /* We could test all of the various optabs involved, but the fact of the
7247 matter is that (with the exception of i486 vs i586 and xadd) all targets
 7248     that support any atomic operation optab also implement compare-and-swap.
7249 Let optabs.c take care of expanding any compare-and-swap loop. */
3a366e72 7250 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
4954efd4 7251 return false;
7252
bce107d7 7253 gsi = gsi_last_nondebug_bb (load_bb);
4954efd4 7254 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7255
7256 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
7257 It only requires that the operation happen atomically. Thus we can
 7258     use the RELAXED memory model unless the directive requests otherwise.  */
7259 call = build_call_expr_loc (loc, decl, 3, addr,
7260 fold_convert_loc (loc, itype, rhs),
7e5a76c8 7261 build_int_cst (NULL, mo));
4954efd4 7262
7263 if (need_old || need_new)
7264 {
7265 lhs = need_old ? loaded_val : stored_val;
7266 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7267 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7268 }
7269 else
7270 call = fold_convert_loc (loc, void_type_node, call);
7271 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7272 gsi_remove (&gsi, true);
7273
bce107d7 7274 gsi = gsi_last_nondebug_bb (store_bb);
4954efd4 7275 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7276 gsi_remove (&gsi, true);
bce107d7 7277 gsi = gsi_last_nondebug_bb (store_bb);
4954efd4 7278 stmt = gsi_stmt (gsi);
7279 gsi_remove (&gsi, true);
7280
7281 if (gimple_in_ssa_p (cfun))
7282 {
7283 release_defs (stmt);
7284 update_ssa (TODO_update_ssa_no_phi);
7285 }
7286
7287 return true;
7288}
7289
7290/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7291
7292 oldval = *addr;
7293 repeat:
7c6746c9 7294 newval = rhs; // with oldval replacing *addr in rhs
4954efd4 7295	 actual = __sync_val_compare_and_swap (addr, oldval, newval);
 7296	 if (actual != oldval)
 7297	   { oldval = actual; goto repeat; }
7298
7299 INDEX is log2 of the size of the data type, and thus usable to find the
7300 index of the builtin decl. */
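
/* As an illustrative note: for a 4-byte float the value is
   view-converted to a 4-byte integer, so the loop described above uses
   __sync_val_compare_and_swap_4 and compares integer representations,
   which behaves correctly even for NaNs and -0.0.  */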
7301
7302static bool
7303expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7304 tree addr, tree loaded_val, tree stored_val,
7305 int index)
7306{
7307 tree loadedi, storedi, initial, new_storedi, old_vali;
3f4a4146 7308 tree type, itype, cmpxchg, iaddr, atype;
4954efd4 7309 gimple_stmt_iterator si;
7310 basic_block loop_header = single_succ (load_bb);
7311 gimple *phi, *stmt;
7312 edge e;
7313 enum built_in_function fncode;
7314
7315 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7316 order to use the RELAXED memory model effectively. */
7317 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7318 + index + 1);
7319 cmpxchg = builtin_decl_explicit (fncode);
7320 if (cmpxchg == NULL_TREE)
7321 return false;
3f4a4146 7322 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7323 atype = type;
4954efd4 7324 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7325
3a366e72 7326 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7327 || !can_atomic_load_p (TYPE_MODE (itype)))
4954efd4 7328 return false;
7329
7330 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
bce107d7 7331 si = gsi_last_nondebug_bb (load_bb);
4954efd4 7332 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7333
7334 /* For floating-point values, we'll need to view-convert them to integers
7335 so that we can perform the atomic compare and swap. Simplify the
7336 following code by always setting up the "i"ntegral variables. */
7337 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7338 {
7339 tree iaddr_val;
7340
7341 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7342 true));
3f4a4146 7343 atype = itype;
4954efd4 7344 iaddr_val
7345 = force_gimple_operand_gsi (&si,
7346 fold_convert (TREE_TYPE (iaddr), addr),
7347 false, NULL_TREE, true, GSI_SAME_STMT);
7348 stmt = gimple_build_assign (iaddr, iaddr_val);
7349 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7350 loadedi = create_tmp_var (itype);
7351 if (gimple_in_ssa_p (cfun))
7352 loadedi = make_ssa_name (loadedi);
7353 }
7354 else
7355 {
7356 iaddr = addr;
7357 loadedi = loaded_val;
7358 }
7359
7360 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7361 tree loaddecl = builtin_decl_explicit (fncode);
7362 if (loaddecl)
7363 initial
3f4a4146 7364 = fold_convert (atype,
4954efd4 7365 build_call_expr (loaddecl, 2, iaddr,
7366 build_int_cst (NULL_TREE,
7367 MEMMODEL_RELAXED)));
7368 else
3f4a4146 7369 {
7370 tree off
7371 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7372 true), 0);
7373 initial = build2 (MEM_REF, atype, iaddr, off);
7374 }
4954efd4 7375
7376 initial
7377 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7378 GSI_SAME_STMT);
7379
7380 /* Move the value to the LOADEDI temporary. */
7381 if (gimple_in_ssa_p (cfun))
7382 {
7383 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7384 phi = create_phi_node (loadedi, loop_header);
7385 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7386 initial);
7387 }
7388 else
7389 gsi_insert_before (&si,
7390 gimple_build_assign (loadedi, initial),
7391 GSI_SAME_STMT);
7392 if (loadedi != loaded_val)
7393 {
7394 gimple_stmt_iterator gsi2;
7395 tree x;
7396
7397 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7398 gsi2 = gsi_start_bb (loop_header);
7399 if (gimple_in_ssa_p (cfun))
7400 {
7401 gassign *stmt;
7402 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7403 true, GSI_SAME_STMT);
7404 stmt = gimple_build_assign (loaded_val, x);
7405 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7406 }
7407 else
7408 {
7409 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7410 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7411 true, GSI_SAME_STMT);
7412 }
7413 }
7414 gsi_remove (&si, true);
7415
bce107d7 7416 si = gsi_last_nondebug_bb (store_bb);
4954efd4 7417 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7418
7419 if (iaddr == addr)
7420 storedi = stored_val;
7421 else
7c6746c9 7422 storedi
7423 = force_gimple_operand_gsi (&si,
7424 build1 (VIEW_CONVERT_EXPR, itype,
7425 stored_val), true, NULL_TREE, true,
7426 GSI_SAME_STMT);
4954efd4 7427
7428 /* Build the compare&swap statement. */
7429 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7430 new_storedi = force_gimple_operand_gsi (&si,
7431 fold_convert (TREE_TYPE (loadedi),
7432 new_storedi),
7433 true, NULL_TREE,
7434 true, GSI_SAME_STMT);
7435
7436 if (gimple_in_ssa_p (cfun))
7437 old_vali = loadedi;
7438 else
7439 {
7440 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7441 stmt = gimple_build_assign (old_vali, loadedi);
7442 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7443
7444 stmt = gimple_build_assign (loadedi, new_storedi);
7445 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7446 }
7447
7448 /* Note that we always perform the comparison as an integer, even for
7449 floating point. This allows the atomic operation to properly
7450 succeed even with NaNs and -0.0. */
7c6746c9 7451 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7452 stmt = gimple_build_cond_empty (ne);
4954efd4 7453 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7454
7455 /* Update cfg. */
7456 e = single_succ_edge (store_bb);
7457 e->flags &= ~EDGE_FALLTHRU;
7458 e->flags |= EDGE_FALSE_VALUE;
720cfc43 7459 /* Expect no looping. */
7460 e->probability = profile_probability::guessed_always ();
4954efd4 7461
7462 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
720cfc43 7463 e->probability = profile_probability::guessed_never ();
4954efd4 7464
7465 /* Copy the new value to loadedi (we already did that before the condition
7466 if we are not in SSA). */
7467 if (gimple_in_ssa_p (cfun))
7468 {
7469 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7470 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7471 }
7472
7473 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7474 gsi_remove (&si, true);
7475
2e966e2a 7476 class loop *loop = alloc_loop ();
4954efd4 7477 loop->header = loop_header;
7478 loop->latch = store_bb;
7479 add_loop (loop, loop_header->loop_father);
7480
7481 if (gimple_in_ssa_p (cfun))
7482 update_ssa (TODO_update_ssa_no_phi);
7483
7484 return true;
7485}
7486
7487/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7488
7c6746c9 7489 GOMP_atomic_start ();
7490 *addr = rhs;
7491 GOMP_atomic_end ();
4954efd4 7492
7493 The result is not globally atomic, but works so long as all parallel
7494 references are within #pragma omp atomic directives. According to
 7495     responses received from omp@openmp.org, this appears to be within
 7496     spec, which makes sense, since that's how several other compilers handle
7497 this situation as well.
7498 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7499 expanding. STORED_VAL is the operand of the matching
7500 GIMPLE_OMP_ATOMIC_STORE.
7501
7502 We replace
7503 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7504 loaded_val = *addr;
7505
7506 and replace
7507 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7508 *addr = stored_val;
7509*/
7510
7511static bool
7512expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7513 tree addr, tree loaded_val, tree stored_val)
7514{
7515 gimple_stmt_iterator si;
7516 gassign *stmt;
7517 tree t;
7518
bce107d7 7519 si = gsi_last_nondebug_bb (load_bb);
4954efd4 7520 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7521
7522 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7523 t = build_call_expr (t, 0);
7524 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7525
3f4a4146 7526 tree mem = build_simple_mem_ref (addr);
7527 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7528 TREE_OPERAND (mem, 1)
7529 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7530 true),
7531 TREE_OPERAND (mem, 1));
7532 stmt = gimple_build_assign (loaded_val, mem);
4954efd4 7533 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7534 gsi_remove (&si, true);
7535
bce107d7 7536 si = gsi_last_nondebug_bb (store_bb);
4954efd4 7537 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7538
3f4a4146 7539 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
4954efd4 7540 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7541
7542 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7543 t = build_call_expr (t, 0);
7544 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7545 gsi_remove (&si, true);
7546
7547 if (gimple_in_ssa_p (cfun))
7548 update_ssa (TODO_update_ssa_no_phi);
7549 return true;
7550}
7551
7552/* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand
7c6746c9 7553   using expand_omp_atomic_fetch_op.  If that fails, we try to
4954efd4 7554 call expand_omp_atomic_pipeline, and if it fails too, the
7555 ultimate fallback is wrapping the operation in a mutex
7556 (expand_omp_atomic_mutex). REGION is the atomic region built
7557 by build_omp_regions_1(). */
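
/* For instance (illustrative): a 4-byte int gives index == 2 after
   the exact_log2 computation below, so the *_4 builtin variants are
   candidates provided the type is at least 4-byte aligned
   (exact_log2 (align) >= index).  */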
7558
7559static void
7560expand_omp_atomic (struct omp_region *region)
7561{
7562 basic_block load_bb = region->entry, store_bb = region->exit;
7563 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7564 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7565 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7566 tree addr = gimple_omp_atomic_load_rhs (load);
7567 tree stored_val = gimple_omp_atomic_store_val (store);
3f4a4146 7568 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
4954efd4 7569 HOST_WIDE_INT index;
7570
7571 /* Make sure the type is one of the supported sizes. */
7572 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7573 index = exact_log2 (index);
7574 if (index >= 0 && index <= 4)
7575 {
7576 unsigned int align = TYPE_ALIGN_UNIT (type);
7577
7578 /* __sync builtins require strict data alignment. */
7579 if (exact_log2 (align) >= index)
7580 {
7581 /* Atomic load. */
762500c2 7582 scalar_mode smode;
4954efd4 7583 if (loaded_val == stored_val
762500c2 7584 && (is_int_mode (TYPE_MODE (type), &smode)
7585 || is_float_mode (TYPE_MODE (type), &smode))
7586 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
4954efd4 7587 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7588 return;
7589
7590 /* Atomic store. */
762500c2 7591 if ((is_int_mode (TYPE_MODE (type), &smode)
7592 || is_float_mode (TYPE_MODE (type), &smode))
7593 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
4954efd4 7594 && store_bb == single_succ (load_bb)
7595 && first_stmt (store_bb) == store
7596 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7597 stored_val, index))
7598 return;
7599
7600 /* When possible, use specialized atomic update functions. */
7601 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7602 && store_bb == single_succ (load_bb)
7603 && expand_omp_atomic_fetch_op (load_bb, addr,
7604 loaded_val, stored_val, index))
7605 return;
7606
7607 /* If we don't have specialized __sync builtins, try and implement
7608 as a compare and swap loop. */
7609 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7610 loaded_val, stored_val, index))
7611 return;
7612 }
7613 }
7614
7615 /* The ultimate fallback is wrapping the operation in a mutex. */
7616 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7617}
7618
7619/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7620 at REGION_EXIT. */
7621
7622static void
7623mark_loops_in_oacc_kernels_region (basic_block region_entry,
7624 basic_block region_exit)
7625{
2e966e2a 7626 class loop *outer = region_entry->loop_father;
4954efd4 7627 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7628
7629 /* Don't parallelize the kernels region if it contains more than one outer
7630 loop. */
7631 unsigned int nr_outer_loops = 0;
2e966e2a 7632 class loop *single_outer = NULL;
7633 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
4954efd4 7634 {
7635 gcc_assert (loop_outer (loop) == outer);
7636
7637 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7638 continue;
7639
7640 if (region_exit != NULL
7641 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7642 continue;
7643
7644 nr_outer_loops++;
7645 single_outer = loop;
7646 }
7647 if (nr_outer_loops != 1)
7648 return;
7649
2e966e2a 7650 for (class loop *loop = single_outer->inner;
7c6746c9 7651 loop != NULL;
7652 loop = loop->inner)
4954efd4 7653 if (loop->next)
7654 return;
7655
7656 /* Mark the loops in the region. */
2e966e2a 7657 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
4954efd4 7658 loop->in_oacc_kernels_region = true;
7659}
7660
 7661/* Types used to pass grid and workgroup sizes to a kernel invocation. */
7662
7663struct GTY(()) grid_launch_attributes_trees
7664{
7665 tree kernel_dim_array_type;
7666 tree kernel_lattrs_dimnum_decl;
7667 tree kernel_lattrs_grid_decl;
7668 tree kernel_lattrs_group_decl;
7669 tree kernel_launch_attributes_type;
7670};
7671
7672static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7673
7674/* Create types used to pass kernel launch attributes to target. */
7675
7676static void
7677grid_create_kernel_launch_attr_types (void)
7678{
7679 if (grid_attr_trees)
7680 return;
7681 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7682
7683 tree dim_arr_index_type
7684 = build_index_type (build_int_cst (integer_type_node, 2));
7685 grid_attr_trees->kernel_dim_array_type
7686 = build_array_type (uint32_type_node, dim_arr_index_type);
7687
7688 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7689 grid_attr_trees->kernel_lattrs_dimnum_decl
7690 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7691 uint32_type_node);
7692 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7693
7694 grid_attr_trees->kernel_lattrs_grid_decl
7695 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7696 grid_attr_trees->kernel_dim_array_type);
7697 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7698 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7699 grid_attr_trees->kernel_lattrs_group_decl
7700 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7701 grid_attr_trees->kernel_dim_array_type);
7702 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7703 = grid_attr_trees->kernel_lattrs_grid_decl;
7704 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7705 "__gomp_kernel_launch_attributes",
7706 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7707}
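
/* The record built above corresponds to this C sketch (assuming a
   32-bit uint32_type_node):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };  */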
7708
7709/* Insert before the current statement in GSI a store of VALUE to INDEX of
7710 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7711 of type uint32_type_node. */
7712
7713static void
7714grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7715 tree fld_decl, int index, tree value)
7716{
7717 tree ref = build4 (ARRAY_REF, uint32_type_node,
7718 build3 (COMPONENT_REF,
7719 grid_attr_trees->kernel_dim_array_type,
7720 range_var, fld_decl, NULL_TREE),
7721 build_int_cst (integer_type_node, index),
7722 NULL_TREE, NULL_TREE);
7723 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7724}
7725
7726/* Return a tree representation of a pointer to a structure with grid and
7727 work-group size information. Statements filling that information will be
 7728   inserted before GSI; TGT_STMT is the target statement which has the
7729 necessary information in it. */
7730
7731static tree
7732grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7733 gomp_target *tgt_stmt)
7734{
7735 grid_create_kernel_launch_attr_types ();
7736 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7737 "__kernel_launch_attrs");
7738
7739 unsigned max_dim = 0;
7740 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7741 clause;
7742 clause = OMP_CLAUSE_CHAIN (clause))
7743 {
7744 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7745 continue;
7746
7747 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7748 max_dim = MAX (dim, max_dim);
7749
7750 grid_insert_store_range_dim (gsi, lattrs,
7751 grid_attr_trees->kernel_lattrs_grid_decl,
7752 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7753 grid_insert_store_range_dim (gsi, lattrs,
7754 grid_attr_trees->kernel_lattrs_group_decl,
7755 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7756 }
7757
7758 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7759 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7760 gcc_checking_assert (max_dim <= 2);
7761 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7762 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7763 GSI_SAME_STMT);
7764 TREE_ADDRESSABLE (lattrs) = 1;
7765 return build_fold_addr_expr (lattrs);
7766}
7767
7768/* Build target argument identifier from the DEVICE identifier, value
7769 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7770
7771static tree
 7772get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
7773{
7774 tree t = build_int_cst (integer_type_node, device);
 7775  if (subsequent_param)
7776 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7777 build_int_cst (integer_type_node,
7778 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7779 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7780 build_int_cst (integer_type_node, id));
7781 return t;
7782}
7783
 7784/* Like above, but return it in a type that can be directly stored as an element
7785 of the argument array. */
7786
7787static tree
 7788get_target_argument_identifier (int device, bool subsequent_param, int id)
7789{
 7790  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
7791 return fold_convert (ptr_type_node, t);
7792}
7793
7794/* Return a target argument consisting of DEVICE identifier, value identifier
7795 ID, and the actual VALUE. */
7796
7797static tree
7798get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7799 tree value)
7800{
7801 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7802 fold_convert (integer_type_node, value),
7803 build_int_cst (unsigned_type_node,
7804 GOMP_TARGET_ARG_VALUE_SHIFT));
7805 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7806 get_target_argument_identifier_1 (device, false, id));
7807 t = fold_convert (ptr_type_node, t);
7808 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7809}
7810
7811/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
 7812   push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
 7813   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7814 arguments. */
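
/* E.g. (an illustrative sketch): a small constant such as 4 destined
   for GOMP_TARGET_ARG_NUM_TEAMS on GOMP_TARGET_ARG_DEVICE_ALL passes
   the range check, so a single pointer-sized argument encoding

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | identifier-and-device bits

   is pushed; larger or non-constant values take the two-slot form.  */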
7815
7816static void
7817push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7818 int id, tree value, vec <tree> *args)
7819{
7820 if (tree_fits_shwi_p (value)
7821 && tree_to_shwi (value) > -(1 << 15)
7822 && tree_to_shwi (value) < (1 << 15))
7823 args->quick_push (get_target_argument_value (gsi, device, id, value));
7824 else
7825 {
7826 args->quick_push (get_target_argument_identifier (device, true, id));
7827 value = fold_convert (ptr_type_node, value);
7828 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7829 GSI_SAME_STMT);
7830 args->quick_push (value);
7831 }
7832}
7833
7c6746c9 7834/* Create an array of arguments that is then passed to GOMP_target. */
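
/* The resulting array is laid out as (a sketch):

     .omp_target_args = { <num_teams argument>, <thread_limit argument>,
			  [HSA identifier, launch-attributes pointer,]
			  NULL };  */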
4954efd4 7835
7836static tree
7837get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7838{
7839 auto_vec <tree, 6> args;
7840 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7841 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7842 if (c)
7843 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7844 else
7845 t = integer_minus_one_node;
7846 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7847 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7848
7849 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7850 if (c)
7851 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7852 else
7853 t = integer_minus_one_node;
7854 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7855 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7856 &args);
7857
7858 /* Add HSA-specific grid sizes, if available. */
7859 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7860 OMP_CLAUSE__GRIDDIM_))
7861 {
7c6746c9 7862 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7863 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
4954efd4 7864 args.quick_push (t);
7865 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7866 }
7867
7868 /* Produce more, perhaps device specific, arguments here. */
7869
7870 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7871 args.length () + 1),
7872 ".omp_target_args");
7873 for (unsigned i = 0; i < args.length (); i++)
7874 {
7875 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7876 build_int_cst (integer_type_node, i),
7877 NULL_TREE, NULL_TREE);
7878 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7879 GSI_SAME_STMT);
7880 }
7881 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7882 build_int_cst (integer_type_node, args.length ()),
7883 NULL_TREE, NULL_TREE);
7884 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7885 GSI_SAME_STMT);
7886 TREE_ADDRESSABLE (argarray) = 1;
7887 return build_fold_addr_expr (argarray);
7888}
7889
7890/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7891
7892static void
7893expand_omp_target (struct omp_region *region)
7894{
7895 basic_block entry_bb, exit_bb, new_bb;
7896 struct function *child_cfun;
7897 tree child_fn, block, t;
7898 gimple_stmt_iterator gsi;
7899 gomp_target *entry_stmt;
7900 gimple *stmt;
7901 edge e;
7902 bool offloaded, data_region;
7903
7904 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7905 new_bb = region->entry;
7906
7907 offloaded = is_gimple_omp_offloaded (entry_stmt);
7908 switch (gimple_omp_target_kind (entry_stmt))
7909 {
7910 case GF_OMP_TARGET_KIND_REGION:
7911 case GF_OMP_TARGET_KIND_UPDATE:
7912 case GF_OMP_TARGET_KIND_ENTER_DATA:
7913 case GF_OMP_TARGET_KIND_EXIT_DATA:
7914 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7915 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7916 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7917 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7918 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7919 data_region = false;
7920 break;
7921 case GF_OMP_TARGET_KIND_DATA:
7922 case GF_OMP_TARGET_KIND_OACC_DATA:
7923 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7924 data_region = true;
7925 break;
7926 default:
7927 gcc_unreachable ();
7928 }
7929
7930 child_fn = NULL_TREE;
7931 child_cfun = NULL;
7932 if (offloaded)
7933 {
7934 child_fn = gimple_omp_target_child_fn (entry_stmt);
7935 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7936 }
7937
7938 /* Supported by expand_omp_taskreg, but not here. */
7939 if (child_cfun != NULL)
7940 gcc_checking_assert (!child_cfun->cfg);
7941 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7942
7943 entry_bb = region->entry;
7944 exit_bb = region->exit;
7945
7946 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
1d3ea8fc 7947 {
7948 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7949
7950 /* Further down, both OpenACC kernels and OpenACC parallel constructs
 7951	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7952 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7953 DECL_ATTRIBUTES (child_fn)
7954 = tree_cons (get_identifier ("oacc kernels"),
7955 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7956 }
4954efd4 7957
7958 if (offloaded)
7959 {
7960 unsigned srcidx, dstidx, num;
7961
7962 /* If the offloading region needs data sent from the parent
7963 function, then the very first statement (except possible
7964 tree profile counter updates) of the offloading body
7965 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7966 &.OMP_DATA_O is passed as an argument to the child function,
7967 we need to replace it with the argument as seen by the child
7968 function.
7969
7970 In most cases, this will end up being the identity assignment
7971 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7972 a function call that has been inlined, the original PARM_DECL
7973 .OMP_DATA_I may have been converted into a different local
 7974	 variable, in which case we need to keep the assignment.  */
7975 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7976 if (data_arg)
7977 {
7978 basic_block entry_succ_bb = single_succ (entry_bb);
7979 gimple_stmt_iterator gsi;
7980 tree arg;
7981 gimple *tgtcopy_stmt = NULL;
7982 tree sender = TREE_VEC_ELT (data_arg, 0);
7983
7984 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7985 {
7986 gcc_assert (!gsi_end_p (gsi));
7987 stmt = gsi_stmt (gsi);
7988 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7989 continue;
7990
7991 if (gimple_num_ops (stmt) == 2)
7992 {
7993 tree arg = gimple_assign_rhs1 (stmt);
7994
7995 /* We're ignoring the subcode because we're
7996 effectively doing a STRIP_NOPS. */
7997
7998 if (TREE_CODE (arg) == ADDR_EXPR
7999 && TREE_OPERAND (arg, 0) == sender)
8000 {
8001 tgtcopy_stmt = stmt;
8002 break;
8003 }
8004 }
8005 }
8006
8007 gcc_assert (tgtcopy_stmt != NULL);
8008 arg = DECL_ARGUMENTS (child_fn);
8009
8010 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8011 gsi_remove (&gsi, true);
8012 }
8013
8014 /* Declare local variables needed in CHILD_CFUN. */
8015 block = DECL_INITIAL (child_fn);
8016 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8017 /* The gimplifier could record temporaries in the offloading block
 8018	 rather than in the containing function's local_decls chain,
8019 which would mean cgraph missed finalizing them. Do it now. */
8020 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8021 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8022 varpool_node::finalize_decl (t);
8023 DECL_SAVED_TREE (child_fn) = NULL;
8024 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8025 gimple_set_body (child_fn, NULL);
8026 TREE_USED (block) = 1;
8027
8028 /* Reset DECL_CONTEXT on function arguments. */
8029 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8030 DECL_CONTEXT (t) = child_fn;
8031
8032 /* Split ENTRY_BB at GIMPLE_*,
8033 so that it can be moved to the child function. */
bce107d7 8034 gsi = gsi_last_nondebug_bb (entry_bb);
4954efd4 8035 stmt = gsi_stmt (gsi);
8036 gcc_assert (stmt
8037 && gimple_code (stmt) == gimple_code (entry_stmt));
8038 e = split_block (entry_bb, stmt);
8039 gsi_remove (&gsi, true);
8040 entry_bb = e->dest;
8041 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8042
8043 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8044 if (exit_bb)
8045 {
bce107d7 8046 gsi = gsi_last_nondebug_bb (exit_bb);
4954efd4 8047 gcc_assert (!gsi_end_p (gsi)
8048 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8049 stmt = gimple_build_return (NULL);
8050 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8051 gsi_remove (&gsi, true);
8052 }
8053
8054 /* Move the offloading region into CHILD_CFUN. */
8055
8056 block = gimple_block (entry_stmt);
8057
8058 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8059 if (exit_bb)
8060 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8061 /* When the OMP expansion process cannot guarantee an up-to-date
 8062	 loop tree, arrange for the child function to fix up loops.  */
8063 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8064 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8065
8066 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8067 num = vec_safe_length (child_cfun->local_decls);
8068 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8069 {
8070 t = (*child_cfun->local_decls)[srcidx];
8071 if (DECL_CONTEXT (t) == cfun->decl)
8072 continue;
8073 if (srcidx != dstidx)
8074 (*child_cfun->local_decls)[dstidx] = t;
8075 dstidx++;
8076 }
8077 if (dstidx != num)
8078 vec_safe_truncate (child_cfun->local_decls, dstidx);
8079
8080 /* Inform the callgraph about the new function. */
8081 child_cfun->curr_properties = cfun->curr_properties;
8082 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8083 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8084 cgraph_node *node = cgraph_node::get_create (child_fn);
8085 node->parallelized_function = 1;
8086 cgraph_node::add_new_function (child_fn, true);
8087
8088 /* Add the new function to the offload table. */
8089 if (ENABLE_OFFLOADING)
5458c829 8090 {
8091 if (in_lto_p)
8092 DECL_PRESERVE_P (child_fn) = 1;
8093 vec_safe_push (offload_funcs, child_fn);
8094 }
4954efd4 8095
8096 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8097 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8098
8099 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8100 fixed in a following pass. */
8101 push_cfun (child_cfun);
8102 if (need_asm)
d687f868 8103 assign_assembler_name_if_needed (child_fn);
4954efd4 8104 cgraph_edge::rebuild_edges ();
8105
8106 /* Some EH regions might become dead, see PR34608. If
8107 pass_cleanup_cfg isn't the first pass to happen with the
8108 new child, these dead EH edges might cause problems.
8109 Clean them up now. */
8110 if (flag_exceptions)
8111 {
8112 basic_block bb;
8113 bool changed = false;
8114
8115 FOR_EACH_BB_FN (bb, cfun)
8116 changed |= gimple_purge_dead_eh_edges (bb);
8117 if (changed)
8118 cleanup_tree_cfg ();
8119 }
8120 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8121 verify_loop_structure ();
8122 pop_cfun ();
8123
8124 if (dump_file && !gimple_in_ssa_p (cfun))
8125 {
8126 omp_any_child_fn_dumped = true;
8127 dump_function_header (dump_file, child_fn, dump_flags);
8128 dump_function_to_file (child_fn, dump_file, dump_flags);
8129 }
b3136cb0 8130
8131 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
4954efd4 8132 }
8133
8134 /* Emit a library call to launch the offloading region, or do data
8135 transfers. */
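  /* For orientation, a sketch of the kind of call emitted below (the exact
     prototypes are defined by libgomp, so treat the argument list as
     illustrative): a plain '#pragma omp target' region becomes roughly

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, args);

     while the data, update and enter/exit data constructs map to the
     corresponding GOMP_/GOACC_ entry points selected just below.  */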
813421cd 8136 tree t1, t2, t3, t4, depend, c, clauses;
4954efd4 8137 enum built_in_function start_ix;
4954efd4 8138 unsigned int flags_i = 0;
4954efd4 8139
8140 switch (gimple_omp_target_kind (entry_stmt))
8141 {
8142 case GF_OMP_TARGET_KIND_REGION:
8143 start_ix = BUILT_IN_GOMP_TARGET;
8144 break;
8145 case GF_OMP_TARGET_KIND_DATA:
8146 start_ix = BUILT_IN_GOMP_TARGET_DATA;
8147 break;
8148 case GF_OMP_TARGET_KIND_UPDATE:
8149 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8150 break;
8151 case GF_OMP_TARGET_KIND_ENTER_DATA:
8152 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8153 break;
8154 case GF_OMP_TARGET_KIND_EXIT_DATA:
8155 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8156 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8157 break;
8158 case GF_OMP_TARGET_KIND_OACC_KERNELS:
4954efd4 8159 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8160 start_ix = BUILT_IN_GOACC_PARALLEL;
8161 break;
8162 case GF_OMP_TARGET_KIND_OACC_DATA:
8163 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8164 start_ix = BUILT_IN_GOACC_DATA_START;
8165 break;
8166 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8167 start_ix = BUILT_IN_GOACC_UPDATE;
8168 break;
8169 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8170 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8171 break;
8172 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8173 start_ix = BUILT_IN_GOACC_DECLARE;
8174 break;
8175 default:
8176 gcc_unreachable ();
8177 }
8178
8179 clauses = gimple_omp_target_clauses (entry_stmt);
8180
813421cd 8181 tree device = NULL_TREE;
8182 location_t device_loc = UNKNOWN_LOCATION;
8183 tree goacc_flags = NULL_TREE;
8184 if (is_gimple_omp_oacc (entry_stmt))
4954efd4 8185 {
813421cd 8186 /* By default, no GOACC_FLAGs are set. */
8187 goacc_flags = integer_zero_node;
4954efd4 8188 }
8189 else
813421cd 8190 {
8191 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8192 if (c)
8193 {
8194 device = OMP_CLAUSE_DEVICE_ID (c);
8195 device_loc = OMP_CLAUSE_LOCATION (c);
8196 }
8197 else
8198 {
8199 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8200 library choose). */
8201 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8202 device_loc = gimple_location (entry_stmt);
8203 }
4954efd4 8204
813421cd 8205 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8206 if (c)
8207 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8208 }
4954efd4 8209
813421cd 8210 /* By default, there is no conditional. */
8211 tree cond = NULL_TREE;
8212 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8213 if (c)
8214 cond = OMP_CLAUSE_IF_EXPR (c);
8215 /* If we found the clause 'if (cond)', build:
8216 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
8217 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
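  /* The conditional is materialized as explicit control flow rather than as
     a ternary expression; schematically (a sketch of the blocks built
     below):

       cond_bb:  if (cond) goto then_bb; else goto else_bb;
       then_bb:  tmp_var = goacc_flags;  (or: tmp_var = device;)
       else_bb:  tmp_var = goacc_flags | GOACC_FLAG_HOST_FALLBACK;
                 (OpenMP: tmp_var = GOMP_DEVICE_HOST_FALLBACK;)
       new_bb:   tmp_var becomes the flags/device argument.  */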
4954efd4 8218 if (cond)
8219 {
813421cd 8220 tree *tp;
8221 if (is_gimple_omp_oacc (entry_stmt))
8222 tp = &goacc_flags;
8223 else
8224 {
8225 /* Ensure 'device' is of the correct type. */
8226 device = fold_convert_loc (device_loc, integer_type_node, device);
8227
8228 tp = &device;
8229 }
8230
4954efd4 8231 cond = gimple_boolify (cond);
8232
8233 basic_block cond_bb, then_bb, else_bb;
8234 edge e;
8235 tree tmp_var;
8236
813421cd 8237 tmp_var = create_tmp_var (TREE_TYPE (*tp));
4954efd4 8238 if (offloaded)
8239 e = split_block_after_labels (new_bb);
8240 else
8241 {
bce107d7 8242 gsi = gsi_last_nondebug_bb (new_bb);
4954efd4 8243 gsi_prev (&gsi);
8244 e = split_block (new_bb, gsi_stmt (gsi));
8245 }
8246 cond_bb = e->src;
8247 new_bb = e->dest;
8248 remove_edge (e);
8249
8250 then_bb = create_empty_bb (cond_bb);
8251 else_bb = create_empty_bb (then_bb);
8252 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8253 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8254
8255 stmt = gimple_build_cond_empty (cond);
8256 gsi = gsi_last_bb (cond_bb);
8257 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8258
8259 gsi = gsi_start_bb (then_bb);
813421cd 8260 stmt = gimple_build_assign (tmp_var, *tp);
4954efd4 8261 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8262
8263 gsi = gsi_start_bb (else_bb);
813421cd 8264 if (is_gimple_omp_oacc (entry_stmt))
8265 stmt = gimple_build_assign (tmp_var,
8266 BIT_IOR_EXPR,
8267 *tp,
8268 build_int_cst (integer_type_node,
8269 GOACC_FLAG_HOST_FALLBACK));
8270 else
8271 stmt = gimple_build_assign (tmp_var,
8272 build_int_cst (integer_type_node,
8273 GOMP_DEVICE_HOST_FALLBACK));
4954efd4 8274 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8275
8276 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8277 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8278 add_bb_to_loop (then_bb, cond_bb->loop_father);
8279 add_bb_to_loop (else_bb, cond_bb->loop_father);
8280 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8281 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8282
813421cd 8283 *tp = tmp_var;
8284
bce107d7 8285 gsi = gsi_last_nondebug_bb (new_bb);
4954efd4 8286 }
8287 else
8288 {
bce107d7 8289 gsi = gsi_last_nondebug_bb (new_bb);
813421cd 8290
8291 if (device != NULL_TREE)
8292 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8293 true, GSI_SAME_STMT);
4954efd4 8294 }
8295
8296 t = gimple_omp_target_data_arg (entry_stmt);
8297 if (t == NULL)
8298 {
8299 t1 = size_zero_node;
8300 t2 = build_zero_cst (ptr_type_node);
8301 t3 = t2;
8302 t4 = t2;
8303 }
8304 else
8305 {
8306 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8307 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8308 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8309 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8310 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8311 }
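  /* In other words (as arranged by target lowering): T1 is the number of map
     entries, recovered from the domain of the sizes array, while T2, T3 and
     T4 are the addresses of the host-address, size and map-kind arrays held
     in the data-argument TREE_VEC.  */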
8312
8313 gimple *g;
8314 bool tagging = false;
8315 /* The maximum number of arguments used by any start_ix, not counting varargs. */
8316 auto_vec<tree, 11> args;
813421cd 8317 if (is_gimple_omp_oacc (entry_stmt))
8318 {
8319 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8320 TREE_TYPE (goacc_flags), goacc_flags);
8321 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8322 NULL_TREE, true,
8323 GSI_SAME_STMT);
8324 args.quick_push (goacc_flags_m);
8325 }
8326 else
8327 args.quick_push (device);
4954efd4 8328 if (offloaded)
8329 args.quick_push (build_fold_addr_expr (child_fn));
8330 args.quick_push (t1);
8331 args.quick_push (t2);
8332 args.quick_push (t3);
8333 args.quick_push (t4);
8334 switch (start_ix)
8335 {
8336 case BUILT_IN_GOACC_DATA_START:
8337 case BUILT_IN_GOACC_DECLARE:
8338 case BUILT_IN_GOMP_TARGET_DATA:
8339 break;
8340 case BUILT_IN_GOMP_TARGET:
8341 case BUILT_IN_GOMP_TARGET_UPDATE:
8342 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8343 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8344 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8345 if (c)
8346 depend = OMP_CLAUSE_DECL (c);
8347 else
8348 depend = build_int_cst (ptr_type_node, 0);
8349 args.quick_push (depend);
8350 if (start_ix == BUILT_IN_GOMP_TARGET)
8351 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8352 break;
8353 case BUILT_IN_GOACC_PARALLEL:
1d3ea8fc 8354 oacc_set_fn_attrib (child_fn, clauses, &args);
8355 tagging = true;
4954efd4 8356 /* FALLTHRU */
8357 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8358 case BUILT_IN_GOACC_UPDATE:
8359 {
8360 tree t_async = NULL_TREE;
8361
8362 /* If present, use the value specified by the respective
8363 clause, making sure that it is of the correct type. */
8364 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8365 if (c)
8366 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8367 integer_type_node,
8368 OMP_CLAUSE_ASYNC_EXPR (c));
8369 else if (!tagging)
8370 /* Default value for t_async. */
8371 t_async = fold_convert_loc (gimple_location (entry_stmt),
8372 integer_type_node,
8373 build_int_cst (integer_type_node,
8374 GOMP_ASYNC_SYNC));
8375 if (tagging && t_async)
8376 {
8377 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8378
8379 if (TREE_CODE (t_async) == INTEGER_CST)
8380 {
8381 /* See if we can pack the async arg into the tag's
8382 operand. */
8383 i_async = TREE_INT_CST_LOW (t_async);
8384 if (i_async < GOMP_LAUNCH_OP_MAX)
8385 t_async = NULL_TREE;
8386 else
8387 i_async = GOMP_LAUNCH_OP_MAX;
8388 }
8389 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8390 i_async));
8391 }
8392 if (t_async)
8393 args.safe_push (t_async);
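  /* A note on the packing above: oacc_launch_pack combines a GOMP_LAUNCH_*
     code with a small immediate operand in one argument, so a constant async
     queue id below GOMP_LAUNCH_OP_MAX travels inside the tag itself, while a
     larger or non-constant value follows the tag as a separate argument.  */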
8394
8395 /* Save the argument index, and ... */
8396 unsigned t_wait_idx = args.length ();
8397 unsigned num_waits = 0;
8398 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8399 if (!tagging || c)
8400 /* ... push a placeholder. */
8401 args.safe_push (integer_zero_node);
8402
8403 for (; c; c = OMP_CLAUSE_CHAIN (c))
8404 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8405 {
8406 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8407 integer_type_node,
8408 OMP_CLAUSE_WAIT_EXPR (c)));
8409 num_waits++;
8410 }
8411
8412 if (!tagging || num_waits)
8413 {
8414 tree len;
8415
8416 /* Now that we know the number, update the placeholder. */
8417 if (tagging)
8418 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8419 else
8420 len = build_int_cst (integer_type_node, num_waits);
8421 len = fold_convert_loc (gimple_location (entry_stmt),
8422 unsigned_type_node, len);
8423 args[t_wait_idx] = len;
8424 }
8425 }
8426 break;
8427 default:
8428 gcc_unreachable ();
8429 }
8430 if (tagging)
8431 /* Push terminal marker - zero. */
8432 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
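  /* For BUILT_IN_GOACC_PARALLEL the trailing arguments thus form a
     zero-terminated list of launch tags; a sketch of the resulting call
     (assuming GOACC_parallel_keyed is the libgomp name bound to this
     builtin):

       GOACC_parallel_keyed (flags, child_fn, mapnum, hostaddrs, sizes,
                             kinds, ...launch tags..., 0);  */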
8433
8434 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8435 gimple_set_location (g, gimple_location (entry_stmt));
8436 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8437 if (!offloaded)
8438 {
8439 g = gsi_stmt (gsi);
8440 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8441 gsi_remove (&gsi, true);
8442 }
8443 if (data_region && region->exit)
8444 {
bce107d7 8445 gsi = gsi_last_nondebug_bb (region->exit);
4954efd4 8446 g = gsi_stmt (gsi);
8447 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8448 gsi_remove (&gsi, true);
8449 }
8450}
8451
8452/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
8453 iteration variable derived from the thread number. INTRA_GROUP means this
8454 is an expansion of a loop iterating over work-items within a separate
7c6746c9 8455 iteration over groups. */
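/* Schematically (a sketch, not verbatim output): a gridified loop

     for (V = N1; V cond N2; V += STEP)
       BODY;

   is reduced to a body-only kernel in which every work-item computes its own
   iteration value

     V = THREADID * STEP + N1;
     BODY;

   with THREADID obtained from the HSA work-group/work-item builtins selected
   in the loop below.  */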
4954efd4 8456
8457static void
8458grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8459{
8460 gimple_stmt_iterator gsi;
8461 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8462 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8463 == GF_OMP_FOR_KIND_GRID_LOOP);
8464 size_t collapse = gimple_omp_for_collapse (for_stmt);
8465 struct omp_for_data_loop *loops
8466 = XALLOCAVEC (struct omp_for_data_loop,
7c6746c9 8467 gimple_omp_for_collapse (for_stmt));
4954efd4 8468 struct omp_for_data fd;
8469
8470 remove_edge (BRANCH_EDGE (kfor->entry));
8471 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8472
8473 gcc_assert (kfor->cont);
8474 omp_extract_for_data (for_stmt, &fd, loops);
8475
8476 gsi = gsi_start_bb (body_bb);
8477
8478 for (size_t dim = 0; dim < collapse; dim++)
8479 {
8480 tree type, itype;
8481 itype = type = TREE_TYPE (fd.loops[dim].v);
8482 if (POINTER_TYPE_P (type))
8483 itype = signed_type_for (type);
8484
8485 tree n1 = fd.loops[dim].n1;
8486 tree step = fd.loops[dim].step;
8487 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8488 true, NULL_TREE, true, GSI_SAME_STMT);
8489 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8490 true, NULL_TREE, true, GSI_SAME_STMT);
8491 tree threadid;
8492 if (gimple_omp_for_grid_group_iter (for_stmt))
8493 {
8494 gcc_checking_assert (!intra_group);
8495 threadid = build_call_expr (builtin_decl_explicit
8496 (BUILT_IN_HSA_WORKGROUPID), 1,
8497 build_int_cstu (unsigned_type_node, dim));
8498 }
8499 else if (intra_group)
8500 threadid = build_call_expr (builtin_decl_explicit
8501 (BUILT_IN_HSA_WORKITEMID), 1,
8502 build_int_cstu (unsigned_type_node, dim));
8503 else
8504 threadid = build_call_expr (builtin_decl_explicit
8505 (BUILT_IN_HSA_WORKITEMABSID), 1,
8506 build_int_cstu (unsigned_type_node, dim));
8507 threadid = fold_convert (itype, threadid);
8508 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8509 true, GSI_SAME_STMT);
8510
8511 tree startvar = fd.loops[dim].v;
8512 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8513 if (POINTER_TYPE_P (type))
8514 t = fold_build_pointer_plus (n1, t);
8515 else
8516 t = fold_build2 (PLUS_EXPR, type, t, n1);
8517 t = fold_convert (type, t);
8518 t = force_gimple_operand_gsi (&gsi, t,
8519 DECL_P (startvar)
8520 && TREE_ADDRESSABLE (startvar),
8521 NULL_TREE, true, GSI_SAME_STMT);
8522 gassign *assign_stmt = gimple_build_assign (startvar, t);
8523 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8524 }
7c6746c9 8525 /* Remove the omp for statement. */
bce107d7 8526 gsi = gsi_last_nondebug_bb (kfor->entry);
4954efd4 8527 gsi_remove (&gsi, true);
8528
8529 /* Remove the GIMPLE_OMP_CONTINUE statement. */
bce107d7 8530 gsi = gsi_last_nondebug_bb (kfor->cont);
4954efd4 8531 gcc_assert (!gsi_end_p (gsi)
8532 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8533 gsi_remove (&gsi, true);
8534
8535 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
bce107d7 8536 gsi = gsi_last_nondebug_bb (kfor->exit);
4954efd4 8537 gcc_assert (!gsi_end_p (gsi)
8538 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8539 if (intra_group)
8540 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8541 gsi_remove (&gsi, true);
8542
8543 /* Fixup the much simpler CFG. */
8544 remove_edge (find_edge (kfor->cont, body_bb));
8545
8546 if (kfor->cont != body_bb)
8547 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8548 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8549}
8550
8551/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8552 argument_decls. */
8553
8554struct grid_arg_decl_map
8555{
8556 tree old_arg;
8557 tree new_arg;
8558};
8559
8560/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8561 pertaining to the kernel function. */
8562
8563static tree
8564grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8565{
8566 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8567 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8568 tree t = *tp;
8569
8570 if (t == adm->old_arg)
8571 *tp = adm->new_arg;
8572 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8573 return NULL_TREE;
8574}
8575
8576/* If the TARGET region contains a kernel body for-loop, remove its region from the
7c6746c9 8577 TARGET and expand it in HSA gridified kernel fashion. */
4954efd4 8578
8579static void
8580grid_expand_target_grid_body (struct omp_region *target)
8581{
8582 if (!hsa_gen_requested_p ())
8583 return;
8584
8585 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8586 struct omp_region **pp;
8587
8588 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8589 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8590 break;
8591
8592 struct omp_region *gpukernel = *pp;
8593
8594 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8595 if (!gpukernel)
8596 {
8597 /* HSA cannot handle OACC stuff. */
8598 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8599 return;
8600 gcc_checking_assert (orig_child_fndecl);
8601 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8602 OMP_CLAUSE__GRIDDIM_));
8603 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8604
8605 hsa_register_kernel (n);
8606 return;
8607 }
8608
8609 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8610 OMP_CLAUSE__GRIDDIM_));
7c6746c9 8611 tree inside_block
8612 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
4954efd4 8613 *pp = gpukernel->next;
8614 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8615 if ((*pp)->type == GIMPLE_OMP_FOR)
8616 break;
8617
8618 struct omp_region *kfor = *pp;
8619 gcc_assert (kfor);
8620 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8621 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8622 *pp = kfor->next;
8623 if (kfor->inner)
8624 {
8625 if (gimple_omp_for_grid_group_iter (for_stmt))
8626 {
8627 struct omp_region **next_pp;
8628 for (pp = &kfor->inner; *pp; pp = next_pp)
8629 {
8630 next_pp = &(*pp)->next;
8631 if ((*pp)->type != GIMPLE_OMP_FOR)
8632 continue;
8633 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8634 gcc_assert (gimple_omp_for_kind (inner)
8635 == GF_OMP_FOR_KIND_GRID_LOOP);
8636 grid_expand_omp_for_loop (*pp, true);
8637 *pp = (*pp)->next;
8638 next_pp = pp;
8639 }
8640 }
8641 expand_omp (kfor->inner);
8642 }
8643 if (gpukernel->inner)
8644 expand_omp (gpukernel->inner);
8645
8646 tree kern_fndecl = copy_node (orig_child_fndecl);
87943388 8647 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8648 "kernel");
4954efd4 8649 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8650 tree tgtblock = gimple_block (tgt_stmt);
8651 tree fniniblock = make_node (BLOCK);
0e59d2fb 8652 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
4954efd4 8653 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8654 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8655 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8656 DECL_INITIAL (kern_fndecl) = fniniblock;
8657 push_struct_function (kern_fndecl);
8658 cfun->function_end_locus = gimple_location (tgt_stmt);
8659 init_tree_ssa (cfun);
8660 pop_cfun ();
8661
8662 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8663 gcc_assert (!DECL_CHAIN (old_parm_decl));
8664 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8665 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8666 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8667 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8668 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8669 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8670 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8671 kern_cfun->curr_properties = cfun->curr_properties;
8672
8673 grid_expand_omp_for_loop (kfor, false);
8674
7c6746c9 8675 /* Remove the omp for statement. */
bce107d7 8676 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
4954efd4 8677 gsi_remove (&gsi, true);
8678 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8679 return. */
bce107d7 8680 gsi = gsi_last_nondebug_bb (gpukernel->exit);
4954efd4 8681 gcc_assert (!gsi_end_p (gsi)
8682 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8683 gimple *ret_stmt = gimple_build_return (NULL);
8684 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8685 gsi_remove (&gsi, true);
8686
8687 /* Statements in the first BB in the target construct have been produced by
8688 target lowering and must be copied inside the GPUKERNEL, with the two
8689 exceptions of the first OMP statement and the OMP_DATA assignment
8690 statement. */
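  /* (The skipped assignment is the one whose right-hand side takes the
     address of the sender record, roughly '... = &.omp_data_arr'; it belongs
     to the host side and must not be duplicated into the kernel.)  */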
8691 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8692 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8693 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8694 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8695 !gsi_end_p (tsi); gsi_next (&tsi))
8696 {
8697 gimple *stmt = gsi_stmt (tsi);
8698 if (is_gimple_omp (stmt))
8699 break;
8700 if (sender
8701 && is_gimple_assign (stmt)
8702 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8703 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8704 continue;
8705 gimple *copy = gimple_copy (stmt);
8706 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8707 gimple_set_block (copy, fniniblock);
8708 }
8709
8710 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8711 gpukernel->exit, inside_block);
8712
8713 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8714 kcn->mark_force_output ();
8715 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8716
8717 hsa_register_kernel (kcn, orig_child);
8718
8719 cgraph_node::add_new_function (kern_fndecl, true);
8720 push_cfun (kern_cfun);
8721 cgraph_edge::rebuild_edges ();
8722
8723 /* Re-map any mention of the PARM_DECL of the original function to the
8724 PARM_DECL of the new one.
8725
8726 TODO: It would be great if lowering produced references into the GPU
8727 kernel decl straight away and we did not have to do this. */
8728 struct grid_arg_decl_map adm;
8729 adm.old_arg = old_parm_decl;
8730 adm.new_arg = new_parm_decl;
8731 basic_block bb;
8732 FOR_EACH_BB_FN (bb, kern_cfun)
8733 {
8734 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8735 {
8736 gimple *stmt = gsi_stmt (gsi);
8737 struct walk_stmt_info wi;
8738 memset (&wi, 0, sizeof (wi));
8739 wi.info = &adm;
8740 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8741 }
8742 }
8743 pop_cfun ();
8744
8745 return;
8746}
8747
8748/* Expand the parallel region tree rooted at REGION. Expansion
8749 proceeds in depth-first order. Innermost regions are expanded
8750 first. This way, parallel regions that require a new function to
8751 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8752 internal dependencies in their body. */
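/* For instance (a sketch), given

     #pragma omp parallel
     #pragma omp for
     for (...) ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL region, so outlining the parallel body into its child
   function only ever moves already-expanded code.  */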
8753
8754static void
8755expand_omp (struct omp_region *region)
8756{
8757 omp_any_child_fn_dumped = false;
8758 while (region)
8759 {
8760 location_t saved_location;
8761 gimple *inner_stmt = NULL;
8762
8763 /* First, determine whether this is a combined parallel+workshare
7c6746c9 8764 region. */
4954efd4 8765 if (region->type == GIMPLE_OMP_PARALLEL)
8766 determine_parallel_type (region);
8767 else if (region->type == GIMPLE_OMP_TARGET)
8768 grid_expand_target_grid_body (region);
8769
8770 if (region->type == GIMPLE_OMP_FOR
8771 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8772 inner_stmt = last_stmt (region->inner->entry);
8773
8774 if (region->inner)
8775 expand_omp (region->inner);
8776
8777 saved_location = input_location;
8778 if (gimple_has_location (last_stmt (region->entry)))
8779 input_location = gimple_location (last_stmt (region->entry));
8780
8781 switch (region->type)
8782 {
8783 case GIMPLE_OMP_PARALLEL:
8784 case GIMPLE_OMP_TASK:
8785 expand_omp_taskreg (region);
8786 break;
8787
8788 case GIMPLE_OMP_FOR:
8789 expand_omp_for (region, inner_stmt);
8790 break;
8791
8792 case GIMPLE_OMP_SECTIONS:
8793 expand_omp_sections (region);
8794 break;
8795
8796 case GIMPLE_OMP_SECTION:
8797 /* Individual omp sections are handled together with their
8798 parent GIMPLE_OMP_SECTIONS region. */
8799 break;
8800
8801 case GIMPLE_OMP_SINGLE:
8802 expand_omp_single (region);
8803 break;
8804
8805 case GIMPLE_OMP_ORDERED:
8806 {
8807 gomp_ordered *ord_stmt
8808 = as_a <gomp_ordered *> (last_stmt (region->entry));
8809 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8810 OMP_CLAUSE_DEPEND))
8811 {
8812 /* We'll expand these when expanding corresponding
8813 worksharing region with ordered(n) clause. */
8814 gcc_assert (region->outer
8815 && region->outer->type == GIMPLE_OMP_FOR);
8816 region->ord_stmt = ord_stmt;
8817 break;
8818 }
8819 }
8820 /* FALLTHRU */
8821 case GIMPLE_OMP_MASTER:
8822 case GIMPLE_OMP_TASKGROUP:
8823 case GIMPLE_OMP_CRITICAL:
8824 case GIMPLE_OMP_TEAMS:
8825 expand_omp_synch (region);
8826 break;
8827
8828 case GIMPLE_OMP_ATOMIC_LOAD:
8829 expand_omp_atomic (region);
8830 break;
8831
8832 case GIMPLE_OMP_TARGET:
8833 expand_omp_target (region);
8834 break;
8835
8836 default:
8837 gcc_unreachable ();
8838 }
8839
8840 input_location = saved_location;
8841 region = region->next;
8842 }
8843 if (omp_any_child_fn_dumped)
8844 {
8845 if (dump_file)
8846 dump_function_header (dump_file, current_function_decl, dump_flags);
8847 omp_any_child_fn_dumped = false;
8848 }
8849}
8850
8851/* Helper for build_omp_regions. Scan the dominator tree starting at
8852 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8853 true, the function ends once a single tree is built (otherwise, a whole
8854 forest of OMP constructs may be built). */
8855
8856static void
8857build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8858 bool single_tree)
8859{
8860 gimple_stmt_iterator gsi;
8861 gimple *stmt;
8862 basic_block son;
8863
bce107d7 8864 gsi = gsi_last_nondebug_bb (bb);
4954efd4 8865 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8866 {
8867 struct omp_region *region;
8868 enum gimple_code code;
8869
8870 stmt = gsi_stmt (gsi);
8871 code = gimple_code (stmt);
8872 if (code == GIMPLE_OMP_RETURN)
8873 {
8874 /* STMT is the return point out of region PARENT. Mark it
8875 as the exit point and make PARENT the immediately
8876 enclosing region. */
8877 gcc_assert (parent);
8878 region = parent;
8879 region->exit = bb;
8880 parent = parent->outer;
8881 }
8882 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8883 {
2fbe7a32 8884 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
4954efd4 8885 GIMPLE_OMP_RETURN, but matches with
8886 GIMPLE_OMP_ATOMIC_LOAD. */
8887 gcc_assert (parent);
8888 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8889 region = parent;
8890 region->exit = bb;
8891 parent = parent->outer;
8892 }
8893 else if (code == GIMPLE_OMP_CONTINUE)
8894 {
8895 gcc_assert (parent);
8896 parent->cont = bb;
8897 }
8898 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8899 {
8900 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8901 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8902 }
8903 else
8904 {
8905 region = new_omp_region (bb, code, parent);
8906 /* Otherwise... */
8907 if (code == GIMPLE_OMP_TARGET)
8908 {
8909 switch (gimple_omp_target_kind (stmt))
8910 {
8911 case GF_OMP_TARGET_KIND_REGION:
8912 case GF_OMP_TARGET_KIND_DATA:
8913 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8914 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8915 case GF_OMP_TARGET_KIND_OACC_DATA:
8916 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8917 break;
8918 case GF_OMP_TARGET_KIND_UPDATE:
8919 case GF_OMP_TARGET_KIND_ENTER_DATA:
8920 case GF_OMP_TARGET_KIND_EXIT_DATA:
8921 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8922 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8923 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8924 /* ..., other than for those stand-alone directives... */
8925 region = NULL;
8926 break;
8927 default:
8928 gcc_unreachable ();
8929 }
8930 }
8931 else if (code == GIMPLE_OMP_ORDERED
8932 && omp_find_clause (gimple_omp_ordered_clauses
8933 (as_a <gomp_ordered *> (stmt)),
8934 OMP_CLAUSE_DEPEND))
8935 /* #pragma omp ordered depend is also just a stand-alone
8936 directive. */
8937 region = NULL;
7e5a76c8 8938 else if (code == GIMPLE_OMP_TASK
8939 && gimple_omp_task_taskwait_p (stmt))
8940 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8941 region = NULL;
4954efd4 8942 /* ..., this directive becomes the parent for a new region. */
8943 if (region)
8944 parent = region;
8945 }
8946 }
8947
8948 if (single_tree && !parent)
8949 return;
8950
8951 for (son = first_dom_son (CDI_DOMINATORS, bb);
8952 son;
8953 son = next_dom_son (CDI_DOMINATORS, son))
8954 build_omp_regions_1 (son, parent, single_tree);
8955}
8956
8957/* Builds the tree of OMP regions rooted at ROOT, storing it to
8958 root_omp_region. */
8959
8960static void
8961build_omp_regions_root (basic_block root)
8962{
8963 gcc_assert (root_omp_region == NULL);
8964 build_omp_regions_1 (root, NULL, true);
8965 gcc_assert (root_omp_region != NULL);
8966}
8967
8968/* Expands omp construct (and its subconstructs) starting in HEAD. */
8969
8970void
8971omp_expand_local (basic_block head)
8972{
8973 build_omp_regions_root (head);
8974 if (dump_file && (dump_flags & TDF_DETAILS))
8975 {
8976 fprintf (dump_file, "\nOMP region tree\n\n");
8977 dump_omp_region (dump_file, root_omp_region, 0);
8978 fprintf (dump_file, "\n");
8979 }
8980
8981 remove_exit_barriers (root_omp_region);
8982 expand_omp (root_omp_region);
8983
8984 omp_free_regions ();
8985}
8986
8987/* Scan the CFG and build a tree of OMP regions, storing the root of
8988 the tree in root_omp_region. */
8989
8990static void
8991build_omp_regions (void)
8992{
8993 gcc_assert (root_omp_region == NULL);
8994 calculate_dominance_info (CDI_DOMINATORS);
8995 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8996}
8997
8998/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8999
9000static unsigned int
9001execute_expand_omp (void)
9002{
9003 build_omp_regions ();
9004
9005 if (!root_omp_region)
9006 return 0;
9007
9008 if (dump_file)
9009 {
9010 fprintf (dump_file, "\nOMP region tree\n\n");
9011 dump_omp_region (dump_file, root_omp_region, 0);
9012 fprintf (dump_file, "\n");
9013 }
9014
9015 remove_exit_barriers (root_omp_region);
9016
9017 expand_omp (root_omp_region);
9018
9019 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9020 verify_loop_structure ();
9021 cleanup_tree_cfg ();
9022
9023 omp_free_regions ();
9024
9025 return 0;
9026}
9027
9028/* OMP expansion -- the default pass, run before creation of SSA form. */
9029
9030namespace {
9031
9032const pass_data pass_data_expand_omp =
9033{
9034 GIMPLE_PASS, /* type */
9035 "ompexp", /* name */
f57c8178 9036 OPTGROUP_OMP, /* optinfo_flags */
4954efd4 9037 TV_NONE, /* tv_id */
9038 PROP_gimple_any, /* properties_required */
9039 PROP_gimple_eomp, /* properties_provided */
9040 0, /* properties_destroyed */
9041 0, /* todo_flags_start */
9042 0, /* todo_flags_finish */
9043};
9044
9045class pass_expand_omp : public gimple_opt_pass
9046{
9047public:
9048 pass_expand_omp (gcc::context *ctxt)
9049 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9050 {}
9051
9052 /* opt_pass methods: */
9053 virtual unsigned int execute (function *)
9054 {
efa02472 9055 bool gate = ((flag_openacc != 0 || flag_openmp != 0
4954efd4 9056 || flag_openmp_simd != 0)
9057 && !seen_error ());
9058
9059 /* This pass always runs, to provide PROP_gimple_eomp.
9060 But often, there is nothing to do. */
9061 if (!gate)
9062 return 0;
9063
9064 return execute_expand_omp ();
9065 }
9066
9067}; // class pass_expand_omp
9068
9069} // anon namespace
9070
9071gimple_opt_pass *
9072make_pass_expand_omp (gcc::context *ctxt)
9073{
9074 return new pass_expand_omp (ctxt);
9075}
9076
9077namespace {
9078
9079const pass_data pass_data_expand_omp_ssa =
9080{
9081 GIMPLE_PASS, /* type */
9082 "ompexpssa", /* name */
f57c8178 9083 OPTGROUP_OMP, /* optinfo_flags */
4954efd4 9084 TV_NONE, /* tv_id */
9085 PROP_cfg | PROP_ssa, /* properties_required */
9086 PROP_gimple_eomp, /* properties_provided */
9087 0, /* properties_destroyed */
9088 0, /* todo_flags_start */
9089 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
9090};
9091
9092class pass_expand_omp_ssa : public gimple_opt_pass
9093{
9094public:
9095 pass_expand_omp_ssa (gcc::context *ctxt)
9096 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
9097 {}
9098
9099 /* opt_pass methods: */
9100 virtual bool gate (function *fun)
9101 {
9102 return !(fun->curr_properties & PROP_gimple_eomp);
9103 }
9104 virtual unsigned int execute (function *) { return execute_expand_omp (); }
9105 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
9106
9107}; // class pass_expand_omp_ssa
9108
9109} // anon namespace
9110
9111gimple_opt_pass *
9112make_pass_expand_omp_ssa (gcc::context *ctxt)
9113{
9114 return new pass_expand_omp_ssa (ctxt);
9115}
9116
9117/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
9118 GIMPLE_* codes. */
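/* For example (a sketch): for a GIMPLE_OMP_FOR region this wires up the
   loopback edge from the GIMPLE_OMP_CONTINUE block to the loop body and the
   edge that bypasses a body executing zero times, both flagged EDGE_ABNORMAL
   so later passes do not split them.  */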
9119
9120bool
9121omp_make_gimple_edges (basic_block bb, struct omp_region **region,
9122 int *region_idx)
9123{
9124 gimple *last = last_stmt (bb);
9125 enum gimple_code code = gimple_code (last);
9126 struct omp_region *cur_region = *region;
9127 bool fallthru = false;
9128
9129 switch (code)
9130 {
9131 case GIMPLE_OMP_PARALLEL:
4954efd4 9132 case GIMPLE_OMP_FOR:
9133 case GIMPLE_OMP_SINGLE:
9134 case GIMPLE_OMP_TEAMS:
9135 case GIMPLE_OMP_MASTER:
9136 case GIMPLE_OMP_TASKGROUP:
9137 case GIMPLE_OMP_CRITICAL:
9138 case GIMPLE_OMP_SECTION:
9139 case GIMPLE_OMP_GRID_BODY:
9140 cur_region = new_omp_region (bb, code, cur_region);
9141 fallthru = true;
9142 break;
9143
7e5a76c8 9144 case GIMPLE_OMP_TASK:
9145 cur_region = new_omp_region (bb, code, cur_region);
9146 fallthru = true;
9147 if (gimple_omp_task_taskwait_p (last))
9148 cur_region = cur_region->outer;
9149 break;
9150
4954efd4 9151 case GIMPLE_OMP_ORDERED:
9152 cur_region = new_omp_region (bb, code, cur_region);
9153 fallthru = true;
9154 if (omp_find_clause (gimple_omp_ordered_clauses
9155 (as_a <gomp_ordered *> (last)),
9156 OMP_CLAUSE_DEPEND))
9157 cur_region = cur_region->outer;
9158 break;
9159
9160 case GIMPLE_OMP_TARGET:
9161 cur_region = new_omp_region (bb, code, cur_region);
9162 fallthru = true;
9163 switch (gimple_omp_target_kind (last))
9164 {
9165 case GF_OMP_TARGET_KIND_REGION:
9166 case GF_OMP_TARGET_KIND_DATA:
9167 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9168 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9169 case GF_OMP_TARGET_KIND_OACC_DATA:
9170 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9171 break;
9172 case GF_OMP_TARGET_KIND_UPDATE:
9173 case GF_OMP_TARGET_KIND_ENTER_DATA:
9174 case GF_OMP_TARGET_KIND_EXIT_DATA:
9175 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9176 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9177 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9178 cur_region = cur_region->outer;
9179 break;
9180 default:
9181 gcc_unreachable ();
9182 }
9183 break;
9184
9185 case GIMPLE_OMP_SECTIONS:
9186 cur_region = new_omp_region (bb, code, cur_region);
9187 fallthru = true;
9188 break;
9189
9190 case GIMPLE_OMP_SECTIONS_SWITCH:
9191 fallthru = false;
9192 break;
9193
9194 case GIMPLE_OMP_ATOMIC_LOAD:
9195 case GIMPLE_OMP_ATOMIC_STORE:
9196 fallthru = true;
9197 break;
9198
9199 case GIMPLE_OMP_RETURN:
9200 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
9201 somewhere other than the next block. This will be
9202 created later. */
9203 cur_region->exit = bb;
9204 if (cur_region->type == GIMPLE_OMP_TASK)
9205 /* Add an edge corresponding to not scheduling the task
9206 immediately. */
9207 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
9208 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
9209 cur_region = cur_region->outer;
9210 break;
9211
9212 case GIMPLE_OMP_CONTINUE:
9213 cur_region->cont = bb;
9214 switch (cur_region->type)
9215 {
9216 case GIMPLE_OMP_FOR:
9217 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
9218 succs edges as abnormal to prevent splitting
9219 them. */
9220 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
9221 /* Make the loopback edge. */
9222 make_edge (bb, single_succ (cur_region->entry),
9223 EDGE_ABNORMAL);
9224
9225 /* Create an edge from GIMPLE_OMP_FOR to exit, which
9226 corresponds to the case that the body of the loop
9227 is not executed at all. */
9228 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
9229 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
9230 fallthru = false;
9231 break;
9232
9233 case GIMPLE_OMP_SECTIONS:
9234 /* Wire up the edges into and out of the nested sections. */
9235 {
9236 basic_block switch_bb = single_succ (cur_region->entry);
9237
9238 struct omp_region *i;
9239 for (i = cur_region->inner; i ; i = i->next)
9240 {
9241 gcc_assert (i->type == GIMPLE_OMP_SECTION);
9242 make_edge (switch_bb, i->entry, 0);
9243 make_edge (i->exit, bb, EDGE_FALLTHRU);
9244 }
9245
9246 /* Make the loopback edge to the block with
9247 GIMPLE_OMP_SECTIONS_SWITCH. */
9248 make_edge (bb, switch_bb, 0);
9249
9250 /* Make the edge from the switch to exit. */
9251 make_edge (switch_bb, bb->next_bb, 0);
9252 fallthru = false;
9253 }
9254 break;
9255
9256 case GIMPLE_OMP_TASK:
9257 fallthru = true;
9258 break;
9259
9260 default:
9261 gcc_unreachable ();
9262 }
9263 break;
9264
9265 default:
9266 gcc_unreachable ();
9267 }
9268
9269 if (*region != cur_region)
9270 {
9271 *region = cur_region;
9272 if (cur_region)
9273 *region_idx = cur_region->entry->index;
9274 else
9275 *region_idx = 0;
9276 }
9277
9278 return fallthru;
9279}
9280
9281#include "gt-omp-expand.h"