gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66 struct omp_region
67 {
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* Copy of fd.lastprivate_conditional != 0. */
104 bool has_lastprivate_conditional;
105
106 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
107 a depend clause. */
108 gomp_ordered *ord_stmt;
109 };
110
111 static struct omp_region *root_omp_region;
112 static bool omp_any_child_fn_dumped;
113
114 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
115 bool = false);
116 static gphi *find_phi_with_arg_on_edge (tree, edge);
117 static void expand_omp (struct omp_region *region);
118
119 /* Return true if REGION is a combined parallel+workshare region. */
120
121 static inline bool
122 is_combined_parallel (struct omp_region *region)
123 {
124 return region->is_combined_parallel;
125 }
126
127 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
128 is the immediate dominator of PAR_ENTRY_BB, return true if there
129 are no data dependencies that would prevent expanding the parallel
130 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
131
132 When expanding a combined parallel+workshare region, the call to
133 the child function may need additional arguments in the case of
134 GIMPLE_OMP_FOR regions. In some cases, these arguments are
135 computed out of variables passed in from the parent to the child
136 via 'struct .omp_data_s'. For instance:
137
138 #pragma omp parallel for schedule (guided, i * 4)
139 for (j ...)
140
141 Is lowered into:
142
143 # BLOCK 2 (PAR_ENTRY_BB)
144 .omp_data_o.i = i;
145 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
146
147 # BLOCK 3 (WS_ENTRY_BB)
148 .omp_data_i = &.omp_data_o;
149 D.1667 = .omp_data_i->i;
150 D.1598 = D.1667 * 4;
151 #pragma omp for schedule (guided, D.1598)
152
153 When we outline the parallel region, the call to the child function
154 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
155 that value is computed *after* the call site. So, in principle we
156 cannot do the transformation.
157
158 To see whether the code in WS_ENTRY_BB blocks the combined
159 parallel+workshare call, we collect all the variables used in the
160 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
161 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
162 call.
163
164 FIXME. If we had the SSA form built at this point, we could merely
165 hoist the code in block 3 into block 2 and be done with it. But at
166 this point we don't have dataflow information and though we could
167 hack something up here, it is really not worth the aggravation. */
168
169 static bool
170 workshare_safe_to_combine_p (basic_block ws_entry_bb)
171 {
172 struct omp_for_data fd;
173 gimple *ws_stmt = last_stmt (ws_entry_bb);
174
175 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
176 return true;
177
178 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
179 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
180 return false;
181
182 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
183
184 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
185 return false;
186 if (fd.iter_type != long_integer_type_node)
187 return false;
188
189 /* FIXME. We give up too easily here. If any of these arguments
190 are not constants, they will likely involve variables that have
191 been mapped into fields of .omp_data_s for sharing with the child
192 function. With appropriate data flow, it would be possible to
193 see through this. */
194 if (!is_gimple_min_invariant (fd.loop.n1)
195 || !is_gimple_min_invariant (fd.loop.n2)
196 || !is_gimple_min_invariant (fd.loop.step)
197 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
198 return false;
199
200 return true;
201 }
202
203 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
204 presence (SIMD_SCHEDULE). */
205
206 static tree
207 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
208 {
209 if (!simd_schedule || integer_zerop (chunk_size))
210 return chunk_size;
211
212 poly_uint64 vf = omp_max_vf ();
213 if (known_eq (vf, 1U))
214 return chunk_size;
215
216 tree type = TREE_TYPE (chunk_size);
217 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
218 build_int_cst (type, vf - 1));
219 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
220 build_int_cst (type, -vf));
221 }
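
The two fold_build2 calls above are the usual round-up-to-a-multiple idiom. A minimal plain-C sketch of the same arithmetic (a hypothetical helper, not GCC API, assuming the vectorization factor is a power of two, which is what the BIT_AND_EXPR with -vf relies on):

    /* Round chunk_size up to a multiple of vf, mirroring the tree
       arithmetic built above.  Hypothetical example, not GCC API.  */
    static unsigned long
    adjust_chunk_size_example (unsigned long chunk_size, unsigned long vf)
    {
      if (chunk_size == 0 || vf == 1)
        return chunk_size;
      return (chunk_size + vf - 1) & -vf;
    }

    /* e.g. adjust_chunk_size_example (10, 8) == 16: each schedule chunk
       then covers whole vector iterations.  */
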
222
223 /* Collect additional arguments needed to emit a combined
224 parallel+workshare call. WS_STMT is the workshare directive being
225 expanded. */
226
227 static vec<tree, va_gc> *
228 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
229 {
230 tree t;
231 location_t loc = gimple_location (ws_stmt);
232 vec<tree, va_gc> *ws_args;
233
234 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
235 {
236 struct omp_for_data fd;
237 tree n1, n2;
238
239 omp_extract_for_data (for_stmt, &fd, NULL);
240 n1 = fd.loop.n1;
241 n2 = fd.loop.n2;
242
243 if (gimple_omp_for_combined_into_p (for_stmt))
244 {
245 tree innerc
246 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n1 = OMP_CLAUSE_DECL (innerc);
250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
251 OMP_CLAUSE__LOOPTEMP_);
252 gcc_assert (innerc);
253 n2 = OMP_CLAUSE_DECL (innerc);
254 }
255
256 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
257
258 t = fold_convert_loc (loc, long_integer_type_node, n1);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, n2);
262 ws_args->quick_push (t);
263
264 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
265 ws_args->quick_push (t);
266
267 if (fd.chunk_size)
268 {
269 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
270 t = omp_adjust_chunk_size (t, fd.simd_schedule);
271 ws_args->quick_push (t);
272 }
273
274 return ws_args;
275 }
276 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
277 {
278 /* Number of sections is equal to the number of edges from the
279 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
280 the exit of the sections region. */
281 basic_block bb = single_succ (gimple_bb (ws_stmt));
282 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
283 vec_alloc (ws_args, 1);
284 ws_args->quick_push (t);
285 return ws_args;
286 }
287
288 gcc_unreachable ();
289 }
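
For illustration only (a hypothetical loop, not taken from this file), the vector built above for a GIMPLE_OMP_FOR workshare holds n1, n2 and the step, plus the chunk size when one is given, all converted to long:

    #pragma omp parallel for schedule (dynamic, 4)
    for (j = 1; j < n; j++)
      body (j);

    /* ws_args becomes roughly { (long) 1, (long) n, (long) 1, (long) 4 },
       i.e. the extra arguments the combined GOMP_parallel_* library call
       needs beyond the child function and the data pointer.  */
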
290
291 /* Discover whether REGION is a combined parallel+workshare region. */
292
293 static void
294 determine_parallel_type (struct omp_region *region)
295 {
296 basic_block par_entry_bb, par_exit_bb;
297 basic_block ws_entry_bb, ws_exit_bb;
298
299 if (region == NULL || region->inner == NULL
300 || region->exit == NULL || region->inner->exit == NULL
301 || region->inner->cont == NULL)
302 return;
303
304 /* We only support parallel+for and parallel+sections. */
305 if (region->type != GIMPLE_OMP_PARALLEL
306 || (region->inner->type != GIMPLE_OMP_FOR
307 && region->inner->type != GIMPLE_OMP_SECTIONS))
308 return;
309
310 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
311 WS_EXIT_BB -> PAR_EXIT_BB. */
312 par_entry_bb = region->entry;
313 par_exit_bb = region->exit;
314 ws_entry_bb = region->inner->entry;
315 ws_exit_bb = region->inner->exit;
316
317 /* Give up on task reductions on the parallel: while they are implementable,
318 adding another big set of APIs or slowing down the normal paths is
319 not acceptable. */
320 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
321 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
322 return;
323
324 if (single_succ (par_entry_bb) == ws_entry_bb
325 && single_succ (ws_exit_bb) == par_exit_bb
326 && workshare_safe_to_combine_p (ws_entry_bb)
327 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
328 || (last_and_only_stmt (ws_entry_bb)
329 && last_and_only_stmt (par_exit_bb))))
330 {
331 gimple *par_stmt = last_stmt (par_entry_bb);
332 gimple *ws_stmt = last_stmt (ws_entry_bb);
333
334 if (region->inner->type == GIMPLE_OMP_FOR)
335 {
336 /* If this is a combined parallel loop, we need to determine
337 whether or not to use the combined library calls. There
338 are two cases where we do not apply the transformation:
339 static loops and any kind of ordered loop. In the first
340 case, we already open code the loop so there is no need
341 to do anything else. In the latter case, the combined
342 parallel loop call would still need extra synchronization
343 to implement ordered semantics, so there would not be any
344 gain in using the combined call. */
345 tree clauses = gimple_omp_for_clauses (ws_stmt);
346 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
347 if (c == NULL
348 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
349 == OMP_CLAUSE_SCHEDULE_STATIC)
350 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
351 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
352 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
353 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
354 return;
355 }
356 else if (region->inner->type == GIMPLE_OMP_SECTIONS
357 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
358 OMP_CLAUSE__REDUCTEMP_)
359 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__CONDTEMP_)))
361 return;
362
363 region->is_combined_parallel = true;
364 region->inner->is_combined_parallel = true;
365 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
366 }
367 }
368
369 /* Debugging dumps for parallel regions. */
370 void dump_omp_region (FILE *, struct omp_region *, int);
371 void debug_omp_region (struct omp_region *);
372 void debug_all_omp_regions (void);
373
374 /* Dump the parallel region tree rooted at REGION. */
375
376 void
377 dump_omp_region (FILE *file, struct omp_region *region, int indent)
378 {
379 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
380 gimple_code_name[region->type]);
381
382 if (region->inner)
383 dump_omp_region (file, region->inner, indent + 4);
384
385 if (region->cont)
386 {
387 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
388 region->cont->index);
389 }
390
391 if (region->exit)
392 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
393 region->exit->index);
394 else
395 fprintf (file, "%*s[no exit marker]\n", indent, "");
396
397 if (region->next)
398 dump_omp_region (file, region->next, indent);
399 }
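
As a sketch of the resulting dump (hypothetical block numbers), a parallel region with a nested workshare loop would print along these lines:

    bb 2: GIMPLE_OMP_PARALLEL
        bb 3: GIMPLE_OMP_FOR
        bb 5: GIMPLE_OMP_CONTINUE
        bb 6: GIMPLE_OMP_RETURN
    bb 7: GIMPLE_OMP_RETURN
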
400
401 DEBUG_FUNCTION void
402 debug_omp_region (struct omp_region *region)
403 {
404 dump_omp_region (stderr, region, 0);
405 }
406
407 DEBUG_FUNCTION void
408 debug_all_omp_regions (void)
409 {
410 dump_omp_region (stderr, root_omp_region, 0);
411 }
412
413 /* Create a new parallel region starting at STMT inside region PARENT. */
414
415 static struct omp_region *
416 new_omp_region (basic_block bb, enum gimple_code type,
417 struct omp_region *parent)
418 {
419 struct omp_region *region = XCNEW (struct omp_region);
420
421 region->outer = parent;
422 region->entry = bb;
423 region->type = type;
424
425 if (parent)
426 {
427 /* This is a nested region. Add it to the list of inner
428 regions in PARENT. */
429 region->next = parent->inner;
430 parent->inner = region;
431 }
432 else
433 {
434 /* This is a toplevel region. Add it to the list of toplevel
435 regions in ROOT_OMP_REGION. */
436 region->next = root_omp_region;
437 root_omp_region = region;
438 }
439
440 return region;
441 }
442
443 /* Release the memory associated with the region tree rooted at REGION. */
444
445 static void
446 free_omp_region_1 (struct omp_region *region)
447 {
448 struct omp_region *i, *n;
449
450 for (i = region->inner; i ; i = n)
451 {
452 n = i->next;
453 free_omp_region_1 (i);
454 }
455
456 free (region);
457 }
458
459 /* Release the memory for the entire omp region tree. */
460
461 void
462 omp_free_regions (void)
463 {
464 struct omp_region *r, *n;
465 for (r = root_omp_region; r ; r = n)
466 {
467 n = r->next;
468 free_omp_region_1 (r);
469 }
470 root_omp_region = NULL;
471 }
472
473 /* A convenience function to build an empty GIMPLE_COND with just the
474 condition. */
475
476 static gcond *
477 gimple_build_cond_empty (tree cond)
478 {
479 enum tree_code pred_code;
480 tree lhs, rhs;
481
482 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
483 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
484 }
485
486 /* Return true if a parallel REGION is within a declare target function or
487 within a target region and is not a part of a gridified target. */
488
489 static bool
490 parallel_needs_hsa_kernel_p (struct omp_region *region)
491 {
492 bool indirect = false;
493 for (region = region->outer; region; region = region->outer)
494 {
495 if (region->type == GIMPLE_OMP_PARALLEL)
496 indirect = true;
497 else if (region->type == GIMPLE_OMP_TARGET)
498 {
499 gomp_target *tgt_stmt
500 = as_a <gomp_target *> (last_stmt (region->entry));
501
502 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
503 OMP_CLAUSE__GRIDDIM_))
504 return indirect;
505 else
506 return true;
507 }
508 }
509
510 if (lookup_attribute ("omp declare target",
511 DECL_ATTRIBUTES (current_function_decl)))
512 return true;
513
514 return false;
515 }
516
517 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
518 Add CHILD_FNDECL to decl chain of the supercontext of the block
519 ENTRY_BLOCK - this is the block which originally contained the
520 code from which CHILD_FNDECL was created.
521
522 Together, these actions ensure that the debug info for the outlined
523 function will be emitted with the correct lexical scope. */
524
525 static void
526 adjust_context_and_scope (struct omp_region *region, tree entry_block,
527 tree child_fndecl)
528 {
529 tree parent_fndecl = NULL_TREE;
530 gimple *entry_stmt;
531 /* OMP expansion expands inner regions before outer ones, so if
532 we e.g. have an explicit task region nested in a parallel region, when
533 expanding the task region current_function_decl will be the original
534 source function, but we actually want to use the child function of
535 the parallel as the context. */
536 for (region = region->outer;
537 region && parent_fndecl == NULL_TREE; region = region->outer)
538 switch (region->type)
539 {
540 case GIMPLE_OMP_PARALLEL:
541 case GIMPLE_OMP_TASK:
542 case GIMPLE_OMP_TEAMS:
543 entry_stmt = last_stmt (region->entry);
544 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
545 break;
546 case GIMPLE_OMP_TARGET:
547 entry_stmt = last_stmt (region->entry);
548 parent_fndecl
549 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
550 break;
551 default:
552 break;
553 }
554
555 if (parent_fndecl == NULL_TREE)
556 parent_fndecl = current_function_decl;
557 DECL_CONTEXT (child_fndecl) = parent_fndecl;
558
559 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
560 {
561 tree b = BLOCK_SUPERCONTEXT (entry_block);
562 if (TREE_CODE (b) == BLOCK)
563 {
564 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
565 BLOCK_VARS (b) = child_fndecl;
566 }
567 }
568 }
569
570 /* Build the function calls to GOMP_parallel etc to actually
571 generate the parallel operation. REGION is the parallel region
572 being expanded. BB is the block where to insert the code. WS_ARGS
573 will be set if this is a call to a combined parallel+workshare
574 construct, it contains the list of additional arguments needed by
575 the workshare construct. */
576
577 static void
578 expand_parallel_call (struct omp_region *region, basic_block bb,
579 gomp_parallel *entry_stmt,
580 vec<tree, va_gc> *ws_args)
581 {
582 tree t, t1, t2, val, cond, c, clauses, flags;
583 gimple_stmt_iterator gsi;
584 gimple *stmt;
585 enum built_in_function start_ix;
586 int start_ix2;
587 location_t clause_loc;
588 vec<tree, va_gc> *args;
589
590 clauses = gimple_omp_parallel_clauses (entry_stmt);
591
592 /* Determine what flavor of GOMP_parallel we will be
593 emitting. */
594 start_ix = BUILT_IN_GOMP_PARALLEL;
595 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
596 if (rtmp)
597 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
598 else if (is_combined_parallel (region))
599 {
600 switch (region->inner->type)
601 {
602 case GIMPLE_OMP_FOR:
603 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
604 switch (region->inner->sched_kind)
605 {
606 case OMP_CLAUSE_SCHEDULE_RUNTIME:
607 /* For lastprivate(conditional:), our implementation
608 requires monotonic behavior. */
609 if (region->inner->has_lastprivate_conditional != 0)
610 start_ix2 = 3;
611 else if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
613 start_ix2 = 6;
614 else if ((region->inner->sched_modifiers
615 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
616 start_ix2 = 7;
617 else
618 start_ix2 = 3;
619 break;
620 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
621 case OMP_CLAUSE_SCHEDULE_GUIDED:
622 if ((region->inner->sched_modifiers
623 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
624 && !region->inner->has_lastprivate_conditional)
625 {
626 start_ix2 = 3 + region->inner->sched_kind;
627 break;
628 }
629 /* FALLTHRU */
630 default:
631 start_ix2 = region->inner->sched_kind;
632 break;
633 }
634 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
635 start_ix = (enum built_in_function) start_ix2;
636 break;
637 case GIMPLE_OMP_SECTIONS:
638 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
639 break;
640 default:
641 gcc_unreachable ();
642 }
643 }
644
645 /* By default, the value of NUM_THREADS is zero (selected at run time)
646 and there is no conditional. */
647 cond = NULL_TREE;
648 val = build_int_cst (unsigned_type_node, 0);
649 flags = build_int_cst (unsigned_type_node, 0);
650
651 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
652 if (c)
653 cond = OMP_CLAUSE_IF_EXPR (c);
654
655 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
656 if (c)
657 {
658 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
659 clause_loc = OMP_CLAUSE_LOCATION (c);
660 }
661 else
662 clause_loc = gimple_location (entry_stmt);
663
664 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
665 if (c)
666 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
667
668 /* Ensure 'val' is of the correct type. */
669 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
670
671 /* If we found the clause 'if (cond)', build either
672 (cond != 0) or (cond ? val : 1u). */
673 if (cond)
674 {
675 cond = gimple_boolify (cond);
676
677 if (integer_zerop (val))
678 val = fold_build2_loc (clause_loc,
679 EQ_EXPR, unsigned_type_node, cond,
680 build_int_cst (TREE_TYPE (cond), 0));
681 else
682 {
683 basic_block cond_bb, then_bb, else_bb;
684 edge e, e_then, e_else;
685 tree tmp_then, tmp_else, tmp_join, tmp_var;
686
687 tmp_var = create_tmp_var (TREE_TYPE (val));
688 if (gimple_in_ssa_p (cfun))
689 {
690 tmp_then = make_ssa_name (tmp_var);
691 tmp_else = make_ssa_name (tmp_var);
692 tmp_join = make_ssa_name (tmp_var);
693 }
694 else
695 {
696 tmp_then = tmp_var;
697 tmp_else = tmp_var;
698 tmp_join = tmp_var;
699 }
700
701 e = split_block_after_labels (bb);
702 cond_bb = e->src;
703 bb = e->dest;
704 remove_edge (e);
705
706 then_bb = create_empty_bb (cond_bb);
707 else_bb = create_empty_bb (then_bb);
708 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
709 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
710
711 stmt = gimple_build_cond_empty (cond);
712 gsi = gsi_start_bb (cond_bb);
713 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
714
715 gsi = gsi_start_bb (then_bb);
716 expand_omp_build_assign (&gsi, tmp_then, val, true);
717
718 gsi = gsi_start_bb (else_bb);
719 expand_omp_build_assign (&gsi, tmp_else,
720 build_int_cst (unsigned_type_node, 1),
721 true);
722
723 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
724 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
725 add_bb_to_loop (then_bb, cond_bb->loop_father);
726 add_bb_to_loop (else_bb, cond_bb->loop_father);
727 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
728 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
729
730 if (gimple_in_ssa_p (cfun))
731 {
732 gphi *phi = create_phi_node (tmp_join, bb);
733 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
734 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
735 }
736
737 val = tmp_join;
738 }
739
740 gsi = gsi_start_bb (bb);
741 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
742 false, GSI_CONTINUE_LINKING);
743 }
744
745 gsi = gsi_last_nondebug_bb (bb);
746 t = gimple_omp_parallel_data_arg (entry_stmt);
747 if (t == NULL)
748 t1 = null_pointer_node;
749 else
750 t1 = build_fold_addr_expr (t);
751 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
752 t2 = build_fold_addr_expr (child_fndecl);
753
754 vec_alloc (args, 4 + vec_safe_length (ws_args));
755 args->quick_push (t2);
756 args->quick_push (t1);
757 args->quick_push (val);
758 if (ws_args)
759 args->splice (*ws_args);
760 args->quick_push (flags);
761
762 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
763 builtin_decl_explicit (start_ix), args);
764
765 if (rtmp)
766 {
767 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
768 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
769 fold_convert (type,
770 fold_convert (pointer_sized_int_node, t)));
771 }
772 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
773 false, GSI_CONTINUE_LINKING);
774
775 if (hsa_gen_requested_p ()
776 && parallel_needs_hsa_kernel_p (region))
777 {
778 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
779 hsa_register_kernel (child_cnode);
780 }
781 }
782
783 /* Build the function call to GOMP_task to actually
784 generate the task operation. BB is the block where to insert the code. */
785
786 static void
787 expand_task_call (struct omp_region *region, basic_block bb,
788 gomp_task *entry_stmt)
789 {
790 tree t1, t2, t3;
791 gimple_stmt_iterator gsi;
792 location_t loc = gimple_location (entry_stmt);
793
794 tree clauses = gimple_omp_task_clauses (entry_stmt);
795
796 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
797 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
798 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
799 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
800 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
801 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
802
803 unsigned int iflags
804 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
805 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
806 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
807
808 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
809 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
810 tree num_tasks = NULL_TREE;
811 bool ull = false;
812 if (taskloop_p)
813 {
814 gimple *g = last_stmt (region->outer->entry);
815 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
816 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
817 struct omp_for_data fd;
818 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
819 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
820 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
821 OMP_CLAUSE__LOOPTEMP_);
822 startvar = OMP_CLAUSE_DECL (startvar);
823 endvar = OMP_CLAUSE_DECL (endvar);
824 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
825 if (fd.loop.cond_code == LT_EXPR)
826 iflags |= GOMP_TASK_FLAG_UP;
827 tree tclauses = gimple_omp_for_clauses (g);
828 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
829 if (num_tasks)
830 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
831 else
832 {
833 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
834 if (num_tasks)
835 {
836 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
837 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
838 }
839 else
840 num_tasks = integer_zero_node;
841 }
842 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
843 if (ifc == NULL_TREE)
844 iflags |= GOMP_TASK_FLAG_IF;
845 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
846 iflags |= GOMP_TASK_FLAG_NOGROUP;
847 ull = fd.iter_type == long_long_unsigned_type_node;
848 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
849 iflags |= GOMP_TASK_FLAG_REDUCTION;
850 }
851 else if (priority)
852 iflags |= GOMP_TASK_FLAG_PRIORITY;
853
854 tree flags = build_int_cst (unsigned_type_node, iflags);
855
856 tree cond = boolean_true_node;
857 if (ifc)
858 {
859 if (taskloop_p)
860 {
861 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
862 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
863 build_int_cst (unsigned_type_node,
864 GOMP_TASK_FLAG_IF),
865 build_int_cst (unsigned_type_node, 0));
866 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
867 flags, t);
868 }
869 else
870 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
871 }
872
873 if (finalc)
874 {
875 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
876 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
877 build_int_cst (unsigned_type_node,
878 GOMP_TASK_FLAG_FINAL),
879 build_int_cst (unsigned_type_node, 0));
880 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
881 }
882 if (depend)
883 depend = OMP_CLAUSE_DECL (depend);
884 else
885 depend = build_int_cst (ptr_type_node, 0);
886 if (priority)
887 priority = fold_convert (integer_type_node,
888 OMP_CLAUSE_PRIORITY_EXPR (priority));
889 else
890 priority = integer_zero_node;
891
892 gsi = gsi_last_nondebug_bb (bb);
893 tree t = gimple_omp_task_data_arg (entry_stmt);
894 if (t == NULL)
895 t2 = null_pointer_node;
896 else
897 t2 = build_fold_addr_expr_loc (loc, t);
898 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
899 t = gimple_omp_task_copy_fn (entry_stmt);
900 if (t == NULL)
901 t3 = null_pointer_node;
902 else
903 t3 = build_fold_addr_expr_loc (loc, t);
904
905 if (taskloop_p)
906 t = build_call_expr (ull
907 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
908 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
909 11, t1, t2, t3,
910 gimple_omp_task_arg_size (entry_stmt),
911 gimple_omp_task_arg_align (entry_stmt), flags,
912 num_tasks, priority, startvar, endvar, step);
913 else
914 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
915 9, t1, t2, t3,
916 gimple_omp_task_arg_size (entry_stmt),
917 gimple_omp_task_arg_align (entry_stmt), cond, flags,
918 depend, priority);
919
920 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
921 false, GSI_CONTINUE_LINKING);
922 }
923
924 /* Build the function call to GOMP_taskwait_depend to actually
925 generate the taskwait operation. BB is the block where to insert the
926 code. */
927
928 static void
929 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
930 {
931 tree clauses = gimple_omp_task_clauses (entry_stmt);
932 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
933 if (depend == NULL_TREE)
934 return;
935
936 depend = OMP_CLAUSE_DECL (depend);
937
938 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
939 tree t
940 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
941 1, depend);
942
943 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
944 false, GSI_CONTINUE_LINKING);
945 }
946
947 /* Build the function call to GOMP_teams_reg to actually
948 generate the host teams operation. REGION is the teams region
949 being expanded. BB is the block where to insert the code. */
950
951 static void
952 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
953 {
954 tree clauses = gimple_omp_teams_clauses (entry_stmt);
955 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
956 if (num_teams == NULL_TREE)
957 num_teams = build_int_cst (unsigned_type_node, 0);
958 else
959 {
960 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
961 num_teams = fold_convert (unsigned_type_node, num_teams);
962 }
963 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
964 if (thread_limit == NULL_TREE)
965 thread_limit = build_int_cst (unsigned_type_node, 0);
966 else
967 {
968 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
969 thread_limit = fold_convert (unsigned_type_node, thread_limit);
970 }
971
972 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
973 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
974 if (t == NULL)
975 t1 = null_pointer_node;
976 else
977 t1 = build_fold_addr_expr (t);
978 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
979 tree t2 = build_fold_addr_expr (child_fndecl);
980
981 vec<tree, va_gc> *args;
982 vec_alloc (args, 5);
983 args->quick_push (t2);
984 args->quick_push (t1);
985 args->quick_push (num_teams);
986 args->quick_push (thread_limit);
987 /* For future extensibility. */
988 args->quick_push (build_zero_cst (unsigned_type_node));
989
990 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
991 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
992 args);
993
994 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
995 false, GSI_CONTINUE_LINKING);
996 }
997
998 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
999
1000 static tree
1001 vec2chain (vec<tree, va_gc> *v)
1002 {
1003 tree chain = NULL_TREE, t;
1004 unsigned ix;
1005
1006 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1007 {
1008 DECL_CHAIN (t) = chain;
1009 chain = t;
1010 }
1011
1012 return chain;
1013 }
1014
1015 /* Remove barriers in REGION->EXIT's block. Note that this is only
1016 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1017 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1018 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1019 removed. */
1020
1021 static void
1022 remove_exit_barrier (struct omp_region *region)
1023 {
1024 gimple_stmt_iterator gsi;
1025 basic_block exit_bb;
1026 edge_iterator ei;
1027 edge e;
1028 gimple *stmt;
1029 int any_addressable_vars = -1;
1030
1031 exit_bb = region->exit;
1032
1033 /* If the parallel region doesn't return, we don't have REGION->EXIT
1034 block at all. */
1035 if (! exit_bb)
1036 return;
1037
1038 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1039 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1040 statements that can appear in between are extremely limited -- no
1041 memory operations at all. Here, we allow nothing at all, so the
1042 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1043 gsi = gsi_last_nondebug_bb (exit_bb);
1044 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1045 gsi_prev_nondebug (&gsi);
1046 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1047 return;
1048
1049 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1050 {
1051 gsi = gsi_last_nondebug_bb (e->src);
1052 if (gsi_end_p (gsi))
1053 continue;
1054 stmt = gsi_stmt (gsi);
1055 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1056 && !gimple_omp_return_nowait_p (stmt))
1057 {
1058 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1059 in many cases. If there could be tasks queued, the barrier
1060 might be needed to let the tasks run before some local
1061 variable of the parallel that the task uses as shared
1062 runs out of scope. The task can be spawned either
1063 from within current function (this would be easy to check)
1064 or from some function it calls and gets passed an address
1065 of such a variable. */
1066 if (any_addressable_vars < 0)
1067 {
1068 gomp_parallel *parallel_stmt
1069 = as_a <gomp_parallel *> (last_stmt (region->entry));
1070 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1071 tree local_decls, block, decl;
1072 unsigned ix;
1073
1074 any_addressable_vars = 0;
1075 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1076 if (TREE_ADDRESSABLE (decl))
1077 {
1078 any_addressable_vars = 1;
1079 break;
1080 }
1081 for (block = gimple_block (stmt);
1082 !any_addressable_vars
1083 && block
1084 && TREE_CODE (block) == BLOCK;
1085 block = BLOCK_SUPERCONTEXT (block))
1086 {
1087 for (local_decls = BLOCK_VARS (block);
1088 local_decls;
1089 local_decls = DECL_CHAIN (local_decls))
1090 if (TREE_ADDRESSABLE (local_decls))
1091 {
1092 any_addressable_vars = 1;
1093 break;
1094 }
1095 if (block == gimple_block (parallel_stmt))
1096 break;
1097 }
1098 }
1099 if (!any_addressable_vars)
1100 gimple_omp_return_set_nowait (stmt);
1101 }
1102 }
1103 }
1104
1105 static void
1106 remove_exit_barriers (struct omp_region *region)
1107 {
1108 if (region->type == GIMPLE_OMP_PARALLEL)
1109 remove_exit_barrier (region);
1110
1111 if (region->inner)
1112 {
1113 region = region->inner;
1114 remove_exit_barriers (region);
1115 while (region->next)
1116 {
1117 region = region->next;
1118 remove_exit_barriers (region);
1119 }
1120 }
1121 }
1122
1123 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1124 calls. These can't be declared as const functions, but
1125 within one parallel body they are constant, so they can be
1126 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1127 which are declared const. Similarly for task body, except
1128 that in untied task omp_get_thread_num () can change at any task
1129 scheduling point. */
1130
1131 static void
1132 optimize_omp_library_calls (gimple *entry_stmt)
1133 {
1134 basic_block bb;
1135 gimple_stmt_iterator gsi;
1136 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1137 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1138 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1139 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1140 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1141 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1142 OMP_CLAUSE_UNTIED) != NULL);
1143
1144 FOR_EACH_BB_FN (bb, cfun)
1145 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1146 {
1147 gimple *call = gsi_stmt (gsi);
1148 tree decl;
1149
1150 if (is_gimple_call (call)
1151 && (decl = gimple_call_fndecl (call))
1152 && DECL_EXTERNAL (decl)
1153 && TREE_PUBLIC (decl)
1154 && DECL_INITIAL (decl) == NULL)
1155 {
1156 tree built_in;
1157
1158 if (DECL_NAME (decl) == thr_num_id)
1159 {
1160 /* In #pragma omp task untied omp_get_thread_num () can change
1161 during the execution of the task region. */
1162 if (untied_task)
1163 continue;
1164 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1165 }
1166 else if (DECL_NAME (decl) == num_thr_id)
1167 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1168 else
1169 continue;
1170
1171 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1172 || gimple_call_num_args (call) != 0)
1173 continue;
1174
1175 if (flag_exceptions && !TREE_NOTHROW (decl))
1176 continue;
1177
1178 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1179 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1180 TREE_TYPE (TREE_TYPE (built_in))))
1181 continue;
1182
1183 gimple_call_set_fndecl (call, built_in);
1184 }
1185 }
1186 }
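
To illustrate the effect on hypothetical user code inside a single parallel body: after this pass both calls below refer to the const __builtin_omp_get_num_threads, so later passes are free to combine them into one call:

    int nthr1 = omp_get_num_threads ();   /* rewritten to the const builtin */
    use (nthr1);
    int nthr2 = omp_get_num_threads ();   /* same value within this parallel */
    use (nthr2);
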
1187
1188 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1189 regimplified. */
1190
1191 static tree
1192 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1193 {
1194 tree t = *tp;
1195
1196 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1197 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1198 return t;
1199
1200 if (TREE_CODE (t) == ADDR_EXPR)
1201 recompute_tree_invariant_for_addr_expr (t);
1202
1203 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1204 return NULL_TREE;
1205 }
1206
1207 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1208
1209 static void
1210 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1211 bool after)
1212 {
1213 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1214 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1215 !after, after ? GSI_CONTINUE_LINKING
1216 : GSI_SAME_STMT);
1217 gimple *stmt = gimple_build_assign (to, from);
1218 if (after)
1219 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1220 else
1221 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1222 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1223 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1224 {
1225 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1226 gimple_regimplify_operands (stmt, &gsi);
1227 }
1228 }
1229
1230 /* Expand the OpenMP parallel or task directive starting at REGION. */
1231
1232 static void
1233 expand_omp_taskreg (struct omp_region *region)
1234 {
1235 basic_block entry_bb, exit_bb, new_bb;
1236 struct function *child_cfun;
1237 tree child_fn, block, t;
1238 gimple_stmt_iterator gsi;
1239 gimple *entry_stmt, *stmt;
1240 edge e;
1241 vec<tree, va_gc> *ws_args;
1242
1243 entry_stmt = last_stmt (region->entry);
1244 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1245 && gimple_omp_task_taskwait_p (entry_stmt))
1246 {
1247 new_bb = region->entry;
1248 gsi = gsi_last_nondebug_bb (region->entry);
1249 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1250 gsi_remove (&gsi, true);
1251 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1252 return;
1253 }
1254
1255 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1256 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1257
1258 entry_bb = region->entry;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1260 exit_bb = region->cont;
1261 else
1262 exit_bb = region->exit;
1263
1264 if (is_combined_parallel (region))
1265 ws_args = region->ws_args;
1266 else
1267 ws_args = NULL;
1268
1269 if (child_cfun->cfg)
1270 {
1271 /* Due to inlining, it may happen that we have already outlined
1272 the region, in which case all we need to do is make the
1273 sub-graph unreachable and emit the parallel call. */
1274 edge entry_succ_e, exit_succ_e;
1275
1276 entry_succ_e = single_succ_edge (entry_bb);
1277
1278 gsi = gsi_last_nondebug_bb (entry_bb);
1279 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1282 gsi_remove (&gsi, true);
1283
1284 new_bb = entry_bb;
1285 if (exit_bb)
1286 {
1287 exit_succ_e = single_succ_edge (exit_bb);
1288 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1289 }
1290 remove_edge_and_dominated_blocks (entry_succ_e);
1291 }
1292 else
1293 {
1294 unsigned srcidx, dstidx, num;
1295
1296 /* If the parallel region needs data sent from the parent
1297 function, then the very first statement (except possible
1298 tree profile counter updates) of the parallel body
1299 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1300 &.OMP_DATA_O is passed as an argument to the child function,
1301 we need to replace it with the argument as seen by the child
1302 function.
1303
1304 In most cases, this will end up being the identity assignment
1305 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1306 a function call that has been inlined, the original PARM_DECL
1307 .OMP_DATA_I may have been converted into a different local
1308 variable. In which case, we need to keep the assignment. */
1309 if (gimple_omp_taskreg_data_arg (entry_stmt))
1310 {
1311 basic_block entry_succ_bb
1312 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1313 : FALLTHRU_EDGE (entry_bb)->dest;
1314 tree arg;
1315 gimple *parcopy_stmt = NULL;
1316
1317 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1318 {
1319 gimple *stmt;
1320
1321 gcc_assert (!gsi_end_p (gsi));
1322 stmt = gsi_stmt (gsi);
1323 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1324 continue;
1325
1326 if (gimple_num_ops (stmt) == 2)
1327 {
1328 tree arg = gimple_assign_rhs1 (stmt);
1329
1330 /* We're ignoring the subcode because we're
1331 effectively doing a STRIP_NOPS. */
1332
1333 if (TREE_CODE (arg) == ADDR_EXPR
1334 && (TREE_OPERAND (arg, 0)
1335 == gimple_omp_taskreg_data_arg (entry_stmt)))
1336 {
1337 parcopy_stmt = stmt;
1338 break;
1339 }
1340 }
1341 }
1342
1343 gcc_assert (parcopy_stmt != NULL);
1344 arg = DECL_ARGUMENTS (child_fn);
1345
1346 if (!gimple_in_ssa_p (cfun))
1347 {
1348 if (gimple_assign_lhs (parcopy_stmt) == arg)
1349 gsi_remove (&gsi, true);
1350 else
1351 {
1352 /* ?? Is setting the subcode really necessary ?? */
1353 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1354 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1355 }
1356 }
1357 else
1358 {
1359 tree lhs = gimple_assign_lhs (parcopy_stmt);
1360 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1361 /* We'd like to set the rhs to the default def in the child_fn,
1362 but it's too early to create ssa names in the child_fn.
1363 Instead, we set the rhs to the parm. In
1364 move_sese_region_to_fn, we introduce a default def for the
1365 parm, map the parm to its default def, and once we encounter
1366 this stmt, replace the parm with the default def. */
1367 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1368 update_stmt (parcopy_stmt);
1369 }
1370 }
1371
1372 /* Declare local variables needed in CHILD_CFUN. */
1373 block = DECL_INITIAL (child_fn);
1374 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1375 /* The gimplifier could record temporaries in parallel/task block
1376 rather than in containing function's local_decls chain,
1377 which would mean cgraph missed finalizing them. Do it now. */
1378 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1379 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1380 varpool_node::finalize_decl (t);
1381 DECL_SAVED_TREE (child_fn) = NULL;
1382 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1383 gimple_set_body (child_fn, NULL);
1384 TREE_USED (block) = 1;
1385
1386 /* Reset DECL_CONTEXT on function arguments. */
1387 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1388 DECL_CONTEXT (t) = child_fn;
1389
1390 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1391 so that it can be moved to the child function. */
1392 gsi = gsi_last_nondebug_bb (entry_bb);
1393 stmt = gsi_stmt (gsi);
1394 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1395 || gimple_code (stmt) == GIMPLE_OMP_TASK
1396 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1397 e = split_block (entry_bb, stmt);
1398 gsi_remove (&gsi, true);
1399 entry_bb = e->dest;
1400 edge e2 = NULL;
1401 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1402 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1403 else
1404 {
1405 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1406 gcc_assert (e2->dest == region->exit);
1407 remove_edge (BRANCH_EDGE (entry_bb));
1408 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1409 gsi = gsi_last_nondebug_bb (region->exit);
1410 gcc_assert (!gsi_end_p (gsi)
1411 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1412 gsi_remove (&gsi, true);
1413 }
1414
1415 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1416 if (exit_bb)
1417 {
1418 gsi = gsi_last_nondebug_bb (exit_bb);
1419 gcc_assert (!gsi_end_p (gsi)
1420 && (gimple_code (gsi_stmt (gsi))
1421 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1422 stmt = gimple_build_return (NULL);
1423 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1424 gsi_remove (&gsi, true);
1425 }
1426
1427 /* Move the parallel region into CHILD_CFUN. */
1428
1429 if (gimple_in_ssa_p (cfun))
1430 {
1431 init_tree_ssa (child_cfun);
1432 init_ssa_operands (child_cfun);
1433 child_cfun->gimple_df->in_ssa_p = true;
1434 block = NULL_TREE;
1435 }
1436 else
1437 block = gimple_block (entry_stmt);
1438
1439 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1440 if (exit_bb)
1441 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1442 if (e2)
1443 {
1444 basic_block dest_bb = e2->dest;
1445 if (!exit_bb)
1446 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1447 remove_edge (e2);
1448 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1449 }
1450 /* When the OMP expansion process cannot guarantee an up-to-date
1451 loop tree, arrange for the child function to fix up loops. */
1452 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1453 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1454
1455 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1456 num = vec_safe_length (child_cfun->local_decls);
1457 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1458 {
1459 t = (*child_cfun->local_decls)[srcidx];
1460 if (DECL_CONTEXT (t) == cfun->decl)
1461 continue;
1462 if (srcidx != dstidx)
1463 (*child_cfun->local_decls)[dstidx] = t;
1464 dstidx++;
1465 }
1466 if (dstidx != num)
1467 vec_safe_truncate (child_cfun->local_decls, dstidx);
1468
1469 /* Inform the callgraph about the new function. */
1470 child_cfun->curr_properties = cfun->curr_properties;
1471 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1472 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1473 cgraph_node *node = cgraph_node::get_create (child_fn);
1474 node->parallelized_function = 1;
1475 cgraph_node::add_new_function (child_fn, true);
1476
1477 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1478 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1479
1480 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1481 fixed in a following pass. */
1482 push_cfun (child_cfun);
1483 if (need_asm)
1484 assign_assembler_name_if_needed (child_fn);
1485
1486 if (optimize)
1487 optimize_omp_library_calls (entry_stmt);
1488 update_max_bb_count ();
1489 cgraph_edge::rebuild_edges ();
1490
1491 /* Some EH regions might become dead, see PR34608. If
1492 pass_cleanup_cfg isn't the first pass to happen with the
1493 new child, these dead EH edges might cause problems.
1494 Clean them up now. */
1495 if (flag_exceptions)
1496 {
1497 basic_block bb;
1498 bool changed = false;
1499
1500 FOR_EACH_BB_FN (bb, cfun)
1501 changed |= gimple_purge_dead_eh_edges (bb);
1502 if (changed)
1503 cleanup_tree_cfg ();
1504 }
1505 if (gimple_in_ssa_p (cfun))
1506 update_ssa (TODO_update_ssa);
1507 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1508 verify_loop_structure ();
1509 pop_cfun ();
1510
1511 if (dump_file && !gimple_in_ssa_p (cfun))
1512 {
1513 omp_any_child_fn_dumped = true;
1514 dump_function_header (dump_file, child_fn, dump_flags);
1515 dump_function_to_file (child_fn, dump_file, dump_flags);
1516 }
1517 }
1518
1519 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1520
1521 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1522 expand_parallel_call (region, new_bb,
1523 as_a <gomp_parallel *> (entry_stmt), ws_args);
1524 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1525 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1526 else
1527 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1528 if (gimple_in_ssa_p (cfun))
1529 update_ssa (TODO_update_ssa_only_virtuals);
1530 }
1531
1532 /* Information about members of an OpenACC collapsed loop nest. */
1533
1534 struct oacc_collapse
1535 {
1536 tree base; /* Base value. */
1537 tree iters; /* Number of steps. */
1538 tree step; /* Step size. */
1539 tree tile; /* Tile increment (if tiled). */
1540 tree outer; /* Tile iterator var. */
1541 };
1542
1543 /* Helper for expand_oacc_for. Determine collapsed loop information.
1544 Fill in COUNTS array. Emit any initialization code before GSI.
1545 Return the calculated outer loop bound of BOUND_TYPE. */
1546
1547 static tree
1548 expand_oacc_collapse_init (const struct omp_for_data *fd,
1549 gimple_stmt_iterator *gsi,
1550 oacc_collapse *counts, tree bound_type,
1551 location_t loc)
1552 {
1553 tree tiling = fd->tiling;
1554 tree total = build_int_cst (bound_type, 1);
1555 int ix;
1556
1557 gcc_assert (integer_onep (fd->loop.step));
1558 gcc_assert (integer_zerop (fd->loop.n1));
1559
1560 /* When tiling, the first operand of the tile clause applies to the
1561 innermost loop, and we work outwards from there. Seems
1562 backwards, but whatever. */
1563 for (ix = fd->collapse; ix--;)
1564 {
1565 const omp_for_data_loop *loop = &fd->loops[ix];
1566
1567 tree iter_type = TREE_TYPE (loop->v);
1568 tree diff_type = iter_type;
1569 tree plus_type = iter_type;
1570
1571 gcc_assert (loop->cond_code == fd->loop.cond_code);
1572
1573 if (POINTER_TYPE_P (iter_type))
1574 plus_type = sizetype;
1575 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1576 diff_type = signed_type_for (diff_type);
1577 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1578 diff_type = integer_type_node;
1579
1580 if (tiling)
1581 {
1582 tree num = build_int_cst (integer_type_node, fd->collapse);
1583 tree loop_no = build_int_cst (integer_type_node, ix);
1584 tree tile = TREE_VALUE (tiling);
1585 gcall *call
1586 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1587 /* gwv-outer=*/integer_zero_node,
1588 /* gwv-inner=*/integer_zero_node);
1589
1590 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1591 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1592 gimple_call_set_lhs (call, counts[ix].tile);
1593 gimple_set_location (call, loc);
1594 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1595
1596 tiling = TREE_CHAIN (tiling);
1597 }
1598 else
1599 {
1600 counts[ix].tile = NULL;
1601 counts[ix].outer = loop->v;
1602 }
1603
1604 tree b = loop->n1;
1605 tree e = loop->n2;
1606 tree s = loop->step;
1607 bool up = loop->cond_code == LT_EXPR;
1608 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1609 bool negating;
1610 tree expr;
1611
1612 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1615 true, GSI_SAME_STMT);
1616
1617 /* Convert the step, avoiding possible unsigned->signed overflow. */
1618 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1619 if (negating)
1620 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1621 s = fold_convert (diff_type, s);
1622 if (negating)
1623 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1624 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1625 true, GSI_SAME_STMT);
1626
1627 /* Determine the range, avoiding possible unsigned->signed overflow. */
1628 negating = !up && TYPE_UNSIGNED (iter_type);
1629 expr = fold_build2 (MINUS_EXPR, plus_type,
1630 fold_convert (plus_type, negating ? b : e),
1631 fold_convert (plus_type, negating ? e : b));
1632 expr = fold_convert (diff_type, expr);
1633 if (negating)
1634 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1635 tree range = force_gimple_operand_gsi
1636 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1637
1638 /* Determine number of iterations. */
1639 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1640 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1641 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1642
1643 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1644 true, GSI_SAME_STMT);
1645
1646 counts[ix].base = b;
1647 counts[ix].iters = iters;
1648 counts[ix].step = s;
1649
1650 total = fold_build2 (MULT_EXPR, bound_type, total,
1651 fold_convert (bound_type, iters));
1652 }
1653
1654 return total;
1655 }
1656
1657 /* Emit initializers for collapsed loop members. INNER is true if
1658 this is for the element loop of a TILE. IVAR is the outer
1659 loop iteration variable, from which collapsed loop iteration values
1660 are calculated. COUNTS array has been initialized by
1661 expand_oacc_collapse_init. */
1662
1663 static void
1664 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1665 gimple_stmt_iterator *gsi,
1666 const oacc_collapse *counts, tree ivar)
1667 {
1668 tree ivar_type = TREE_TYPE (ivar);
1669
1670 /* The most rapidly changing iteration variable is the innermost
1671 one. */
1672 for (int ix = fd->collapse; ix--;)
1673 {
1674 const omp_for_data_loop *loop = &fd->loops[ix];
1675 const oacc_collapse *collapse = &counts[ix];
1676 tree v = inner ? loop->v : collapse->outer;
1677 tree iter_type = TREE_TYPE (v);
1678 tree diff_type = TREE_TYPE (collapse->step);
1679 tree plus_type = iter_type;
1680 enum tree_code plus_code = PLUS_EXPR;
1681 tree expr;
1682
1683 if (POINTER_TYPE_P (iter_type))
1684 {
1685 plus_code = POINTER_PLUS_EXPR;
1686 plus_type = sizetype;
1687 }
1688
1689 expr = ivar;
1690 if (ix)
1691 {
1692 tree mod = fold_convert (ivar_type, collapse->iters);
1693 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1694 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1695 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1696 true, GSI_SAME_STMT);
1697 }
1698
1699 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1700 collapse->step);
1701 expr = fold_build2 (plus_code, iter_type,
1702 inner ? collapse->outer : collapse->base,
1703 fold_convert (plus_type, expr));
1704 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1705 true, GSI_SAME_STMT);
1706 gassign *ass = gimple_build_assign (v, expr);
1707 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1708 }
1709 }
1710
1711 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. */
1737
1738 /* NOTE: It *could* be better to merge all of the BBs together,
1739 creating one larger BB with all the computation and the unexpected
1740 jump at the end. I.e.
1741
1742 bool zero3, zero2, zero1, zero;
1743
1744 zero3 = N32 c3 N31;
1745 count3 = (N32 - N31) /[cl] STEP3;
1746 zero2 = N22 c2 N21;
1747 count2 = (N22 - N21) /[cl] STEP2;
1748 zero1 = N12 c1 N11;
1749 count1 = (N12 - N11) /[cl] STEP1;
1750 zero = zero3 || zero2 || zero1;
1751 count = count1 * count2 * count3;
1752 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1753
1754 After all, we expect zero to be false, and thus we expect to have to
1755 evaluate all of the comparison expressions, so short-circuiting
1756 oughtn't be a win. Since the condition isn't protecting a
1757 denominator, we're not concerned about divide-by-zero, so we can
1758 fully evaluate count even if a numerator turned out to be wrong.
1759
1760 It seems like putting this all together would create much better
1761 scheduling opportunities, and less pressure on the chip's branch
1762 predictor. */
1763
1764 static void
1765 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1766 basic_block &entry_bb, tree *counts,
1767 basic_block &zero_iter1_bb, int &first_zero_iter1,
1768 basic_block &zero_iter2_bb, int &first_zero_iter2,
1769 basic_block &l2_dom_bb)
1770 {
1771 tree t, type = TREE_TYPE (fd->loop.v);
1772 edge e, ne;
1773 int i;
1774
1775 /* Collapsed loops need work for expansion into SSA form. */
1776 gcc_assert (!gimple_in_ssa_p (cfun));
1777
1778 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1779 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1780 {
1781 gcc_assert (fd->ordered == 0);
1782 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1783 isn't supposed to be handled, as the inner loop doesn't
1784 use it. */
1785 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 for (i = 0; i < fd->collapse; i++)
1789 {
1790 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1791 OMP_CLAUSE__LOOPTEMP_);
1792 gcc_assert (innerc);
1793 if (i)
1794 counts[i] = OMP_CLAUSE_DECL (innerc);
1795 else
1796 counts[0] = NULL_TREE;
1797 }
1798 return;
1799 }
1800
1801 for (i = fd->collapse; i < fd->ordered; i++)
1802 {
1803 tree itype = TREE_TYPE (fd->loops[i].v);
1804 counts[i] = NULL_TREE;
1805 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1806 fold_convert (itype, fd->loops[i].n1),
1807 fold_convert (itype, fd->loops[i].n2));
1808 if (t && integer_zerop (t))
1809 {
1810 for (i = fd->collapse; i < fd->ordered; i++)
1811 counts[i] = build_int_cst (type, 0);
1812 break;
1813 }
1814 }
1815 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1816 {
1817 tree itype = TREE_TYPE (fd->loops[i].v);
1818
1819 if (i >= fd->collapse && counts[i])
1820 continue;
1821 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1822 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1823 fold_convert (itype, fd->loops[i].n1),
1824 fold_convert (itype, fd->loops[i].n2)))
1825 == NULL_TREE || !integer_onep (t)))
1826 {
1827 gcond *cond_stmt;
1828 tree n1, n2;
1829 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1830 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1831 true, GSI_SAME_STMT);
1832 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1833 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1834 true, GSI_SAME_STMT);
1835 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1836 NULL_TREE, NULL_TREE);
1837 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1838 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1839 expand_omp_regimplify_p, NULL, NULL)
1840 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1841 expand_omp_regimplify_p, NULL, NULL))
1842 {
1843 *gsi = gsi_for_stmt (cond_stmt);
1844 gimple_regimplify_operands (cond_stmt, gsi);
1845 }
1846 e = split_block (entry_bb, cond_stmt);
1847 basic_block &zero_iter_bb
1848 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1849 int &first_zero_iter
1850 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1851 if (zero_iter_bb == NULL)
1852 {
1853 gassign *assign_stmt;
1854 first_zero_iter = i;
1855 zero_iter_bb = create_empty_bb (entry_bb);
1856 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1857 *gsi = gsi_after_labels (zero_iter_bb);
1858 if (i < fd->collapse)
1859 assign_stmt = gimple_build_assign (fd->loop.n2,
1860 build_zero_cst (type));
1861 else
1862 {
1863 counts[i] = create_tmp_reg (type, ".count");
1864 assign_stmt
1865 = gimple_build_assign (counts[i], build_zero_cst (type));
1866 }
1867 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1868 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1869 entry_bb);
1870 }
1871 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1872 ne->probability = profile_probability::very_unlikely ();
1873 e->flags = EDGE_TRUE_VALUE;
1874 e->probability = ne->probability.invert ();
1875 if (l2_dom_bb == NULL)
1876 l2_dom_bb = entry_bb;
1877 entry_bb = e->dest;
1878 *gsi = gsi_last_nondebug_bb (entry_bb);
1879 }
1880
1881 if (POINTER_TYPE_P (itype))
1882 itype = signed_type_for (itype);
1883 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1884 ? -1 : 1));
1885 t = fold_build2 (PLUS_EXPR, itype,
1886 fold_convert (itype, fd->loops[i].step), t);
1887 t = fold_build2 (PLUS_EXPR, itype, t,
1888 fold_convert (itype, fd->loops[i].n2));
1889 t = fold_build2 (MINUS_EXPR, itype, t,
1890 fold_convert (itype, fd->loops[i].n1));
1891 /* ?? We could probably use CEIL_DIV_EXPR instead of
1892 TRUNC_DIV_EXPR plus the manual adjustment.  But perhaps we can't
1893 generate the same code in the end, because generically we
1894 don't know that the values involved must be negative for
1895 GT.  ?? */
1896 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1897 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1898 fold_build1 (NEGATE_EXPR, itype, t),
1899 fold_build1 (NEGATE_EXPR, itype,
1900 fold_convert (itype,
1901 fd->loops[i].step)));
1902 else
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1904 fold_convert (itype, fd->loops[i].step));
1905 t = fold_convert (type, t);
1906 if (TREE_CODE (t) == INTEGER_CST)
1907 counts[i] = t;
1908 else
1909 {
1910 if (i < fd->collapse || i != first_zero_iter2)
1911 counts[i] = create_tmp_reg (type, ".count");
1912 expand_omp_build_assign (gsi, counts[i], t);
1913 }
1914 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1915 {
1916 if (i == 0)
1917 t = counts[0];
1918 else
1919 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1920 expand_omp_build_assign (gsi, fd->loop.n2, t);
1921 }
1922 }
1923 }
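
/* Editorial aside, not part of GCC: a minimal standalone sketch of the
   per-loop trip-count formula emitted above for signed '<' and '>' loops.
   The name and signature are hypothetical; it assumes STEP is nonzero and
   points in the direction implied by the condition.  */

static long
example_omp_trip_count (long n1, long n2, long step, int cond_is_lt)
{
  /* Mirrors the zero-iteration test folded above: if the condition is
     already false at N1, the loop runs zero times.  */
  if (cond_is_lt ? !(n1 < n2) : !(n1 > n2))
    return 0;
  /* adj = STEP - 1 for '<' (or STEP + 1 for '>'), so the truncating
     division rounds the count up to cover a partial last step.  */
  long adj = cond_is_lt ? -1 : 1;
  return (step + adj + n2 - n1) / step;
}

/* E.g. n1 = 0, n2 = 10, step = 3, '<' gives (3 - 1 + 10 - 0) / 3 = 4,
   matching the iterations 0, 3, 6, 9.  */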
1924
1925 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1926 T = V;
1927 V3 = N31 + (T % count3) * STEP3;
1928 T = T / count3;
1929 V2 = N21 + (T % count2) * STEP2;
1930 T = T / count2;
1931 V1 = N11 + T * STEP1;
1932 if this loop doesn't have an inner loop construct combined with it.
1933 If it does have an inner loop construct combined with it and the
1934 iteration count isn't known constant, store values from counts array
1935 into its _looptemp_ temporaries instead. */
1936
1937 static void
1938 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1939 tree *counts, gimple *inner_stmt, tree startvar)
1940 {
1941 int i;
1942 if (gimple_omp_for_combined_p (fd->for_stmt))
1943 {
1944 /* If fd->loop.n2 is constant, then no propagation of the counts
1945 is needed, they are constant. */
1946 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1947 return;
1948
1949 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1950 ? gimple_omp_taskreg_clauses (inner_stmt)
1951 : gimple_omp_for_clauses (inner_stmt);
1952 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1953 isn't supposed to be handled, as the inner loop doesn't
1954 use it. */
1955 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1956 gcc_assert (innerc);
1957 for (i = 0; i < fd->collapse; i++)
1958 {
1959 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1960 OMP_CLAUSE__LOOPTEMP_);
1961 gcc_assert (innerc);
1962 if (i)
1963 {
1964 tree tem = OMP_CLAUSE_DECL (innerc);
1965 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1967 false, GSI_CONTINUE_LINKING);
1968 gassign *stmt = gimple_build_assign (tem, t);
1969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1970 }
1971 }
1972 return;
1973 }
1974
1975 tree type = TREE_TYPE (fd->loop.v);
1976 tree tem = create_tmp_reg (type, ".tem");
1977 gassign *stmt = gimple_build_assign (tem, startvar);
1978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1979
1980 for (i = fd->collapse - 1; i >= 0; i--)
1981 {
1982 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1983 itype = vtype;
1984 if (POINTER_TYPE_P (vtype))
1985 itype = signed_type_for (vtype);
1986 if (i != 0)
1987 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1988 else
1989 t = tem;
1990 t = fold_convert (itype, t);
1991 t = fold_build2 (MULT_EXPR, itype, t,
1992 fold_convert (itype, fd->loops[i].step));
1993 if (POINTER_TYPE_P (vtype))
1994 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1995 else
1996 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1997 t = force_gimple_operand_gsi (gsi, t,
1998 DECL_P (fd->loops[i].v)
1999 && TREE_ADDRESSABLE (fd->loops[i].v),
2000 NULL_TREE, false,
2001 GSI_CONTINUE_LINKING);
2002 stmt = gimple_build_assign (fd->loops[i].v, t);
2003 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2004 if (i != 0)
2005 {
2006 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2007 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2008 false, GSI_CONTINUE_LINKING);
2009 stmt = gimple_build_assign (tem, t);
2010 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2011 }
2012 }
2013 }
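
/* Editorial aside, not part of GCC: the mod/div decomposition the code
   above emits for a collapse(3) loop nest, written as a plain-C sketch.
   counts[] holds the per-loop trip counts computed earlier; n1[] and
   step[] are the lower bounds and steps.  Names are hypothetical.  */

static void
example_init_collapsed_vars (long t, const long counts[3],
			     const long n1[3], const long step[3],
			     long v[3])
{
  /* Innermost loop varies fastest: V3 = N31 + (T % count3) * STEP3.  */
  v[2] = n1[2] + (t % counts[2]) * step[2];
  t = t / counts[2];
  v[1] = n1[1] + (t % counts[1]) * step[1];
  t = t / counts[1];
  /* Outermost loop: V1 = N11 + T * STEP1.  */
  v[0] = n1[0] + t * step[0];
}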
2014
2015 /* Helper function for expand_omp_for_*. Generate code like:
2016 L10:
2017 V3 += STEP3;
2018 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2019 L11:
2020 V3 = N31;
2021 V2 += STEP2;
2022 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2023 L12:
2024 V2 = N21;
2025 V1 += STEP1;
2026 goto BODY_BB; */
2027
2028 static basic_block
2029 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2030 basic_block body_bb)
2031 {
2032 basic_block last_bb, bb, collapse_bb = NULL;
2033 int i;
2034 gimple_stmt_iterator gsi;
2035 edge e;
2036 tree t;
2037 gimple *stmt;
2038
2039 last_bb = cont_bb;
2040 for (i = fd->collapse - 1; i >= 0; i--)
2041 {
2042 tree vtype = TREE_TYPE (fd->loops[i].v);
2043
2044 bb = create_empty_bb (last_bb);
2045 add_bb_to_loop (bb, last_bb->loop_father);
2046 gsi = gsi_start_bb (bb);
2047
2048 if (i < fd->collapse - 1)
2049 {
2050 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2051 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2052
2053 t = fd->loops[i + 1].n1;
2054 t = force_gimple_operand_gsi (&gsi, t,
2055 DECL_P (fd->loops[i + 1].v)
2056 && TREE_ADDRESSABLE (fd->loops[i
2057 + 1].v),
2058 NULL_TREE, false,
2059 GSI_CONTINUE_LINKING);
2060 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2062 }
2063 else
2064 collapse_bb = bb;
2065
2066 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2067
2068 if (POINTER_TYPE_P (vtype))
2069 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2070 else
2071 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2072 t = force_gimple_operand_gsi (&gsi, t,
2073 DECL_P (fd->loops[i].v)
2074 && TREE_ADDRESSABLE (fd->loops[i].v),
2075 NULL_TREE, false, GSI_CONTINUE_LINKING);
2076 stmt = gimple_build_assign (fd->loops[i].v, t);
2077 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2078
2079 if (i > 0)
2080 {
2081 t = fd->loops[i].n2;
2082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 tree v = fd->loops[i].v;
2085 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2086 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2087 false, GSI_CONTINUE_LINKING);
2088 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2089 stmt = gimple_build_cond_empty (t);
2090 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2091 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2092 expand_omp_regimplify_p, NULL, NULL)
2093 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2094 expand_omp_regimplify_p, NULL, NULL))
2095 gimple_regimplify_operands (stmt, &gsi);
2096 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2097 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2098 }
2099 else
2100 make_edge (bb, body_bb, EDGE_FALLTHRU);
2101 last_bb = bb;
2102 }
2103
2104 return collapse_bb;
2105 }
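
/* Editorial aside, not part of GCC: the "odometer" update sketched by the
   comment above, for a collapse(3) nest whose conditions are all '<'.
   Advances v[] in place to the next iteration's values; the outermost
   variable is bumped unconditionally, just as the generated code falls
   through to BODY_BB, because the enclosing driver checks the combined
   iteration count separately.  Names are hypothetical.  */

static void
example_update_collapsed_vars (long v[3], const long n1[3],
			       const long n2[3], const long step[3])
{
  v[2] += step[2];
  if (v[2] < n2[2])
    return;
  v[2] = n1[2];
  v[1] += step[1];
  if (v[1] < n2[1])
    return;
  v[1] = n1[1];
  v[0] += step[0];
}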
2106
2107 /* Expand #pragma omp ordered depend(source). */
2108
2109 static void
2110 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2111 tree *counts, location_t loc)
2112 {
2113 enum built_in_function source_ix
2114 = fd->iter_type == long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2116 gimple *g
2117 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2118 build_fold_addr_expr (counts[fd->ordered]));
2119 gimple_set_location (g, loc);
2120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2121 }
2122
2123 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
2124
2125 static void
2126 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2127 tree *counts, tree c, location_t loc)
2128 {
2129 auto_vec<tree, 10> args;
2130 enum built_in_function sink_ix
2131 = fd->iter_type == long_integer_type_node
2132 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2133 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2134 int i;
2135 gimple_stmt_iterator gsi2 = *gsi;
2136 bool warned_step = false;
2137
2138 for (i = 0; i < fd->ordered; i++)
2139 {
2140 tree step = NULL_TREE;
2141 off = TREE_PURPOSE (deps);
2142 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2143 {
2144 step = TREE_OPERAND (off, 1);
2145 off = TREE_OPERAND (off, 0);
2146 }
2147 if (!integer_zerop (off))
2148 {
2149 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2150 || fd->loops[i].cond_code == GT_EXPR);
2151 bool forward = fd->loops[i].cond_code == LT_EXPR;
2152 if (step)
2153 {
2154 /* Non-simple Fortran DO loops.  If the step is variable,
2155 we don't even know the direction at compile time, so we
2156 can't warn. */
2157 if (TREE_CODE (step) != INTEGER_CST)
2158 break;
2159 forward = tree_int_cst_sgn (step) != -1;
2160 }
2161 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2162 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2163 "waiting for lexically later iteration");
2164 break;
2165 }
2166 deps = TREE_CHAIN (deps);
2167 }
2168 /* If all offsets corresponding to the collapsed loops are zero,
2169 this depend clause can be ignored.  FIXME: but a flush is still
2170 needed; we should emit one __sync_synchronize () for it (perhaps
2171 conditionally).  Solve this together with the conservative
2172 dependence folding optimization.
2173 if (i >= fd->collapse)
2174 return; */
2175
2176 deps = OMP_CLAUSE_DECL (c);
2177 gsi_prev (&gsi2);
2178 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2179 edge e2 = split_block_after_labels (e1->dest);
2180
2181 gsi2 = gsi_after_labels (e1->dest);
2182 *gsi = gsi_last_bb (e1->src);
2183 for (i = 0; i < fd->ordered; i++)
2184 {
2185 tree itype = TREE_TYPE (fd->loops[i].v);
2186 tree step = NULL_TREE;
2187 tree orig_off = NULL_TREE;
2188 if (POINTER_TYPE_P (itype))
2189 itype = sizetype;
2190 if (i)
2191 deps = TREE_CHAIN (deps);
2192 off = TREE_PURPOSE (deps);
2193 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2194 {
2195 step = TREE_OPERAND (off, 1);
2196 off = TREE_OPERAND (off, 0);
2197 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2198 && integer_onep (fd->loops[i].step)
2199 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2200 }
2201 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2202 if (step)
2203 {
2204 off = fold_convert_loc (loc, itype, off);
2205 orig_off = off;
2206 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2207 }
2208
2209 if (integer_zerop (off))
2210 t = boolean_true_node;
2211 else
2212 {
2213 tree a;
2214 tree co = fold_convert_loc (loc, itype, off);
2215 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2216 {
2217 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2218 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2219 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2220 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2221 co);
2222 }
2223 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2224 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2225 fd->loops[i].v, co);
2226 else
2227 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2228 fd->loops[i].v, co);
2229 if (step)
2230 {
2231 tree t1, t2;
2232 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2233 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2234 fd->loops[i].n1);
2235 else
2236 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2237 fd->loops[i].n2);
2238 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2239 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2240 fd->loops[i].n2);
2241 else
2242 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2243 fd->loops[i].n1);
2244 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2245 step, build_int_cst (TREE_TYPE (step), 0));
2246 if (TREE_CODE (step) != INTEGER_CST)
2247 {
2248 t1 = unshare_expr (t1);
2249 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2250 false, GSI_CONTINUE_LINKING);
2251 t2 = unshare_expr (t2);
2252 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 }
2255 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2256 t, t2, t1);
2257 }
2258 else if (fd->loops[i].cond_code == LT_EXPR)
2259 {
2260 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2261 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2262 fd->loops[i].n1);
2263 else
2264 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2265 fd->loops[i].n2);
2266 }
2267 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2268 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2269 fd->loops[i].n2);
2270 else
2271 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2272 fd->loops[i].n1);
2273 }
2274 if (cond)
2275 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2276 else
2277 cond = t;
2278
2279 off = fold_convert_loc (loc, itype, off);
2280
2281 if (step
2282 || (fd->loops[i].cond_code == LT_EXPR
2283 ? !integer_onep (fd->loops[i].step)
2284 : !integer_minus_onep (fd->loops[i].step)))
2285 {
2286 if (step == NULL_TREE
2287 && TYPE_UNSIGNED (itype)
2288 && fd->loops[i].cond_code == GT_EXPR)
2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2290 fold_build1_loc (loc, NEGATE_EXPR, itype,
2291 s));
2292 else
2293 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2294 orig_off ? orig_off : off, s);
2295 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2296 build_int_cst (itype, 0));
2297 if (integer_zerop (t) && !warned_step)
2298 {
2299 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2300 "refers to iteration never in the iteration "
2301 "space");
2302 warned_step = true;
2303 }
2304 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2305 cond, t);
2306 }
2307
2308 if (i <= fd->collapse - 1 && fd->collapse > 1)
2309 t = fd->loop.v;
2310 else if (counts[i])
2311 t = counts[i];
2312 else
2313 {
2314 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2315 fd->loops[i].v, fd->loops[i].n1);
2316 t = fold_convert_loc (loc, fd->iter_type, t);
2317 }
2318 if (step)
2319 /* We have already divided off by step above. */;
2320 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2322 fold_build1_loc (loc, NEGATE_EXPR, itype,
2323 s));
2324 else
2325 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2326 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2327 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2328 off = fold_convert_loc (loc, fd->iter_type, off);
2329 if (i <= fd->collapse - 1 && fd->collapse > 1)
2330 {
2331 if (i)
2332 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2333 off);
2334 if (i < fd->collapse - 1)
2335 {
2336 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2337 counts[i]);
2338 continue;
2339 }
2340 }
2341 off = unshare_expr (off);
2342 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2343 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2344 true, GSI_SAME_STMT);
2345 args.safe_push (t);
2346 }
2347 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2348 gimple_set_location (g, loc);
2349 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2350
2351 cond = unshare_expr (cond);
2352 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2353 GSI_CONTINUE_LINKING);
2354 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2355 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2356 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2357 e1->probability = e3->probability.invert ();
2358 e1->flags = EDGE_TRUE_VALUE;
2359 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2360
2361 *gsi = gsi_after_labels (e2->dest);
2362 }
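
/* Editorial aside, not part of GCC: a simplified sketch of the per-
   dimension argument built above for the doacross wait call.  For
   depend(sink: v + SINK_OFF) on a '<' loop, the argument is the
   iteration number of V + SINK_OFF; the offset is converted from
   variable space to iteration space by dividing by the step.  The
   sketch ignores pointer iterators, unsigned wraparound checks and the
   collapse > 1 flattening handled above.  Names are hypothetical.  */

static long
example_sink_wait_index (long v, long n1, long step, long sink_off)
{
  long cur = (v - n1) / step;	/* current iteration number */
  return cur + sink_off / step;	/* iteration the sink refers to */
}

/* E.g. depend(sink: i - 2) on for (i = 0; i < n; i++) at i == 7 asks the
   runtime to wait for iteration 5.  */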
2363
2364 /* Expand all #pragma omp ordered depend(source) and
2365 #pragma omp ordered depend(sink:...) constructs in the current
2366 #pragma omp for ordered(n) region. */
2367
2368 static void
2369 expand_omp_ordered_source_sink (struct omp_region *region,
2370 struct omp_for_data *fd, tree *counts,
2371 basic_block cont_bb)
2372 {
2373 struct omp_region *inner;
2374 int i;
2375 for (i = fd->collapse - 1; i < fd->ordered; i++)
2376 if (i == fd->collapse - 1 && fd->collapse > 1)
2377 counts[i] = NULL_TREE;
2378 else if (i >= fd->collapse && !cont_bb)
2379 counts[i] = build_zero_cst (fd->iter_type);
2380 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2381 && integer_onep (fd->loops[i].step))
2382 counts[i] = NULL_TREE;
2383 else
2384 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2385 tree atype
2386 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2387 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2388 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2389
2390 for (inner = region->inner; inner; inner = inner->next)
2391 if (inner->type == GIMPLE_OMP_ORDERED)
2392 {
2393 gomp_ordered *ord_stmt = inner->ord_stmt;
2394 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2395 location_t loc = gimple_location (ord_stmt);
2396 tree c;
2397 for (c = gimple_omp_ordered_clauses (ord_stmt);
2398 c; c = OMP_CLAUSE_CHAIN (c))
2399 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2400 break;
2401 if (c)
2402 expand_omp_ordered_source (&gsi, fd, counts, loc);
2403 for (c = gimple_omp_ordered_clauses (ord_stmt);
2404 c; c = OMP_CLAUSE_CHAIN (c))
2405 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2406 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2407 gsi_remove (&gsi, true);
2408 }
2409 }
2410
2411 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2412 collapsed. */
2413
2414 static basic_block
2415 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2416 basic_block cont_bb, basic_block body_bb,
2417 bool ordered_lastprivate)
2418 {
2419 if (fd->ordered == fd->collapse)
2420 return cont_bb;
2421
2422 if (!cont_bb)
2423 {
2424 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2425 for (int i = fd->collapse; i < fd->ordered; i++)
2426 {
2427 tree type = TREE_TYPE (fd->loops[i].v);
2428 tree n1 = fold_convert (type, fd->loops[i].n1);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2430 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2431 size_int (i - fd->collapse + 1),
2432 NULL_TREE, NULL_TREE);
2433 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2434 }
2435 return NULL;
2436 }
2437
2438 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2439 {
2440 tree t, type = TREE_TYPE (fd->loops[i].v);
2441 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2442 expand_omp_build_assign (&gsi, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].n1));
2444 if (counts[i])
2445 expand_omp_build_assign (&gsi, counts[i],
2446 build_zero_cst (fd->iter_type));
2447 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2448 size_int (i - fd->collapse + 1),
2449 NULL_TREE, NULL_TREE);
2450 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2451 if (!gsi_end_p (gsi))
2452 gsi_prev (&gsi);
2453 else
2454 gsi = gsi_last_bb (body_bb);
2455 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2456 basic_block new_body = e1->dest;
2457 if (body_bb == cont_bb)
2458 cont_bb = new_body;
2459 edge e2 = NULL;
2460 basic_block new_header;
2461 if (EDGE_COUNT (cont_bb->preds) > 0)
2462 {
2463 gsi = gsi_last_bb (cont_bb);
2464 if (POINTER_TYPE_P (type))
2465 t = fold_build_pointer_plus (fd->loops[i].v,
2466 fold_convert (sizetype,
2467 fd->loops[i].step));
2468 else
2469 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2470 fold_convert (type, fd->loops[i].step));
2471 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2472 if (counts[i])
2473 {
2474 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2475 build_int_cst (fd->iter_type, 1));
2476 expand_omp_build_assign (&gsi, counts[i], t);
2477 t = counts[i];
2478 }
2479 else
2480 {
2481 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2482 fd->loops[i].v, fd->loops[i].n1);
2483 t = fold_convert (fd->iter_type, t);
2484 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2485 true, GSI_SAME_STMT);
2486 }
2487 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2488 size_int (i - fd->collapse + 1),
2489 NULL_TREE, NULL_TREE);
2490 expand_omp_build_assign (&gsi, aref, t);
2491 gsi_prev (&gsi);
2492 e2 = split_block (cont_bb, gsi_stmt (gsi));
2493 new_header = e2->dest;
2494 }
2495 else
2496 new_header = cont_bb;
2497 gsi = gsi_after_labels (new_header);
2498 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2499 true, GSI_SAME_STMT);
2500 tree n2
2501 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2502 true, NULL_TREE, true, GSI_SAME_STMT);
2503 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2504 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2505 edge e3 = split_block (new_header, gsi_stmt (gsi));
2506 cont_bb = e3->dest;
2507 remove_edge (e1);
2508 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2509 e3->flags = EDGE_FALSE_VALUE;
2510 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2511 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2512 e1->probability = e3->probability.invert ();
2513
2514 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2515 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2516
2517 if (e2)
2518 {
2519 struct loop *loop = alloc_loop ();
2520 loop->header = new_header;
2521 loop->latch = e2->src;
2522 add_loop (loop, body_bb->loop_father);
2523 }
2524 }
2525
2526 /* If there are any lastprivate clauses and it is possible some loops
2527 might have zero iterations, ensure all the decls are initialized,
2528 otherwise we could crash evaluating C++ class iterators with lastprivate
2529 clauses. */
2530 bool need_inits = false;
2531 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2532 if (need_inits)
2533 {
2534 tree type = TREE_TYPE (fd->loops[i].v);
2535 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2536 expand_omp_build_assign (&gsi, fd->loops[i].v,
2537 fold_convert (type, fd->loops[i].n1));
2538 }
2539 else
2540 {
2541 tree type = TREE_TYPE (fd->loops[i].v);
2542 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2543 boolean_type_node,
2544 fold_convert (type, fd->loops[i].n1),
2545 fold_convert (type, fd->loops[i].n2));
2546 if (!integer_onep (this_cond))
2547 need_inits = true;
2548 }
2549
2550 return cont_bb;
2551 }
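
/* Editorial aside, not part of GCC: the shape of the loop the function
   above wraps around the body for ordered(2) collapse(1), where the
   second ordered dimension is not collapsed.  counts_arr stands in for
   counts[fd->ordered], the array whose address is later passed to the
   doacross runtime calls; slot 0 is maintained elsewhere for the
   collapsed dimension.  Names are hypothetical.  */

static void
example_ordered_wrap (long n1, long n2, long step, long counts_arr[2],
		      void (*body) (long))
{
  long v, iter;
  for (v = n1, iter = 0; v < n2; v += step, iter++)
    {
      /* Publish the current iteration of the wrapped dimension before
	 running the body, as the generated stores to the array do.  */
      counts_arr[1] = iter;
      body (v);
    }
}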
2552
2553 /* A subroutine of expand_omp_for. Generate code for a parallel
2554 loop with any schedule. Given parameters:
2555
2556 for (V = N1; V cond N2; V += STEP) BODY;
2557
2558 where COND is "<" or ">", we generate pseudocode
2559
2560 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2561 if (more) goto L0; else goto L3;
2562 L0:
2563 V = istart0;
2564 iend = iend0;
2565 L1:
2566 BODY;
2567 V += STEP;
2568 if (V cond iend) goto L1; else goto L2;
2569 L2:
2570 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2571 L3:
2572
2573 If this is a combined omp parallel loop, instead of the call to
2574 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2575 If this is gimple_omp_for_combined_p loop, then instead of assigning
2576 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2577 inner GIMPLE_OMP_FOR and V += STEP; and
2578 if (V cond iend) goto L1; else goto L2; are removed.
2579
2580 For collapsed loops, given parameters:
2581 collapse(3)
2582 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2583 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2584 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2585 BODY;
2586
2587 we generate pseudocode
2588
2589 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2590 if (cond3 is <)
2591 adj = STEP3 - 1;
2592 else
2593 adj = STEP3 + 1;
2594 count3 = (adj + N32 - N31) / STEP3;
2595 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2596 if (cond2 is <)
2597 adj = STEP2 - 1;
2598 else
2599 adj = STEP2 + 1;
2600 count2 = (adj + N22 - N21) / STEP2;
2601 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2602 if (cond1 is <)
2603 adj = STEP1 - 1;
2604 else
2605 adj = STEP1 + 1;
2606 count1 = (adj + N12 - N11) / STEP1;
2607 count = count1 * count2 * count3;
2608 goto Z1;
2609 Z0:
2610 count = 0;
2611 Z1:
2612 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2613 if (more) goto L0; else goto L3;
2614 L0:
2615 V = istart0;
2616 T = V;
2617 V3 = N31 + (T % count3) * STEP3;
2618 T = T / count3;
2619 V2 = N21 + (T % count2) * STEP2;
2620 T = T / count2;
2621 V1 = N11 + T * STEP1;
2622 iend = iend0;
2623 L1:
2624 BODY;
2625 V += 1;
2626 if (V < iend) goto L10; else goto L2;
2627 L10:
2628 V3 += STEP3;
2629 if (V3 cond3 N32) goto L1; else goto L11;
2630 L11:
2631 V3 = N31;
2632 V2 += STEP2;
2633 if (V2 cond2 N22) goto L1; else goto L12;
2634 L12:
2635 V2 = N21;
2636 V1 += STEP1;
2637 goto L1;
2638 L2:
2639 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2640 L3:
2641
2642 */
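
/* Editorial aside, not part of GCC: the overall driver shape of the
   pseudocode comment above, with the libgomp entry points abstracted
   behind hypothetical loop_start/loop_next/loop_end callbacks instead of
   the real GOMP_loop_*_start/_next/_end builtins emitted below.  */

static void
example_generic_schedule_driver (long n1, long n2, long step,
				 int (*loop_start) (long, long, long,
						    long *, long *),
				 int (*loop_next) (long *, long *),
				 void (*loop_end) (void),
				 void (*body) (long))
{
  long istart, iend, v;
  int more = loop_start (n1, n2, step, &istart, &iend);
  while (more)
    {
      /* L0/L1: run the chunk [istart, iend) handed back by the runtime.  */
      for (v = istart; v < iend; v += step)
	body (v);
      /* L2: ask for the next chunk; stop when none is left (L3).  */
      more = loop_next (&istart, &iend);
    }
  loop_end ();
}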
2643
2644 static void
2645 expand_omp_for_generic (struct omp_region *region,
2646 struct omp_for_data *fd,
2647 enum built_in_function start_fn,
2648 enum built_in_function next_fn,
2649 tree sched_arg,
2650 gimple *inner_stmt)
2651 {
2652 tree type, istart0, iend0, iend;
2653 tree t, vmain, vback, bias = NULL_TREE;
2654 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2655 basic_block l2_bb = NULL, l3_bb = NULL;
2656 gimple_stmt_iterator gsi;
2657 gassign *assign_stmt;
2658 bool in_combined_parallel = is_combined_parallel (region);
2659 bool broken_loop = region->cont == NULL;
2660 edge e, ne;
2661 tree *counts = NULL;
2662 int i;
2663 bool ordered_lastprivate = false;
2664
2665 gcc_assert (!broken_loop || !in_combined_parallel);
2666 gcc_assert (fd->iter_type == long_integer_type_node
2667 || !in_combined_parallel);
2668
2669 entry_bb = region->entry;
2670 cont_bb = region->cont;
2671 collapse_bb = NULL;
2672 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2673 gcc_assert (broken_loop
2674 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2675 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2676 l1_bb = single_succ (l0_bb);
2677 if (!broken_loop)
2678 {
2679 l2_bb = create_empty_bb (cont_bb);
2680 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2681 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2682 == l1_bb));
2683 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2684 }
2685 else
2686 l2_bb = NULL;
2687 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2688 exit_bb = region->exit;
2689
2690 gsi = gsi_last_nondebug_bb (entry_bb);
2691
2692 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2693 if (fd->ordered
2694 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE_LASTPRIVATE))
2696 ordered_lastprivate = true;
2697 tree reductions = NULL_TREE;
2698 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2699 tree memv = NULL_TREE;
2700 if (fd->lastprivate_conditional)
2701 {
2702 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2703 OMP_CLAUSE__CONDTEMP_);
2704 if (fd->have_pointer_condtemp)
2705 condtemp = OMP_CLAUSE_DECL (c);
2706 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2707 cond_var = OMP_CLAUSE_DECL (c);
2708 }
2709 if (sched_arg)
2710 {
2711 if (fd->have_reductemp)
2712 {
2713 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 OMP_CLAUSE__REDUCTEMP_);
2715 reductions = OMP_CLAUSE_DECL (c);
2716 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2717 gimple *g = SSA_NAME_DEF_STMT (reductions);
2718 reductions = gimple_assign_rhs1 (g);
2719 OMP_CLAUSE_DECL (c) = reductions;
2720 entry_bb = gimple_bb (g);
2721 edge e = split_block (entry_bb, g);
2722 if (region->entry == entry_bb)
2723 region->entry = e->dest;
2724 gsi = gsi_last_bb (entry_bb);
2725 }
2726 else
2727 reductions = null_pointer_node;
2728 if (fd->have_pointer_condtemp)
2729 {
2730 tree type = TREE_TYPE (condtemp);
2731 memv = create_tmp_var (type);
2732 TREE_ADDRESSABLE (memv) = 1;
2733 unsigned HOST_WIDE_INT sz
2734 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2735 sz *= fd->lastprivate_conditional;
2736 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2737 false);
2738 mem = build_fold_addr_expr (memv);
2739 }
2740 else
2741 mem = null_pointer_node;
2742 }
2743 if (fd->collapse > 1 || fd->ordered)
2744 {
2745 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2746 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2747
2748 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2749 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2750 zero_iter1_bb, first_zero_iter1,
2751 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2752
2753 if (zero_iter1_bb)
2754 {
2755 /* Some counts[i] vars might be uninitialized if
2756 some loop has zero iterations. But the body shouldn't
2757 be executed in that case, so just avoid uninit warnings. */
2758 for (i = first_zero_iter1;
2759 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2760 if (SSA_VAR_P (counts[i]))
2761 TREE_NO_WARNING (counts[i]) = 1;
2762 gsi_prev (&gsi);
2763 e = split_block (entry_bb, gsi_stmt (gsi));
2764 entry_bb = e->dest;
2765 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2766 gsi = gsi_last_nondebug_bb (entry_bb);
2767 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2768 get_immediate_dominator (CDI_DOMINATORS,
2769 zero_iter1_bb));
2770 }
2771 if (zero_iter2_bb)
2772 {
2773 /* Some counts[i] vars might be uninitialized if
2774 some loop has zero iterations. But the body shouldn't
2775 be executed in that case, so just avoid uninit warnings. */
2776 for (i = first_zero_iter2; i < fd->ordered; i++)
2777 if (SSA_VAR_P (counts[i]))
2778 TREE_NO_WARNING (counts[i]) = 1;
2779 if (zero_iter1_bb)
2780 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2781 else
2782 {
2783 gsi_prev (&gsi);
2784 e = split_block (entry_bb, gsi_stmt (gsi));
2785 entry_bb = e->dest;
2786 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2787 gsi = gsi_last_nondebug_bb (entry_bb);
2788 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2789 get_immediate_dominator
2790 (CDI_DOMINATORS, zero_iter2_bb));
2791 }
2792 }
2793 if (fd->collapse == 1)
2794 {
2795 counts[0] = fd->loop.n2;
2796 fd->loop = fd->loops[0];
2797 }
2798 }
2799
2800 type = TREE_TYPE (fd->loop.v);
2801 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2802 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2803 TREE_ADDRESSABLE (istart0) = 1;
2804 TREE_ADDRESSABLE (iend0) = 1;
2805
2806 /* See if we need to bias by LLONG_MIN. */
2807 if (fd->iter_type == long_long_unsigned_type_node
2808 && TREE_CODE (type) == INTEGER_TYPE
2809 && !TYPE_UNSIGNED (type)
2810 && fd->ordered == 0)
2811 {
2812 tree n1, n2;
2813
2814 if (fd->loop.cond_code == LT_EXPR)
2815 {
2816 n1 = fd->loop.n1;
2817 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 }
2819 else
2820 {
2821 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2822 n2 = fd->loop.n1;
2823 }
2824 if (TREE_CODE (n1) != INTEGER_CST
2825 || TREE_CODE (n2) != INTEGER_CST
2826 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2827 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2828 }
2829
2830 gimple_stmt_iterator gsif = gsi;
2831 gsi_prev (&gsif);
2832
2833 tree arr = NULL_TREE;
2834 if (in_combined_parallel)
2835 {
2836 gcc_assert (fd->ordered == 0);
2837 /* In a combined parallel loop, emit a call to
2838 GOMP_loop_foo_next. */
2839 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2840 build_fold_addr_expr (istart0),
2841 build_fold_addr_expr (iend0));
2842 }
2843 else
2844 {
2845 tree t0, t1, t2, t3, t4;
2846 /* If this is not a combined parallel loop, emit a call to
2847 GOMP_loop_foo_start in ENTRY_BB. */
2848 t4 = build_fold_addr_expr (iend0);
2849 t3 = build_fold_addr_expr (istart0);
2850 if (fd->ordered)
2851 {
2852 t0 = build_int_cst (unsigned_type_node,
2853 fd->ordered - fd->collapse + 1);
2854 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2855 fd->ordered
2856 - fd->collapse + 1),
2857 ".omp_counts");
2858 DECL_NAMELESS (arr) = 1;
2859 TREE_ADDRESSABLE (arr) = 1;
2860 TREE_STATIC (arr) = 1;
2861 vec<constructor_elt, va_gc> *v;
2862 vec_alloc (v, fd->ordered - fd->collapse + 1);
2863 int idx;
2864
2865 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2866 {
2867 tree c;
2868 if (idx == 0 && fd->collapse > 1)
2869 c = fd->loop.n2;
2870 else
2871 c = counts[idx + fd->collapse - 1];
2872 tree purpose = size_int (idx);
2873 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2874 if (TREE_CODE (c) != INTEGER_CST)
2875 TREE_STATIC (arr) = 0;
2876 }
2877
2878 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2879 if (!TREE_STATIC (arr))
2880 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2881 void_type_node, arr),
2882 true, NULL_TREE, true, GSI_SAME_STMT);
2883 t1 = build_fold_addr_expr (arr);
2884 t2 = NULL_TREE;
2885 }
2886 else
2887 {
2888 t2 = fold_convert (fd->iter_type, fd->loop.step);
2889 t1 = fd->loop.n2;
2890 t0 = fd->loop.n1;
2891 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2892 {
2893 tree innerc
2894 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2895 OMP_CLAUSE__LOOPTEMP_);
2896 gcc_assert (innerc);
2897 t0 = OMP_CLAUSE_DECL (innerc);
2898 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2899 OMP_CLAUSE__LOOPTEMP_);
2900 gcc_assert (innerc);
2901 t1 = OMP_CLAUSE_DECL (innerc);
2902 }
2903 if (POINTER_TYPE_P (TREE_TYPE (t0))
2904 && TYPE_PRECISION (TREE_TYPE (t0))
2905 != TYPE_PRECISION (fd->iter_type))
2906 {
2907 /* Avoid casting pointers to an integer of a different size. */
2908 tree itype = signed_type_for (type);
2909 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2910 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2911 }
2912 else
2913 {
2914 t1 = fold_convert (fd->iter_type, t1);
2915 t0 = fold_convert (fd->iter_type, t0);
2916 }
2917 if (bias)
2918 {
2919 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2920 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2921 }
2922 }
2923 if (fd->iter_type == long_integer_type_node || fd->ordered)
2924 {
2925 if (fd->chunk_size)
2926 {
2927 t = fold_convert (fd->iter_type, fd->chunk_size);
2928 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2929 if (sched_arg)
2930 {
2931 if (fd->ordered)
2932 t = build_call_expr (builtin_decl_explicit (start_fn),
2933 8, t0, t1, sched_arg, t, t3, t4,
2934 reductions, mem);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 9, t0, t1, t2, sched_arg, t, t3, t4,
2938 reductions, mem);
2939 }
2940 else if (fd->ordered)
2941 t = build_call_expr (builtin_decl_explicit (start_fn),
2942 5, t0, t1, t, t3, t4);
2943 else
2944 t = build_call_expr (builtin_decl_explicit (start_fn),
2945 6, t0, t1, t2, t, t3, t4);
2946 }
2947 else if (fd->ordered)
2948 t = build_call_expr (builtin_decl_explicit (start_fn),
2949 4, t0, t1, t3, t4);
2950 else
2951 t = build_call_expr (builtin_decl_explicit (start_fn),
2952 5, t0, t1, t2, t3, t4);
2953 }
2954 else
2955 {
2956 tree t5;
2957 tree c_bool_type;
2958 tree bfn_decl;
2959
2960 /* The GOMP_loop_ull_*start functions have an additional boolean
2961 argument, true for < loops and false for > loops.
2962 In Fortran, the C bool type can be different from
2963 boolean_type_node. */
2964 bfn_decl = builtin_decl_explicit (start_fn);
2965 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2966 t5 = build_int_cst (c_bool_type,
2967 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2968 if (fd->chunk_size)
2969 {
2970 tree bfn_decl = builtin_decl_explicit (start_fn);
2971 t = fold_convert (fd->iter_type, fd->chunk_size);
2972 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2973 if (sched_arg)
2974 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2975 t, t3, t4, reductions, mem);
2976 else
2977 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2978 }
2979 else
2980 t = build_call_expr (builtin_decl_explicit (start_fn),
2981 6, t5, t0, t1, t2, t3, t4);
2982 }
2983 }
2984 if (TREE_TYPE (t) != boolean_type_node)
2985 t = fold_build2 (NE_EXPR, boolean_type_node,
2986 t, build_int_cst (TREE_TYPE (t), 0));
2987 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2988 true, GSI_SAME_STMT);
2989 if (arr && !TREE_STATIC (arr))
2990 {
2991 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2992 TREE_THIS_VOLATILE (clobber) = 1;
2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2994 GSI_SAME_STMT);
2995 }
2996 if (fd->have_pointer_condtemp)
2997 expand_omp_build_assign (&gsi, condtemp, memv, false);
2998 if (fd->have_reductemp)
2999 {
3000 gimple *g = gsi_stmt (gsi);
3001 gsi_remove (&gsi, true);
3002 release_ssa_name (gimple_assign_lhs (g));
3003
3004 entry_bb = region->entry;
3005 gsi = gsi_last_nondebug_bb (entry_bb);
3006
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3008 }
3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3010
3011 /* Remove the GIMPLE_OMP_FOR statement. */
3012 gsi_remove (&gsi, true);
3013
3014 if (gsi_end_p (gsif))
3015 gsif = gsi_after_labels (gsi_bb (gsif));
3016 gsi_next (&gsif);
3017
3018 /* Iteration setup for sequential loop goes in L0_BB. */
3019 tree startvar = fd->loop.v;
3020 tree endvar = NULL_TREE;
3021
3022 if (gimple_omp_for_combined_p (fd->for_stmt))
3023 {
3024 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3025 && gimple_omp_for_kind (inner_stmt)
3026 == GF_OMP_FOR_KIND_SIMD);
3027 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3028 OMP_CLAUSE__LOOPTEMP_);
3029 gcc_assert (innerc);
3030 startvar = OMP_CLAUSE_DECL (innerc);
3031 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3032 OMP_CLAUSE__LOOPTEMP_);
3033 gcc_assert (innerc);
3034 endvar = OMP_CLAUSE_DECL (innerc);
3035 }
3036
3037 gsi = gsi_start_bb (l0_bb);
3038 t = istart0;
3039 if (fd->ordered && fd->collapse == 1)
3040 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3041 fold_convert (fd->iter_type, fd->loop.step));
3042 else if (bias)
3043 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3044 if (fd->ordered && fd->collapse == 1)
3045 {
3046 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3047 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, fold_convert (sizetype, t));
3049 else
3050 {
3051 t = fold_convert (TREE_TYPE (startvar), t);
3052 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3053 fd->loop.n1, t);
3054 }
3055 }
3056 else
3057 {
3058 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3059 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3060 t = fold_convert (TREE_TYPE (startvar), t);
3061 }
3062 t = force_gimple_operand_gsi (&gsi, t,
3063 DECL_P (startvar)
3064 && TREE_ADDRESSABLE (startvar),
3065 NULL_TREE, false, GSI_CONTINUE_LINKING);
3066 assign_stmt = gimple_build_assign (startvar, t);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 if (cond_var)
3069 {
3070 tree itype = TREE_TYPE (cond_var);
3071 /* For lastprivate(conditional:) itervar, we need some iteration
3072 counter that starts at an unsigned non-zero value and increases.
3073 Prefer as few IVs as possible, so if we can use startvar
3074 itself, use that, or startvar + constant (those would be
3075 incremented with step), and as a last resort use s0 + 1
3076 incremented by 1. */
3077 if ((fd->ordered && fd->collapse == 1)
3078 || bias
3079 || POINTER_TYPE_P (type)
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3081 || fd->loop.cond_code != LT_EXPR)
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3083 build_int_cst (itype, 1));
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3085 t = fold_convert (itype, t);
3086 else
3087 {
3088 tree c = fold_convert (itype, fd->loop.n1);
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3091 }
3092 t = force_gimple_operand_gsi (&gsi, t, false,
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 assign_stmt = gimple_build_assign (cond_var, t);
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3096 }
3097
3098 t = iend0;
3099 if (fd->ordered && fd->collapse == 1)
3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3101 fold_convert (fd->iter_type, fd->loop.step));
3102 else if (bias)
3103 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3104 if (fd->ordered && fd->collapse == 1)
3105 {
3106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3107 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, fold_convert (sizetype, t));
3109 else
3110 {
3111 t = fold_convert (TREE_TYPE (startvar), t);
3112 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3113 fd->loop.n1, t);
3114 }
3115 }
3116 else
3117 {
3118 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3119 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3120 t = fold_convert (TREE_TYPE (startvar), t);
3121 }
3122 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3123 false, GSI_CONTINUE_LINKING);
3124 if (endvar)
3125 {
3126 assign_stmt = gimple_build_assign (endvar, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3129 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3130 else
3131 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3133 }
3134 /* Handle linear clause adjustments. */
3135 tree itercnt = NULL_TREE;
3136 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3137 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3138 c; c = OMP_CLAUSE_CHAIN (c))
3139 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3140 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3141 {
3142 tree d = OMP_CLAUSE_DECL (c);
3143 bool is_ref = omp_is_reference (d);
3144 tree t = d, a, dest;
3145 if (is_ref)
3146 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3147 tree type = TREE_TYPE (t);
3148 if (POINTER_TYPE_P (type))
3149 type = sizetype;
3150 dest = unshare_expr (t);
3151 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3152 expand_omp_build_assign (&gsif, v, t);
3153 if (itercnt == NULL_TREE)
3154 {
3155 itercnt = startvar;
3156 tree n1 = fd->loop.n1;
3157 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3158 {
3159 itercnt
3160 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3161 itercnt);
3162 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3163 }
3164 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3165 itercnt, n1);
3166 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3167 itercnt, fd->loop.step);
3168 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3169 NULL_TREE, false,
3170 GSI_CONTINUE_LINKING);
3171 }
3172 a = fold_build2 (MULT_EXPR, type,
3173 fold_convert (type, itercnt),
3174 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3175 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3176 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3177 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3178 false, GSI_CONTINUE_LINKING);
3179 assign_stmt = gimple_build_assign (dest, t);
3180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3181 }
3182 if (fd->collapse > 1)
3183 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3184
3185 if (fd->ordered)
3186 {
3187 /* Until now, the counts array contained the number of iterations
3188 (or the variable holding it) for the ith loop.  From now on, we
3189 need those counts only for the collapsed loops, and only for the
3190 2nd through the last collapsed one.  Move them one element
3191 earlier; we'll use counts[fd->collapse - 1] for the first
3192 source/sink iteration counter and so on, and counts[fd->ordered]
3193 as the array holding the current counter values for
3194 depend(source). */
3195 if (fd->collapse > 1)
3196 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3197 if (broken_loop)
3198 {
3199 int i;
3200 for (i = fd->collapse; i < fd->ordered; i++)
3201 {
3202 tree type = TREE_TYPE (fd->loops[i].v);
3203 tree this_cond
3204 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3205 fold_convert (type, fd->loops[i].n1),
3206 fold_convert (type, fd->loops[i].n2));
3207 if (!integer_onep (this_cond))
3208 break;
3209 }
3210 if (i < fd->ordered)
3211 {
3212 cont_bb
3213 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3214 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3215 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3216 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3217 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3218 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3219 make_edge (cont_bb, l1_bb, 0);
3220 l2_bb = create_empty_bb (cont_bb);
3221 broken_loop = false;
3222 }
3223 }
3224 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3225 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3226 ordered_lastprivate);
3227 if (counts[fd->collapse - 1])
3228 {
3229 gcc_assert (fd->collapse == 1);
3230 gsi = gsi_last_bb (l0_bb);
3231 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3232 istart0, true);
3233 gsi = gsi_last_bb (cont_bb);
3234 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3235 build_int_cst (fd->iter_type, 1));
3236 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_zero_node, NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3240 t = counts[fd->collapse - 1];
3241 }
3242 else if (fd->collapse > 1)
3243 t = fd->loop.v;
3244 else
3245 {
3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3247 fd->loops[0].v, fd->loops[0].n1);
3248 t = fold_convert (fd->iter_type, t);
3249 }
3250 gsi = gsi_last_bb (l0_bb);
3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3252 size_zero_node, NULL_TREE, NULL_TREE);
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 expand_omp_build_assign (&gsi, aref, t, true);
3256 }
3257
3258 if (!broken_loop)
3259 {
3260 /* Code to control the increment and predicate for the sequential
3261 loop goes in the CONT_BB. */
3262 gsi = gsi_last_nondebug_bb (cont_bb);
3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3265 vmain = gimple_omp_continue_control_use (cont_stmt);
3266 vback = gimple_omp_continue_control_def (cont_stmt);
3267
3268 if (cond_var)
3269 {
3270 tree itype = TREE_TYPE (cond_var);
3271 tree t2;
3272 if ((fd->ordered && fd->collapse == 1)
3273 || bias
3274 || POINTER_TYPE_P (type)
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3276 || fd->loop.cond_code != LT_EXPR)
3277 t2 = build_int_cst (itype, 1);
3278 else
3279 t2 = fold_convert (itype, fd->loop.step);
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3282 NULL_TREE, true, GSI_SAME_STMT);
3283 assign_stmt = gimple_build_assign (cond_var, t2);
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 }
3286
3287 if (!gimple_omp_for_combined_p (fd->for_stmt))
3288 {
3289 if (POINTER_TYPE_P (type))
3290 t = fold_build_pointer_plus (vmain, fd->loop.step);
3291 else
3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3293 t = force_gimple_operand_gsi (&gsi, t,
3294 DECL_P (vback)
3295 && TREE_ADDRESSABLE (vback),
3296 NULL_TREE, true, GSI_SAME_STMT);
3297 assign_stmt = gimple_build_assign (vback, t);
3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3299
3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3301 {
3302 tree tem;
3303 if (fd->collapse > 1)
3304 tem = fd->loop.v;
3305 else
3306 {
3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3308 fd->loops[0].v, fd->loops[0].n1);
3309 tem = fold_convert (fd->iter_type, tem);
3310 }
3311 tree aref = build4 (ARRAY_REF, fd->iter_type,
3312 counts[fd->ordered], size_zero_node,
3313 NULL_TREE, NULL_TREE);
3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3315 true, GSI_SAME_STMT);
3316 expand_omp_build_assign (&gsi, aref, tem);
3317 }
3318
3319 t = build2 (fd->loop.cond_code, boolean_type_node,
3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3321 iend);
3322 gcond *cond_stmt = gimple_build_cond_empty (t);
3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3324 }
3325
3326 /* Remove GIMPLE_OMP_CONTINUE. */
3327 gsi_remove (&gsi, true);
3328
3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3331
3332 /* Emit code to get the next parallel iteration in L2_BB. */
3333 gsi = gsi_start_bb (l2_bb);
3334
3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3336 build_fold_addr_expr (istart0),
3337 build_fold_addr_expr (iend0));
3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3339 false, GSI_CONTINUE_LINKING);
3340 if (TREE_TYPE (t) != boolean_type_node)
3341 t = fold_build2 (NE_EXPR, boolean_type_node,
3342 t, build_int_cst (TREE_TYPE (t), 0));
3343 gcond *cond_stmt = gimple_build_cond_empty (t);
3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3345 }
3346
3347 /* Add the loop cleanup function. */
3348 gsi = gsi_last_nondebug_bb (exit_bb);
3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3353 else
3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3355 gcall *call_stmt = gimple_build_call (t, 0);
3356 if (fd->ordered)
3357 {
3358 tree arr = counts[fd->ordered];
3359 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3360 TREE_THIS_VOLATILE (clobber) = 1;
3361 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3362 GSI_SAME_STMT);
3363 }
3364 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3365 {
3366 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3367 if (fd->have_reductemp)
3368 {
3369 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3370 gimple_call_lhs (call_stmt));
3371 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3372 }
3373 }
3374 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3375 gsi_remove (&gsi, true);
3376
3377 /* Connect the new blocks. */
3378 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3379 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3380
3381 if (!broken_loop)
3382 {
3383 gimple_seq phis;
3384
3385 e = find_edge (cont_bb, l3_bb);
3386 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3387
3388 phis = phi_nodes (l3_bb);
3389 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3390 {
3391 gimple *phi = gsi_stmt (gsi);
3392 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3393 PHI_ARG_DEF_FROM_EDGE (phi, e));
3394 }
3395 remove_edge (e);
3396
3397 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3398 e = find_edge (cont_bb, l1_bb);
3399 if (e == NULL)
3400 {
3401 e = BRANCH_EDGE (cont_bb);
3402 gcc_assert (single_succ (e->dest) == l1_bb);
3403 }
3404 if (gimple_omp_for_combined_p (fd->for_stmt))
3405 {
3406 remove_edge (e);
3407 e = NULL;
3408 }
3409 else if (fd->collapse > 1)
3410 {
3411 remove_edge (e);
3412 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3413 }
3414 else
3415 e->flags = EDGE_TRUE_VALUE;
3416 if (e)
3417 {
3418 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3419 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3420 }
3421 else
3422 {
3423 e = find_edge (cont_bb, l2_bb);
3424 e->flags = EDGE_FALLTHRU;
3425 }
3426 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3427
3428 if (gimple_in_ssa_p (cfun))
3429 {
3430 /* Add phis to the outer loop that connect to the phis in the inner,
3431 original loop, and move the loop entry value of the inner phi to
3432 the loop entry value of the outer phi. */
3433 gphi_iterator psi;
3434 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3435 {
3436 location_t locus;
3437 gphi *nphi;
3438 gphi *exit_phi = psi.phi ();
3439
3440 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3441 continue;
3442
3443 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3444 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3445
3446 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3447 edge latch_to_l1 = find_edge (latch, l1_bb);
3448 gphi *inner_phi
3449 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3450
3451 tree t = gimple_phi_result (exit_phi);
3452 tree new_res = copy_ssa_name (t, NULL);
3453 nphi = create_phi_node (new_res, l0_bb);
3454
3455 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3456 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3457 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3458 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3459 add_phi_arg (nphi, t, entry_to_l0, locus);
3460
3461 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3462 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3463
3464 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3465 }
3466 }
3467
3468 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3469 recompute_dominator (CDI_DOMINATORS, l2_bb));
3470 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3471 recompute_dominator (CDI_DOMINATORS, l3_bb));
3472 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3473 recompute_dominator (CDI_DOMINATORS, l0_bb));
3474 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3475 recompute_dominator (CDI_DOMINATORS, l1_bb));
3476
3477 /* We enter expand_omp_for_generic with a loop. This original loop may
3478 have its own loop struct, or it may be part of an outer loop struct
3479 (which may be the fake loop). */
3480 struct loop *outer_loop = entry_bb->loop_father;
3481 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3482
3483 add_bb_to_loop (l2_bb, outer_loop);
3484
3485 /* We've added a new loop around the original loop. Allocate the
3486 corresponding loop struct. */
3487 struct loop *new_loop = alloc_loop ();
3488 new_loop->header = l0_bb;
3489 new_loop->latch = l2_bb;
3490 add_loop (new_loop, outer_loop);
3491
3492 /* Allocate a loop structure for the original loop unless we already
3493 had one. */
3494 if (!orig_loop_has_loop_struct
3495 && !gimple_omp_for_combined_p (fd->for_stmt))
3496 {
3497 struct loop *orig_loop = alloc_loop ();
3498 orig_loop->header = l1_bb;
3499 /* The loop may have multiple latches. */
3500 add_loop (orig_loop, new_loop);
3501 }
3502 }
3503 }
3504
3505 /* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
3506 compute the needed allocation size; for !ALLOC that of the team
3507 allocations, for ALLOC that of the per-thread allocation.  SZ is the
3508 initial size needed for other purposes, ALLOC_ALIGN the guaranteed
3509 alignment of the allocation in bytes, and CNT the number of elements
3510 of each array; for !ALLOC this is omp_get_num_threads (), for ALLOC
3511 the number of iterations handled by the current thread.  If PTR is
3512 non-NULL, it is the start of the allocation, and this routine stores into
3513 OMP_CLAUSE_DECL (c) of each such _scantemp_ clause a pointer to its array.  */
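/* Illustrative sketch only, not used by the compiler: for a loop with
   two non-control _scantemp_ arrays whose element types are double
   (size 8) and int (size 4), SZ == 0, ALLOC_ALIGN >= 8 and CNT equal to
   the thread count, the size-computing call (PTR == NULL) yields
   8 * CNT + 4 * CNT bytes with no extra alignment padding, and the
   PTR != NULL call stores PTR into the first clause and PTR + 8 * CNT
   into the second.  A minimal stand-alone C version of that size
   computation, with hypothetical arguments elt_sz[] and n_arrays and
   ignoring the alignment adjustments the real routine performs:

     static unsigned long
     scantemp_layout_size (const unsigned long *elt_sz,
                           unsigned long n_arrays,
                           unsigned long cnt, unsigned long sz)
     {
       // one array of CNT elements per matching _scantemp_ clause
       for (unsigned long i = 0; i < n_arrays; i++)
         sz += elt_sz[i] * cnt;
       return sz;
     }  */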
3514
3515 static tree
3516 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3517 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3518 gimple_stmt_iterator *gsi, bool alloc)
3519 {
3520 tree eltsz = NULL_TREE;
3521 unsigned HOST_WIDE_INT preval = 0;
3522 if (ptr && sz)
3523 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3524 ptr, size_int (sz));
3525 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3526 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3527 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3528 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3529 {
3530 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3531 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3532 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3533 {
3534 unsigned HOST_WIDE_INT szl
3535 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3536 szl = least_bit_hwi (szl);
3537 if (szl)
3538 al = MIN (al, szl);
3539 }
3540 if (ptr == NULL_TREE)
3541 {
3542 if (eltsz == NULL_TREE)
3543 eltsz = TYPE_SIZE_UNIT (pointee_type);
3544 else
3545 eltsz = size_binop (PLUS_EXPR, eltsz,
3546 TYPE_SIZE_UNIT (pointee_type));
3547 }
3548 if (preval == 0 && al <= alloc_align)
3549 {
3550 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3551 sz += diff;
3552 if (diff && ptr)
3553 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3554 ptr, size_int (diff));
3555 }
3556 else if (al > preval)
3557 {
3558 if (ptr)
3559 {
3560 ptr = fold_convert (pointer_sized_int_node, ptr);
3561 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3562 build_int_cst (pointer_sized_int_node,
3563 al - 1));
3564 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3565 build_int_cst (pointer_sized_int_node,
3566 -(HOST_WIDE_INT) al));
3567 ptr = fold_convert (ptr_type_node, ptr);
3568 }
3569 else
3570 sz += al - 1;
3571 }
3572 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3573 preval = al;
3574 else
3575 preval = 1;
3576 if (ptr)
3577 {
3578 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3579 ptr = OMP_CLAUSE_DECL (c);
3580 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3581 size_binop (MULT_EXPR, cnt,
3582 TYPE_SIZE_UNIT (pointee_type)));
3583 }
3584 }
3585
3586 if (ptr == NULL_TREE)
3587 {
3588 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3589 if (sz)
3590 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3591 return eltsz;
3592 }
3593 else
3594 return ptr;
3595 }
3596
3597 /* A subroutine of expand_omp_for. Generate code for a parallel
3598 loop with static schedule and no specified chunk size. Given
3599 parameters:
3600
3601 for (V = N1; V cond N2; V += STEP) BODY;
3602
3603 where COND is "<" or ">", we generate pseudocode
3604
3605 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3606 if (cond is <)
3607 adj = STEP - 1;
3608 else
3609 adj = STEP + 1;
3610 if ((__typeof (V)) -1 > 0 && cond is >)
3611 n = -(adj + N2 - N1) / -STEP;
3612 else
3613 n = (adj + N2 - N1) / STEP;
3614 q = n / nthreads;
3615 tt = n % nthreads;
3616 if (threadid < tt) goto L3; else goto L4;
3617 L3:
3618 tt = 0;
3619 q = q + 1;
3620 L4:
3621 s0 = q * threadid + tt;
3622 e0 = s0 + q;
3623 V = s0 * STEP + N1;
3624 if (s0 >= e0) goto L2; else goto L0;
3625 L0:
3626 e = e0 * STEP + N1;
3627 L1:
3628 BODY;
3629 V += STEP;
3630 if (V cond e) goto L1;
3631 L2:
3632 */
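/* Worked example (illustrative only, not part of the generated code):
   with n = 10 logical iterations and nthreads = 4, q = 2 and tt = 2, so
   threads 0 and 1 bump q to 3 and the blocks come out as

     thread 0: s0 = 0, e0 = 3     thread 1: s0 = 3, e0 = 6
     thread 2: s0 = 6, e0 = 8     thread 3: s0 = 8, e0 = 10

   i.e. the first n % nthreads threads receive one extra iteration.  A
   minimal stand-alone C sketch of the same partitioning
   (static_nochunk_range is a hypothetical helper, not part of libgomp or
   of this file):

     static void
     static_nochunk_range (unsigned long n, unsigned long nthreads,
                           unsigned long threadid,
                           unsigned long *s0, unsigned long *e0)
     {
       unsigned long q = n / nthreads;   // base block size
       unsigned long tt = n % nthreads;  // iterations left over
       if (threadid < tt)
         {
           // the first TT threads each take one extra iteration
           tt = 0;
           q++;
         }
       *s0 = q * threadid + tt;
       *e0 = *s0 + q;
     }  */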
3633
3634 static void
3635 expand_omp_for_static_nochunk (struct omp_region *region,
3636 struct omp_for_data *fd,
3637 gimple *inner_stmt)
3638 {
3639 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3640 tree type, itype, vmain, vback;
3641 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3642 basic_block body_bb, cont_bb, collapse_bb = NULL;
3643 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3644 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
3645 gimple_stmt_iterator gsi, gsip;
3646 edge ep;
3647 bool broken_loop = region->cont == NULL;
3648 tree *counts = NULL;
3649 tree n1, n2, step;
3650 tree reductions = NULL_TREE;
3651 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3652
3653 itype = type = TREE_TYPE (fd->loop.v);
3654 if (POINTER_TYPE_P (type))
3655 itype = signed_type_for (type);
3656
3657 entry_bb = region->entry;
3658 cont_bb = region->cont;
3659 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3660 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3661 gcc_assert (broken_loop
3662 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3663 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3664 body_bb = single_succ (seq_start_bb);
3665 if (!broken_loop)
3666 {
3667 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3668 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3669 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3670 }
3671 exit_bb = region->exit;
3672
3673 /* Iteration space partitioning goes in ENTRY_BB. */
3674 gsi = gsi_last_nondebug_bb (entry_bb);
3675 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3676 gsip = gsi;
3677 gsi_prev (&gsip);
3678
3679 if (fd->collapse > 1)
3680 {
3681 int first_zero_iter = -1, dummy = -1;
3682 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3683
3684 counts = XALLOCAVEC (tree, fd->collapse);
3685 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3686 fin_bb, first_zero_iter,
3687 dummy_bb, dummy, l2_dom_bb);
3688 t = NULL_TREE;
3689 }
3690 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3691 t = integer_one_node;
3692 else
3693 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3694 fold_convert (type, fd->loop.n1),
3695 fold_convert (type, fd->loop.n2));
3696 if (fd->collapse == 1
3697 && TYPE_UNSIGNED (type)
3698 && (t == NULL_TREE || !integer_onep (t)))
3699 {
3700 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3701 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3702 true, GSI_SAME_STMT);
3703 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3704 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3705 true, GSI_SAME_STMT);
3706 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3707 NULL_TREE, NULL_TREE);
3708 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3709 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3710 expand_omp_regimplify_p, NULL, NULL)
3711 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3712 expand_omp_regimplify_p, NULL, NULL))
3713 {
3714 gsi = gsi_for_stmt (cond_stmt);
3715 gimple_regimplify_operands (cond_stmt, &gsi);
3716 }
3717 ep = split_block (entry_bb, cond_stmt);
3718 ep->flags = EDGE_TRUE_VALUE;
3719 entry_bb = ep->dest;
3720 ep->probability = profile_probability::very_likely ();
3721 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3722 ep->probability = profile_probability::very_unlikely ();
3723 if (gimple_in_ssa_p (cfun))
3724 {
3725 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3726 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3727 !gsi_end_p (gpi); gsi_next (&gpi))
3728 {
3729 gphi *phi = gpi.phi ();
3730 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3731 ep, UNKNOWN_LOCATION);
3732 }
3733 }
3734 gsi = gsi_last_bb (entry_bb);
3735 }
3736
3737 if (fd->lastprivate_conditional)
3738 {
3739 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3740 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3741 if (fd->have_pointer_condtemp)
3742 condtemp = OMP_CLAUSE_DECL (c);
3743 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3744 cond_var = OMP_CLAUSE_DECL (c);
3745 }
3746 if (fd->have_reductemp
3747 || fd->have_pointer_condtemp
3748 || fd->have_nonctrl_scantemp)
3749 {
3750 tree t1 = build_int_cst (long_integer_type_node, 0);
3751 tree t2 = build_int_cst (long_integer_type_node, 1);
3752 tree t3 = build_int_cstu (long_integer_type_node,
3753 (HOST_WIDE_INT_1U << 31) + 1);
3754 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3755 gimple_stmt_iterator gsi2 = gsi_none ();
3756 gimple *g = NULL;
3757 tree mem = null_pointer_node, memv = NULL_TREE;
3758 unsigned HOST_WIDE_INT condtemp_sz = 0;
3759 unsigned HOST_WIDE_INT alloc_align = 0;
3760 if (fd->have_reductemp)
3761 {
3762 gcc_assert (!fd->have_nonctrl_scantemp);
3763 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3764 reductions = OMP_CLAUSE_DECL (c);
3765 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3766 g = SSA_NAME_DEF_STMT (reductions);
3767 reductions = gimple_assign_rhs1 (g);
3768 OMP_CLAUSE_DECL (c) = reductions;
3769 gsi2 = gsi_for_stmt (g);
3770 }
3771 else
3772 {
3773 if (gsi_end_p (gsip))
3774 gsi2 = gsi_after_labels (region->entry);
3775 else
3776 gsi2 = gsip;
3777 reductions = null_pointer_node;
3778 }
3779 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
3780 {
3781 tree type;
3782 if (fd->have_pointer_condtemp)
3783 type = TREE_TYPE (condtemp);
3784 else
3785 type = ptr_type_node;
3786 memv = create_tmp_var (type);
3787 TREE_ADDRESSABLE (memv) = 1;
3788 unsigned HOST_WIDE_INT sz = 0;
3789 tree size = NULL_TREE;
3790 if (fd->have_pointer_condtemp)
3791 {
3792 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3793 sz *= fd->lastprivate_conditional;
3794 condtemp_sz = sz;
3795 }
3796 if (fd->have_nonctrl_scantemp)
3797 {
3798 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3799 gimple *g = gimple_build_call (nthreads, 0);
3800 nthreads = create_tmp_var (integer_type_node);
3801 gimple_call_set_lhs (g, nthreads);
3802 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3803 nthreads = fold_convert (sizetype, nthreads);
3804 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3805 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3806 alloc_align, nthreads, NULL,
3807 false);
3808 size = fold_convert (type, size);
3809 }
3810 else
3811 size = build_int_cst (type, sz);
3812 expand_omp_build_assign (&gsi2, memv, size, false);
3813 mem = build_fold_addr_expr (memv);
3814 }
3815 tree t
3816 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3817 9, t1, t2, t2, t3, t1, null_pointer_node,
3818 null_pointer_node, reductions, mem);
3819 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3820 true, GSI_SAME_STMT);
3821 if (fd->have_pointer_condtemp)
3822 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3823 if (fd->have_nonctrl_scantemp)
3824 {
3825 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3826 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3827 alloc_align, nthreads, &gsi2, false);
3828 }
3829 if (fd->have_reductemp)
3830 {
3831 gsi_remove (&gsi2, true);
3832 release_ssa_name (gimple_assign_lhs (g));
3833 }
3834 }
3835 switch (gimple_omp_for_kind (fd->for_stmt))
3836 {
3837 case GF_OMP_FOR_KIND_FOR:
3838 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3839 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3840 break;
3841 case GF_OMP_FOR_KIND_DISTRIBUTE:
3842 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3843 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3844 break;
3845 default:
3846 gcc_unreachable ();
3847 }
3848 nthreads = build_call_expr (nthreads, 0);
3849 nthreads = fold_convert (itype, nthreads);
3850 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3851 true, GSI_SAME_STMT);
3852 threadid = build_call_expr (threadid, 0);
3853 threadid = fold_convert (itype, threadid);
3854 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3855 true, GSI_SAME_STMT);
3856
3857 n1 = fd->loop.n1;
3858 n2 = fd->loop.n2;
3859 step = fd->loop.step;
3860 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3861 {
3862 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3863 OMP_CLAUSE__LOOPTEMP_);
3864 gcc_assert (innerc);
3865 n1 = OMP_CLAUSE_DECL (innerc);
3866 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3867 OMP_CLAUSE__LOOPTEMP_);
3868 gcc_assert (innerc);
3869 n2 = OMP_CLAUSE_DECL (innerc);
3870 }
3871 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3872 true, NULL_TREE, true, GSI_SAME_STMT);
3873 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3874 true, NULL_TREE, true, GSI_SAME_STMT);
3875 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3876 true, NULL_TREE, true, GSI_SAME_STMT);
3877
3878 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3879 t = fold_build2 (PLUS_EXPR, itype, step, t);
3880 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3881 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3882 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3883 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3884 fold_build1 (NEGATE_EXPR, itype, t),
3885 fold_build1 (NEGATE_EXPR, itype, step));
3886 else
3887 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3888 t = fold_convert (itype, t);
3889 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3890
3891 q = create_tmp_reg (itype, "q");
3892 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3893 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3894 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3895
3896 tt = create_tmp_reg (itype, "tt");
3897 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3898 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3899 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3900
3901 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3902 gcond *cond_stmt = gimple_build_cond_empty (t);
3903 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3904
3905 second_bb = split_block (entry_bb, cond_stmt)->dest;
3906 gsi = gsi_last_nondebug_bb (second_bb);
3907 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3908
3909 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3910 GSI_SAME_STMT);
3911 gassign *assign_stmt
3912 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3913 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3914
3915 third_bb = split_block (second_bb, assign_stmt)->dest;
3916 gsi = gsi_last_nondebug_bb (third_bb);
3917 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3918
3919 if (fd->have_nonctrl_scantemp)
3920 {
3921 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3922 tree controlp = NULL_TREE, controlb = NULL_TREE;
3923 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3924 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3925 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3926 {
3927 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3928 controlb = OMP_CLAUSE_DECL (c);
3929 else
3930 controlp = OMP_CLAUSE_DECL (c);
3931 if (controlb && controlp)
3932 break;
3933 }
3934 gcc_assert (controlp && controlb);
3935 tree cnt = create_tmp_var (sizetype);
3936 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3937 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3938 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3939 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3940 alloc_align, cnt, NULL, true);
3941 tree size = create_tmp_var (sizetype);
3942 expand_omp_build_assign (&gsi, size, sz, false);
3943 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3944 size, size_int (16384));
3945 expand_omp_build_assign (&gsi, controlb, cmp);
3946 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3947 NULL_TREE, NULL_TREE);
3948 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3949 fourth_bb = split_block (third_bb, g)->dest;
3950 gsi = gsi_last_nondebug_bb (fourth_bb);
3951 /* FIXME: Once we have allocators, this should use allocator. */
3952 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3953 gimple_call_set_lhs (g, controlp);
3954 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3955 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3956 &gsi, true);
3957 gsi_prev (&gsi);
3958 g = gsi_stmt (gsi);
3959 fifth_bb = split_block (fourth_bb, g)->dest;
3960 gsi = gsi_last_nondebug_bb (fifth_bb);
3961
3962 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3963 gimple_call_set_lhs (g, controlp);
3964 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3965 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3966 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3967 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3968 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3969 {
3970 tree tmp = create_tmp_var (sizetype);
3971 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3972 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3973 TYPE_SIZE_UNIT (pointee_type));
3974 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3975 g = gimple_build_call (alloca_decl, 2, tmp,
3976 size_int (TYPE_ALIGN (pointee_type)));
3977 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3978 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3979 }
3980
3981 sixth_bb = split_block (fifth_bb, g)->dest;
3982 gsi = gsi_last_nondebug_bb (sixth_bb);
3983 }
3984
3985 t = build2 (MULT_EXPR, itype, q, threadid);
3986 t = build2 (PLUS_EXPR, itype, t, tt);
3987 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3988
3989 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3990 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3991
3992 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3993 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3994
3995 /* Remove the GIMPLE_OMP_FOR statement. */
3996 gsi_remove (&gsi, true);
3997
3998 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3999 gsi = gsi_start_bb (seq_start_bb);
4000
4001 tree startvar = fd->loop.v;
4002 tree endvar = NULL_TREE;
4003
4004 if (gimple_omp_for_combined_p (fd->for_stmt))
4005 {
4006 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4007 ? gimple_omp_parallel_clauses (inner_stmt)
4008 : gimple_omp_for_clauses (inner_stmt);
4009 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4010 gcc_assert (innerc);
4011 startvar = OMP_CLAUSE_DECL (innerc);
4012 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4013 OMP_CLAUSE__LOOPTEMP_);
4014 gcc_assert (innerc);
4015 endvar = OMP_CLAUSE_DECL (innerc);
4016 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4017 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4018 {
4019 int i;
4020 for (i = 1; i < fd->collapse; i++)
4021 {
4022 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4023 OMP_CLAUSE__LOOPTEMP_);
4024 gcc_assert (innerc);
4025 }
4026 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4027 OMP_CLAUSE__LOOPTEMP_);
4028 if (innerc)
4029 {
4030 /* If needed (distribute parallel for with lastprivate),
4031 propagate down the total number of iterations. */
4032 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4033 fd->loop.n2);
4034 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4035 GSI_CONTINUE_LINKING);
4036 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4038 }
4039 }
4040 }
4041 t = fold_convert (itype, s0);
4042 t = fold_build2 (MULT_EXPR, itype, t, step);
4043 if (POINTER_TYPE_P (type))
4044 {
4045 t = fold_build_pointer_plus (n1, t);
4046 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4047 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4048 t = fold_convert (signed_type_for (type), t);
4049 }
4050 else
4051 t = fold_build2 (PLUS_EXPR, type, t, n1);
4052 t = fold_convert (TREE_TYPE (startvar), t);
4053 t = force_gimple_operand_gsi (&gsi, t,
4054 DECL_P (startvar)
4055 && TREE_ADDRESSABLE (startvar),
4056 NULL_TREE, false, GSI_CONTINUE_LINKING);
4057 assign_stmt = gimple_build_assign (startvar, t);
4058 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4059 if (cond_var)
4060 {
4061 tree itype = TREE_TYPE (cond_var);
4062 /* For lastprivate(conditional:) itervar, we need some iteration
4063 counter that starts at unsigned non-zero and increases.
4064 Prefer as few IVs as possible, so if we can use startvar
4065 itself, use that, or startvar + constant (those would be
4066 incremented with step), and as a last resort use s0 + 1,
4067 incremented by 1 each iteration.  */
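/* For instance (illustrative only): for
     for (int i = 4; i < n; i++)
   startvar itself can serve as the counter, since it starts at the
   non-zero value 4 and only grows; for
     for (int i = -2; i < n; i++)
   the code below uses startvar + 3 so the counter starts at 1; and for
   pointer iterators, a non-constant N1 or a descending (>) loop it falls
   back to the s0 + 1 form.  */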
4068 if (POINTER_TYPE_P (type)
4069 || TREE_CODE (n1) != INTEGER_CST
4070 || fd->loop.cond_code != LT_EXPR)
4071 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4072 build_int_cst (itype, 1));
4073 else if (tree_int_cst_sgn (n1) == 1)
4074 t = fold_convert (itype, t);
4075 else
4076 {
4077 tree c = fold_convert (itype, n1);
4078 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4079 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4080 }
4081 t = force_gimple_operand_gsi (&gsi, t, false,
4082 NULL_TREE, false, GSI_CONTINUE_LINKING);
4083 assign_stmt = gimple_build_assign (cond_var, t);
4084 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4085 }
4086
4087 t = fold_convert (itype, e0);
4088 t = fold_build2 (MULT_EXPR, itype, t, step);
4089 if (POINTER_TYPE_P (type))
4090 {
4091 t = fold_build_pointer_plus (n1, t);
4092 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4093 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4094 t = fold_convert (signed_type_for (type), t);
4095 }
4096 else
4097 t = fold_build2 (PLUS_EXPR, type, t, n1);
4098 t = fold_convert (TREE_TYPE (startvar), t);
4099 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4100 false, GSI_CONTINUE_LINKING);
4101 if (endvar)
4102 {
4103 assign_stmt = gimple_build_assign (endvar, e);
4104 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4105 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4106 assign_stmt = gimple_build_assign (fd->loop.v, e);
4107 else
4108 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4109 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4110 }
4111 /* Handle linear clause adjustments. */
4112 tree itercnt = NULL_TREE;
4113 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4114 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4115 c; c = OMP_CLAUSE_CHAIN (c))
4116 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4117 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4118 {
4119 tree d = OMP_CLAUSE_DECL (c);
4120 bool is_ref = omp_is_reference (d);
4121 tree t = d, a, dest;
4122 if (is_ref)
4123 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4124 if (itercnt == NULL_TREE)
4125 {
4126 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4127 {
4128 itercnt = fold_build2 (MINUS_EXPR, itype,
4129 fold_convert (itype, n1),
4130 fold_convert (itype, fd->loop.n1));
4131 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4132 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4133 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4134 NULL_TREE, false,
4135 GSI_CONTINUE_LINKING);
4136 }
4137 else
4138 itercnt = s0;
4139 }
4140 tree type = TREE_TYPE (t);
4141 if (POINTER_TYPE_P (type))
4142 type = sizetype;
4143 a = fold_build2 (MULT_EXPR, type,
4144 fold_convert (type, itercnt),
4145 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4146 dest = unshare_expr (t);
4147 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4148 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4149 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4150 false, GSI_CONTINUE_LINKING);
4151 assign_stmt = gimple_build_assign (dest, t);
4152 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4153 }
4154 if (fd->collapse > 1)
4155 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4156
4157 if (!broken_loop)
4158 {
4159 /* The code controlling the sequential loop replaces the
4160 GIMPLE_OMP_CONTINUE. */
4161 gsi = gsi_last_nondebug_bb (cont_bb);
4162 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4163 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4164 vmain = gimple_omp_continue_control_use (cont_stmt);
4165 vback = gimple_omp_continue_control_def (cont_stmt);
4166
4167 if (cond_var)
4168 {
4169 tree itype = TREE_TYPE (cond_var);
4170 tree t2;
4171 if (POINTER_TYPE_P (type)
4172 || TREE_CODE (n1) != INTEGER_CST
4173 || fd->loop.cond_code != LT_EXPR)
4174 t2 = build_int_cst (itype, 1);
4175 else
4176 t2 = fold_convert (itype, step);
4177 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4178 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4179 NULL_TREE, true, GSI_SAME_STMT);
4180 assign_stmt = gimple_build_assign (cond_var, t2);
4181 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4182 }
4183
4184 if (!gimple_omp_for_combined_p (fd->for_stmt))
4185 {
4186 if (POINTER_TYPE_P (type))
4187 t = fold_build_pointer_plus (vmain, step);
4188 else
4189 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4190 t = force_gimple_operand_gsi (&gsi, t,
4191 DECL_P (vback)
4192 && TREE_ADDRESSABLE (vback),
4193 NULL_TREE, true, GSI_SAME_STMT);
4194 assign_stmt = gimple_build_assign (vback, t);
4195 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4196
4197 t = build2 (fd->loop.cond_code, boolean_type_node,
4198 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4199 ? t : vback, e);
4200 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4201 }
4202
4203 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4204 gsi_remove (&gsi, true);
4205
4206 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4207 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4208 }
4209
4210 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4211 gsi = gsi_last_nondebug_bb (exit_bb);
4212 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4213 {
4214 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4215 if (fd->have_reductemp
4216 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4217 && !fd->have_nonctrl_scantemp))
4218 {
4219 tree fn;
4220 if (t)
4221 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4222 else
4223 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4224 gcall *g = gimple_build_call (fn, 0);
4225 if (t)
4226 {
4227 gimple_call_set_lhs (g, t);
4228 if (fd->have_reductemp)
4229 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4230 NOP_EXPR, t),
4231 GSI_SAME_STMT);
4232 }
4233 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4234 }
4235 else
4236 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4237 }
4238 else if (fd->have_pointer_condtemp)
4239 {
4240 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4241 gcall *g = gimple_build_call (fn, 0);
4242 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4243 }
4244 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4245 {
4246 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4247 tree controlp = NULL_TREE, controlb = NULL_TREE;
4248 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4249 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4250 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4251 {
4252 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4253 controlb = OMP_CLAUSE_DECL (c);
4254 else
4255 controlp = OMP_CLAUSE_DECL (c);
4256 if (controlb && controlp)
4257 break;
4258 }
4259 gcc_assert (controlp && controlb);
4260 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4261 NULL_TREE, NULL_TREE);
4262 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4263 exit1_bb = split_block (exit_bb, g)->dest;
4264 gsi = gsi_after_labels (exit1_bb);
4265 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4266 controlp);
4267 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4268 exit2_bb = split_block (exit1_bb, g)->dest;
4269 gsi = gsi_after_labels (exit2_bb);
4270 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4271 controlp);
4272 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4273 exit3_bb = split_block (exit2_bb, g)->dest;
4274 gsi = gsi_after_labels (exit3_bb);
4275 }
4276 gsi_remove (&gsi, true);
4277
4278 /* Connect all the blocks. */
4279 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4280 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4281 ep = find_edge (entry_bb, second_bb);
4282 ep->flags = EDGE_TRUE_VALUE;
4283 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4284 if (fourth_bb)
4285 {
4286 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4287 ep->probability
4288 = profile_probability::guessed_always ().apply_scale (1, 2);
4289 ep = find_edge (third_bb, fourth_bb);
4290 ep->flags = EDGE_TRUE_VALUE;
4291 ep->probability
4292 = profile_probability::guessed_always ().apply_scale (1, 2);
4293 ep = find_edge (fourth_bb, fifth_bb);
4294 redirect_edge_and_branch (ep, sixth_bb);
4295 }
4296 else
4297 sixth_bb = third_bb;
4298 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4299 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4300 if (exit1_bb)
4301 {
4302 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4303 ep->probability
4304 = profile_probability::guessed_always ().apply_scale (1, 2);
4305 ep = find_edge (exit_bb, exit1_bb);
4306 ep->flags = EDGE_TRUE_VALUE;
4307 ep->probability
4308 = profile_probability::guessed_always ().apply_scale (1, 2);
4309 ep = find_edge (exit1_bb, exit2_bb);
4310 redirect_edge_and_branch (ep, exit3_bb);
4311 }
4312
4313 if (!broken_loop)
4314 {
4315 ep = find_edge (cont_bb, body_bb);
4316 if (ep == NULL)
4317 {
4318 ep = BRANCH_EDGE (cont_bb);
4319 gcc_assert (single_succ (ep->dest) == body_bb);
4320 }
4321 if (gimple_omp_for_combined_p (fd->for_stmt))
4322 {
4323 remove_edge (ep);
4324 ep = NULL;
4325 }
4326 else if (fd->collapse > 1)
4327 {
4328 remove_edge (ep);
4329 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4330 }
4331 else
4332 ep->flags = EDGE_TRUE_VALUE;
4333 find_edge (cont_bb, fin_bb)->flags
4334 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4335 }
4336
4337 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4338 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4339 if (fourth_bb)
4340 {
4341 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4342 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4343 }
4344 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4345
4346 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4347 recompute_dominator (CDI_DOMINATORS, body_bb));
4348 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4349 recompute_dominator (CDI_DOMINATORS, fin_bb));
4350 if (exit1_bb)
4351 {
4352 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4353 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4354 }
4355
4356 struct loop *loop = body_bb->loop_father;
4357 if (loop != entry_bb->loop_father)
4358 {
4359 gcc_assert (broken_loop || loop->header == body_bb);
4360 gcc_assert (broken_loop
4361 || loop->latch == region->cont
4362 || single_pred (loop->latch) == region->cont);
4363 return;
4364 }
4365
4366 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4367 {
4368 loop = alloc_loop ();
4369 loop->header = body_bb;
4370 if (collapse_bb == NULL)
4371 loop->latch = cont_bb;
4372 add_loop (loop, body_bb->loop_father);
4373 }
4374 }
4375
4376 /* Return phi in E->DEST with ARG on edge E. */
4377
4378 static gphi *
4379 find_phi_with_arg_on_edge (tree arg, edge e)
4380 {
4381 basic_block bb = e->dest;
4382
4383 for (gphi_iterator gpi = gsi_start_phis (bb);
4384 !gsi_end_p (gpi);
4385 gsi_next (&gpi))
4386 {
4387 gphi *phi = gpi.phi ();
4388 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4389 return phi;
4390 }
4391
4392 return NULL;
4393 }
4394
4395 /* A subroutine of expand_omp_for. Generate code for a parallel
4396 loop with static schedule and a specified chunk size. Given
4397 parameters:
4398
4399 for (V = N1; V cond N2; V += STEP) BODY;
4400
4401 where COND is "<" or ">", we generate pseudocode
4402
4403 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4404 if (cond is <)
4405 adj = STEP - 1;
4406 else
4407 adj = STEP + 1;
4408 if ((__typeof (V)) -1 > 0 && cond is >)
4409 n = -(adj + N2 - N1) / -STEP;
4410 else
4411 n = (adj + N2 - N1) / STEP;
4412 trip = 0;
4413 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4414 here so that V is defined
4415 if the loop is not entered
4416 L0:
4417 s0 = (trip * nthreads + threadid) * CHUNK;
4418 e0 = min (s0 + CHUNK, n);
4419 if (s0 < n) goto L1; else goto L4;
4420 L1:
4421 V = s0 * STEP + N1;
4422 e = e0 * STEP + N1;
4423 L2:
4424 BODY;
4425 V += STEP;
4426 if (V cond e) goto L2; else goto L3;
4427 L3:
4428 trip += 1;
4429 goto L0;
4430 L4:
4431 */
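/* Worked example (illustrative only, not part of the generated code):
   with n = 10, nthreads = 2 and CHUNK = 2, successive trips give thread 0
   the blocks [0,2), [4,6), [8,10) and thread 1 the blocks [2,4), [6,8);
   on the following trip s0 >= n and the thread leaves through L4.  A
   minimal stand-alone C sketch of the per-trip block computation
   (static_chunk_range is a hypothetical helper, not part of libgomp or
   of this file):

     static void
     static_chunk_range (unsigned long n, unsigned long nthreads,
                         unsigned long threadid, unsigned long chunk,
                         unsigned long trip,
                         unsigned long *s0, unsigned long *e0)
     {
       // chunks are dealt out round-robin over the threads
       *s0 = (trip * nthreads + threadid) * chunk;
       *e0 = *s0 + chunk;
       if (*e0 > n)
         *e0 = n;                // e0 = min (s0 + chunk, n)
     }  */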
4432
4433 static void
4434 expand_omp_for_static_chunk (struct omp_region *region,
4435 struct omp_for_data *fd, gimple *inner_stmt)
4436 {
4437 tree n, s0, e0, e, t;
4438 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4439 tree type, itype, vmain, vback, vextra;
4440 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4441 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4442 gimple_stmt_iterator gsi, gsip;
4443 edge se;
4444 bool broken_loop = region->cont == NULL;
4445 tree *counts = NULL;
4446 tree n1, n2, step;
4447 tree reductions = NULL_TREE;
4448 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4449
4450 itype = type = TREE_TYPE (fd->loop.v);
4451 if (POINTER_TYPE_P (type))
4452 itype = signed_type_for (type);
4453
4454 entry_bb = region->entry;
4455 se = split_block (entry_bb, last_stmt (entry_bb));
4456 entry_bb = se->src;
4457 iter_part_bb = se->dest;
4458 cont_bb = region->cont;
4459 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4460 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4461 gcc_assert (broken_loop
4462 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4463 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4464 body_bb = single_succ (seq_start_bb);
4465 if (!broken_loop)
4466 {
4467 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4468 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4469 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4470 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4471 }
4472 exit_bb = region->exit;
4473
4474 /* Trip and adjustment setup goes in ENTRY_BB. */
4475 gsi = gsi_last_nondebug_bb (entry_bb);
4476 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4477 gsip = gsi;
4478 gsi_prev (&gsip);
4479
4480 if (fd->collapse > 1)
4481 {
4482 int first_zero_iter = -1, dummy = -1;
4483 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4484
4485 counts = XALLOCAVEC (tree, fd->collapse);
4486 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4487 fin_bb, first_zero_iter,
4488 dummy_bb, dummy, l2_dom_bb);
4489 t = NULL_TREE;
4490 }
4491 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4492 t = integer_one_node;
4493 else
4494 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4495 fold_convert (type, fd->loop.n1),
4496 fold_convert (type, fd->loop.n2));
4497 if (fd->collapse == 1
4498 && TYPE_UNSIGNED (type)
4499 && (t == NULL_TREE || !integer_onep (t)))
4500 {
4501 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4502 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4503 true, GSI_SAME_STMT);
4504 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4505 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4506 true, GSI_SAME_STMT);
4507 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4508 NULL_TREE, NULL_TREE);
4509 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4510 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4511 expand_omp_regimplify_p, NULL, NULL)
4512 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4513 expand_omp_regimplify_p, NULL, NULL))
4514 {
4515 gsi = gsi_for_stmt (cond_stmt);
4516 gimple_regimplify_operands (cond_stmt, &gsi);
4517 }
4518 se = split_block (entry_bb, cond_stmt);
4519 se->flags = EDGE_TRUE_VALUE;
4520 entry_bb = se->dest;
4521 se->probability = profile_probability::very_likely ();
4522 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4523 se->probability = profile_probability::very_unlikely ();
4524 if (gimple_in_ssa_p (cfun))
4525 {
4526 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4527 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4528 !gsi_end_p (gpi); gsi_next (&gpi))
4529 {
4530 gphi *phi = gpi.phi ();
4531 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4532 se, UNKNOWN_LOCATION);
4533 }
4534 }
4535 gsi = gsi_last_bb (entry_bb);
4536 }
4537
4538 if (fd->lastprivate_conditional)
4539 {
4540 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4541 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4542 if (fd->have_pointer_condtemp)
4543 condtemp = OMP_CLAUSE_DECL (c);
4544 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4545 cond_var = OMP_CLAUSE_DECL (c);
4546 }
4547 if (fd->have_reductemp || fd->have_pointer_condtemp)
4548 {
4549 tree t1 = build_int_cst (long_integer_type_node, 0);
4550 tree t2 = build_int_cst (long_integer_type_node, 1);
4551 tree t3 = build_int_cstu (long_integer_type_node,
4552 (HOST_WIDE_INT_1U << 31) + 1);
4553 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4554 gimple_stmt_iterator gsi2 = gsi_none ();
4555 gimple *g = NULL;
4556 tree mem = null_pointer_node, memv = NULL_TREE;
4557 if (fd->have_reductemp)
4558 {
4559 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4560 reductions = OMP_CLAUSE_DECL (c);
4561 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4562 g = SSA_NAME_DEF_STMT (reductions);
4563 reductions = gimple_assign_rhs1 (g);
4564 OMP_CLAUSE_DECL (c) = reductions;
4565 gsi2 = gsi_for_stmt (g);
4566 }
4567 else
4568 {
4569 if (gsi_end_p (gsip))
4570 gsi2 = gsi_after_labels (region->entry);
4571 else
4572 gsi2 = gsip;
4573 reductions = null_pointer_node;
4574 }
4575 if (fd->have_pointer_condtemp)
4576 {
4577 tree type = TREE_TYPE (condtemp);
4578 memv = create_tmp_var (type);
4579 TREE_ADDRESSABLE (memv) = 1;
4580 unsigned HOST_WIDE_INT sz
4581 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4582 sz *= fd->lastprivate_conditional;
4583 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4584 false);
4585 mem = build_fold_addr_expr (memv);
4586 }
4587 tree t
4588 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4589 9, t1, t2, t2, t3, t1, null_pointer_node,
4590 null_pointer_node, reductions, mem);
4591 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4592 true, GSI_SAME_STMT);
4593 if (fd->have_pointer_condtemp)
4594 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4595 if (fd->have_reductemp)
4596 {
4597 gsi_remove (&gsi2, true);
4598 release_ssa_name (gimple_assign_lhs (g));
4599 }
4600 }
4601 switch (gimple_omp_for_kind (fd->for_stmt))
4602 {
4603 case GF_OMP_FOR_KIND_FOR:
4604 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4605 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4606 break;
4607 case GF_OMP_FOR_KIND_DISTRIBUTE:
4608 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4609 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4610 break;
4611 default:
4612 gcc_unreachable ();
4613 }
4614 nthreads = build_call_expr (nthreads, 0);
4615 nthreads = fold_convert (itype, nthreads);
4616 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4617 true, GSI_SAME_STMT);
4618 threadid = build_call_expr (threadid, 0);
4619 threadid = fold_convert (itype, threadid);
4620 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4621 true, GSI_SAME_STMT);
4622
4623 n1 = fd->loop.n1;
4624 n2 = fd->loop.n2;
4625 step = fd->loop.step;
4626 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4627 {
4628 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4629 OMP_CLAUSE__LOOPTEMP_);
4630 gcc_assert (innerc);
4631 n1 = OMP_CLAUSE_DECL (innerc);
4632 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4633 OMP_CLAUSE__LOOPTEMP_);
4634 gcc_assert (innerc);
4635 n2 = OMP_CLAUSE_DECL (innerc);
4636 }
4637 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4638 true, NULL_TREE, true, GSI_SAME_STMT);
4639 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4640 true, NULL_TREE, true, GSI_SAME_STMT);
4641 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4642 true, NULL_TREE, true, GSI_SAME_STMT);
4643 tree chunk_size = fold_convert (itype, fd->chunk_size);
4644 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4645 chunk_size
4646 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4647 GSI_SAME_STMT);
4648
4649 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4650 t = fold_build2 (PLUS_EXPR, itype, step, t);
4651 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4652 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4653 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4654 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4655 fold_build1 (NEGATE_EXPR, itype, t),
4656 fold_build1 (NEGATE_EXPR, itype, step));
4657 else
4658 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4659 t = fold_convert (itype, t);
4660 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4661 true, GSI_SAME_STMT);
4662
4663 trip_var = create_tmp_reg (itype, ".trip");
4664 if (gimple_in_ssa_p (cfun))
4665 {
4666 trip_init = make_ssa_name (trip_var);
4667 trip_main = make_ssa_name (trip_var);
4668 trip_back = make_ssa_name (trip_var);
4669 }
4670 else
4671 {
4672 trip_init = trip_var;
4673 trip_main = trip_var;
4674 trip_back = trip_var;
4675 }
4676
4677 gassign *assign_stmt
4678 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4679 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4680
4681 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4682 t = fold_build2 (MULT_EXPR, itype, t, step);
4683 if (POINTER_TYPE_P (type))
4684 t = fold_build_pointer_plus (n1, t);
4685 else
4686 t = fold_build2 (PLUS_EXPR, type, t, n1);
4687 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4688 true, GSI_SAME_STMT);
4689
4690 /* Remove the GIMPLE_OMP_FOR. */
4691 gsi_remove (&gsi, true);
4692
4693 gimple_stmt_iterator gsif = gsi;
4694
4695 /* Iteration space partitioning goes in ITER_PART_BB. */
4696 gsi = gsi_last_bb (iter_part_bb);
4697
4698 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4699 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4700 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4701 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4702 false, GSI_CONTINUE_LINKING);
4703
4704 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4705 t = fold_build2 (MIN_EXPR, itype, t, n);
4706 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4707 false, GSI_CONTINUE_LINKING);
4708
4709 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4710 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4711
4712 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4713 gsi = gsi_start_bb (seq_start_bb);
4714
4715 tree startvar = fd->loop.v;
4716 tree endvar = NULL_TREE;
4717
4718 if (gimple_omp_for_combined_p (fd->for_stmt))
4719 {
4720 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4721 ? gimple_omp_parallel_clauses (inner_stmt)
4722 : gimple_omp_for_clauses (inner_stmt);
4723 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4724 gcc_assert (innerc);
4725 startvar = OMP_CLAUSE_DECL (innerc);
4726 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4727 OMP_CLAUSE__LOOPTEMP_);
4728 gcc_assert (innerc);
4729 endvar = OMP_CLAUSE_DECL (innerc);
4730 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4731 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4732 {
4733 int i;
4734 for (i = 1; i < fd->collapse; i++)
4735 {
4736 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4737 OMP_CLAUSE__LOOPTEMP_);
4738 gcc_assert (innerc);
4739 }
4740 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4741 OMP_CLAUSE__LOOPTEMP_);
4742 if (innerc)
4743 {
4744 /* If needed (distribute parallel for with lastprivate),
4745 propagate down the total number of iterations. */
4746 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4747 fd->loop.n2);
4748 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4749 GSI_CONTINUE_LINKING);
4750 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4751 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4752 }
4753 }
4754 }
4755
4756 t = fold_convert (itype, s0);
4757 t = fold_build2 (MULT_EXPR, itype, t, step);
4758 if (POINTER_TYPE_P (type))
4759 {
4760 t = fold_build_pointer_plus (n1, t);
4761 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4762 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4763 t = fold_convert (signed_type_for (type), t);
4764 }
4765 else
4766 t = fold_build2 (PLUS_EXPR, type, t, n1);
4767 t = fold_convert (TREE_TYPE (startvar), t);
4768 t = force_gimple_operand_gsi (&gsi, t,
4769 DECL_P (startvar)
4770 && TREE_ADDRESSABLE (startvar),
4771 NULL_TREE, false, GSI_CONTINUE_LINKING);
4772 assign_stmt = gimple_build_assign (startvar, t);
4773 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4774 if (cond_var)
4775 {
4776 tree itype = TREE_TYPE (cond_var);
4777 /* For lastprivate(conditional:) itervar, we need some iteration
4778 counter that starts at unsigned non-zero and increases.
4779 Prefer as few IVs as possible, so if we can use startvar
4780 itself, use that, or startvar + constant (those would be
4781 incremented with step), and as a last resort use s0 + 1,
4782 incremented by 1 each iteration.  */
4783 if (POINTER_TYPE_P (type)
4784 || TREE_CODE (n1) != INTEGER_CST
4785 || fd->loop.cond_code != LT_EXPR)
4786 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4787 build_int_cst (itype, 1));
4788 else if (tree_int_cst_sgn (n1) == 1)
4789 t = fold_convert (itype, t);
4790 else
4791 {
4792 tree c = fold_convert (itype, n1);
4793 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4794 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4795 }
4796 t = force_gimple_operand_gsi (&gsi, t, false,
4797 NULL_TREE, false, GSI_CONTINUE_LINKING);
4798 assign_stmt = gimple_build_assign (cond_var, t);
4799 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4800 }
4801
4802 t = fold_convert (itype, e0);
4803 t = fold_build2 (MULT_EXPR, itype, t, step);
4804 if (POINTER_TYPE_P (type))
4805 {
4806 t = fold_build_pointer_plus (n1, t);
4807 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4808 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4809 t = fold_convert (signed_type_for (type), t);
4810 }
4811 else
4812 t = fold_build2 (PLUS_EXPR, type, t, n1);
4813 t = fold_convert (TREE_TYPE (startvar), t);
4814 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4815 false, GSI_CONTINUE_LINKING);
4816 if (endvar)
4817 {
4818 assign_stmt = gimple_build_assign (endvar, e);
4819 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4820 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4821 assign_stmt = gimple_build_assign (fd->loop.v, e);
4822 else
4823 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4824 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4825 }
4826 /* Handle linear clause adjustments. */
4827 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4828 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4829 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4830 c; c = OMP_CLAUSE_CHAIN (c))
4831 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4832 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4833 {
4834 tree d = OMP_CLAUSE_DECL (c);
4835 bool is_ref = omp_is_reference (d);
4836 tree t = d, a, dest;
4837 if (is_ref)
4838 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4839 tree type = TREE_TYPE (t);
4840 if (POINTER_TYPE_P (type))
4841 type = sizetype;
4842 dest = unshare_expr (t);
4843 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4844 expand_omp_build_assign (&gsif, v, t);
4845 if (itercnt == NULL_TREE)
4846 {
4847 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4848 {
4849 itercntbias
4850 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4851 fold_convert (itype, fd->loop.n1));
4852 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4853 itercntbias, step);
4854 itercntbias
4855 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4856 NULL_TREE, true,
4857 GSI_SAME_STMT);
4858 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4859 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4860 NULL_TREE, false,
4861 GSI_CONTINUE_LINKING);
4862 }
4863 else
4864 itercnt = s0;
4865 }
4866 a = fold_build2 (MULT_EXPR, type,
4867 fold_convert (type, itercnt),
4868 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4869 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4870 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4871 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4872 false, GSI_CONTINUE_LINKING);
4873 assign_stmt = gimple_build_assign (dest, t);
4874 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4875 }
4876 if (fd->collapse > 1)
4877 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4878
4879 if (!broken_loop)
4880 {
4881 /* The code controlling the sequential loop goes in CONT_BB,
4882 replacing the GIMPLE_OMP_CONTINUE. */
4883 gsi = gsi_last_nondebug_bb (cont_bb);
4884 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4885 vmain = gimple_omp_continue_control_use (cont_stmt);
4886 vback = gimple_omp_continue_control_def (cont_stmt);
4887
4888 if (cond_var)
4889 {
4890 tree itype = TREE_TYPE (cond_var);
4891 tree t2;
4892 if (POINTER_TYPE_P (type)
4893 || TREE_CODE (n1) != INTEGER_CST
4894 || fd->loop.cond_code != LT_EXPR)
4895 t2 = build_int_cst (itype, 1);
4896 else
4897 t2 = fold_convert (itype, step);
4898 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4899 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4900 NULL_TREE, true, GSI_SAME_STMT);
4901 assign_stmt = gimple_build_assign (cond_var, t2);
4902 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4903 }
4904
4905 if (!gimple_omp_for_combined_p (fd->for_stmt))
4906 {
4907 if (POINTER_TYPE_P (type))
4908 t = fold_build_pointer_plus (vmain, step);
4909 else
4910 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4911 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4912 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4913 true, GSI_SAME_STMT);
4914 assign_stmt = gimple_build_assign (vback, t);
4915 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4916
4917 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4918 t = build2 (EQ_EXPR, boolean_type_node,
4919 build_int_cst (itype, 0),
4920 build_int_cst (itype, 1));
4921 else
4922 t = build2 (fd->loop.cond_code, boolean_type_node,
4923 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4924 ? t : vback, e);
4925 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4926 }
4927
4928 /* Remove GIMPLE_OMP_CONTINUE. */
4929 gsi_remove (&gsi, true);
4930
4931 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4932 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4933
4934 /* Trip update code goes into TRIP_UPDATE_BB. */
4935 gsi = gsi_start_bb (trip_update_bb);
4936
4937 t = build_int_cst (itype, 1);
4938 t = build2 (PLUS_EXPR, itype, trip_main, t);
4939 assign_stmt = gimple_build_assign (trip_back, t);
4940 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4941 }
4942
4943 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4944 gsi = gsi_last_nondebug_bb (exit_bb);
4945 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4946 {
4947 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4948 if (fd->have_reductemp || fd->have_pointer_condtemp)
4949 {
4950 tree fn;
4951 if (t)
4952 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4953 else
4954 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4955 gcall *g = gimple_build_call (fn, 0);
4956 if (t)
4957 {
4958 gimple_call_set_lhs (g, t);
4959 if (fd->have_reductemp)
4960 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4961 NOP_EXPR, t),
4962 GSI_SAME_STMT);
4963 }
4964 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4965 }
4966 else
4967 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4968 }
4969 else if (fd->have_pointer_condtemp)
4970 {
4971 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4972 gcall *g = gimple_build_call (fn, 0);
4973 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4974 }
4975 gsi_remove (&gsi, true);
4976
4977 /* Connect the new blocks. */
4978 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4979 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4980
4981 if (!broken_loop)
4982 {
4983 se = find_edge (cont_bb, body_bb);
4984 if (se == NULL)
4985 {
4986 se = BRANCH_EDGE (cont_bb);
4987 gcc_assert (single_succ (se->dest) == body_bb);
4988 }
4989 if (gimple_omp_for_combined_p (fd->for_stmt))
4990 {
4991 remove_edge (se);
4992 se = NULL;
4993 }
4994 else if (fd->collapse > 1)
4995 {
4996 remove_edge (se);
4997 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4998 }
4999 else
5000 se->flags = EDGE_TRUE_VALUE;
5001 find_edge (cont_bb, trip_update_bb)->flags
5002 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5003
5004 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5005 iter_part_bb);
5006 }
5007
5008 if (gimple_in_ssa_p (cfun))
5009 {
5010 gphi_iterator psi;
5011 gphi *phi;
5012 edge re, ene;
5013 edge_var_map *vm;
5014 size_t i;
5015
5016 gcc_assert (fd->collapse == 1 && !broken_loop);
5017
5018 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5019 remove arguments of the phi nodes in fin_bb. We need to create
5020 appropriate phi nodes in iter_part_bb instead. */
5021 se = find_edge (iter_part_bb, fin_bb);
5022 re = single_succ_edge (trip_update_bb);
5023 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5024 ene = single_succ_edge (entry_bb);
5025
5026 psi = gsi_start_phis (fin_bb);
5027 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5028 gsi_next (&psi), ++i)
5029 {
5030 gphi *nphi;
5031 location_t locus;
5032
5033 phi = psi.phi ();
5034 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5035 redirect_edge_var_map_def (vm), 0))
5036 continue;
5037
5038 t = gimple_phi_result (phi);
5039 gcc_assert (t == redirect_edge_var_map_result (vm));
5040
5041 if (!single_pred_p (fin_bb))
5042 t = copy_ssa_name (t, phi);
5043
5044 nphi = create_phi_node (t, iter_part_bb);
5045
5046 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5047 locus = gimple_phi_arg_location_from_edge (phi, se);
5048
5049 /* A special case -- fd->loop.v is not yet computed in
5050 iter_part_bb; we need to use vextra instead. */
5051 if (t == fd->loop.v)
5052 t = vextra;
5053 add_phi_arg (nphi, t, ene, locus);
5054 locus = redirect_edge_var_map_location (vm);
5055 tree back_arg = redirect_edge_var_map_def (vm);
5056 add_phi_arg (nphi, back_arg, re, locus);
5057 edge ce = find_edge (cont_bb, body_bb);
5058 if (ce == NULL)
5059 {
5060 ce = BRANCH_EDGE (cont_bb);
5061 gcc_assert (single_succ (ce->dest) == body_bb);
5062 ce = single_succ_edge (ce->dest);
5063 }
5064 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5065 gcc_assert (inner_loop_phi != NULL);
5066 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5067 find_edge (seq_start_bb, body_bb), locus);
5068
5069 if (!single_pred_p (fin_bb))
5070 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5071 }
5072 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5073 redirect_edge_var_map_clear (re);
5074 if (single_pred_p (fin_bb))
5075 while (1)
5076 {
5077 psi = gsi_start_phis (fin_bb);
5078 if (gsi_end_p (psi))
5079 break;
5080 remove_phi_node (&psi, false);
5081 }
5082
5083 /* Make phi node for trip. */
5084 phi = create_phi_node (trip_main, iter_part_bb);
5085 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5086 UNKNOWN_LOCATION);
5087 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5088 UNKNOWN_LOCATION);
5089 }
5090
5091 if (!broken_loop)
5092 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5093 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5094 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5095 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5096 recompute_dominator (CDI_DOMINATORS, fin_bb));
5097 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5098 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5099 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5100 recompute_dominator (CDI_DOMINATORS, body_bb));
5101
5102 if (!broken_loop)
5103 {
5104 struct loop *loop = body_bb->loop_father;
5105 struct loop *trip_loop = alloc_loop ();
5106 trip_loop->header = iter_part_bb;
5107 trip_loop->latch = trip_update_bb;
5108 add_loop (trip_loop, iter_part_bb->loop_father);
5109
5110 if (loop != entry_bb->loop_father)
5111 {
5112 gcc_assert (loop->header == body_bb);
5113 gcc_assert (loop->latch == region->cont
5114 || single_pred (loop->latch) == region->cont);
5115 trip_loop->inner = loop;
5116 return;
5117 }
5118
5119 if (!gimple_omp_for_combined_p (fd->for_stmt))
5120 {
5121 loop = alloc_loop ();
5122 loop->header = body_bb;
5123 if (collapse_bb == NULL)
5124 loop->latch = cont_bb;
5125 add_loop (loop, trip_loop);
5126 }
5127 }
5128 }
5129
5130 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5131 loop. Given parameters:
5132
5133 for (V = N1; V cond N2; V += STEP) BODY;
5134
5135 where COND is "<" or ">", we generate pseudocode
5136
5137 V = N1;
5138 goto L1;
5139 L0:
5140 BODY;
5141 V += STEP;
5142 L1:
5143 if (V cond N2) goto L0; else goto L2;
5144 L2:
5145
5146 For collapsed loops, given parameters:
5147 collapse(3)
5148 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5149 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5150 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5151 BODY;
5152
5153 we generate pseudocode
5154
5155 if (cond3 is <)
5156 adj = STEP3 - 1;
5157 else
5158 adj = STEP3 + 1;
5159 count3 = (adj + N32 - N31) / STEP3;
5160 if (cond2 is <)
5161 adj = STEP2 - 1;
5162 else
5163 adj = STEP2 + 1;
5164 count2 = (adj + N22 - N21) / STEP2;
5165 if (cond1 is <)
5166 adj = STEP1 - 1;
5167 else
5168 adj = STEP1 + 1;
5169 count1 = (adj + N12 - N11) / STEP1;
5170 count = count1 * count2 * count3;
5171 V = 0;
5172 V1 = N11;
5173 V2 = N21;
5174 V3 = N31;
5175 goto L1;
5176 L0:
5177 BODY;
5178 V += 1;
5179 V3 += STEP3;
5180 V2 += (V3 cond3 N32) ? 0 : STEP2;
5181 V3 = (V3 cond3 N32) ? V3 : N31;
5182 V1 += (V2 cond2 N22) ? 0 : STEP1;
5183 V2 = (V2 cond2 N22) ? V2 : N21;
5184 L1:
5185 if (V < count) goto L0; else goto L2;
5186 L2:
5187
5188 */
5189
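/* For illustration only (a sketch, not additional code emitted here; the
   loop, arrays and bounds below are hypothetical): a user loop such as

     #pragma omp simd collapse(2) safelen(8)
     for (int i = 0; i < n; i++)
       for (int j = 0; j < m; j++)
	 a[i][j] = b[i][j] + c[i][j];

   is linearized as in the pseudocode above with count = n * m and a single
   logical iteration variable V; i and j are stepped and wrapped at the end
   of each iteration exactly like V1/V2/V3 above.  The safelen value is
   recorded as loop->safelen on the resulting loop, bounding the
   vectorization factor the vectorizer may assume is safe.  */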
5190 static void
5191 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5192 {
5193 tree type, t;
5194 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5195 gimple_stmt_iterator gsi;
5196 gimple *stmt;
5197 gcond *cond_stmt;
5198 bool broken_loop = region->cont == NULL;
5199 edge e, ne;
5200 tree *counts = NULL;
5201 int i;
5202 int safelen_int = INT_MAX;
5203 bool dont_vectorize = false;
5204 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5205 OMP_CLAUSE_SAFELEN);
5206 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5207 OMP_CLAUSE__SIMDUID_);
5208 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5209 OMP_CLAUSE_IF);
5210 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5211 OMP_CLAUSE_SIMDLEN);
5212 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5213 OMP_CLAUSE__CONDTEMP_);
5214 tree n1, n2;
5215 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5216
5217 if (safelen)
5218 {
5219 poly_uint64 val;
5220 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5221 if (!poly_int_tree_p (safelen, &val))
5222 safelen_int = 0;
5223 else
5224 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5225 if (safelen_int == 1)
5226 safelen_int = 0;
5227 }
5228 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5229 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5230 {
5231 safelen_int = 0;
5232 dont_vectorize = true;
5233 }
5234 type = TREE_TYPE (fd->loop.v);
5235 entry_bb = region->entry;
5236 cont_bb = region->cont;
5237 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5238 gcc_assert (broken_loop
5239 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5240 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5241 if (!broken_loop)
5242 {
5243 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5244 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5245 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5246 l2_bb = BRANCH_EDGE (entry_bb)->dest;
5247 }
5248 else
5249 {
5250 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5251 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5252 l2_bb = single_succ (l1_bb);
5253 }
5254 exit_bb = region->exit;
5255 l2_dom_bb = NULL;
5256
5257 gsi = gsi_last_nondebug_bb (entry_bb);
5258
5259 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5260 /* Not needed in SSA form right now. */
5261 gcc_assert (!gimple_in_ssa_p (cfun));
5262 if (fd->collapse > 1)
5263 {
5264 int first_zero_iter = -1, dummy = -1;
5265 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5266
5267 counts = XALLOCAVEC (tree, fd->collapse);
5268 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5269 zero_iter_bb, first_zero_iter,
5270 dummy_bb, dummy, l2_dom_bb);
5271 }
5272 if (l2_dom_bb == NULL)
5273 l2_dom_bb = l1_bb;
5274
5275 n1 = fd->loop.n1;
5276 n2 = fd->loop.n2;
5277 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5278 {
5279 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5280 OMP_CLAUSE__LOOPTEMP_);
5281 gcc_assert (innerc);
5282 n1 = OMP_CLAUSE_DECL (innerc);
5283 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5284 OMP_CLAUSE__LOOPTEMP_);
5285 gcc_assert (innerc);
5286 n2 = OMP_CLAUSE_DECL (innerc);
5287 }
5288 tree step = fd->loop.step;
5289
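  /* On SIMT targets each lane executes a strided subset of the iterations:
     N1 is offset by STEP * lane and STEP is scaled by the (dynamic)
     IFN_GOMP_SIMT_VF value computed below.  */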
5290 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5291 OMP_CLAUSE__SIMT_);
5292 if (is_simt)
5293 {
5294 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5295 is_simt = safelen_int > 1;
5296 }
5297 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5298 if (is_simt)
5299 {
5300 simt_lane = create_tmp_var (unsigned_type_node);
5301 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5302 gimple_call_set_lhs (g, simt_lane);
5303 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5304 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5305 fold_convert (TREE_TYPE (step), simt_lane));
5306 n1 = fold_convert (type, n1);
5307 if (POINTER_TYPE_P (type))
5308 n1 = fold_build_pointer_plus (n1, offset);
5309 else
5310 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5311
5312 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5313 if (fd->collapse > 1)
5314 simt_maxlane = build_one_cst (unsigned_type_node);
5315 else if (safelen_int < omp_max_simt_vf ())
5316 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5317 tree vf
5318 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5319 unsigned_type_node, 0);
5320 if (simt_maxlane)
5321 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5322 vf = fold_convert (TREE_TYPE (step), vf);
5323 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5324 }
5325
5326 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5327 if (fd->collapse > 1)
5328 {
5329 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5330 {
5331 gsi_prev (&gsi);
5332 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5333 gsi_next (&gsi);
5334 }
5335 else
5336 for (i = 0; i < fd->collapse; i++)
5337 {
5338 tree itype = TREE_TYPE (fd->loops[i].v);
5339 if (POINTER_TYPE_P (itype))
5340 itype = signed_type_for (itype);
5341 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5342 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5343 }
5344 }
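  /* The _condtemp_ variable supports lastprivate (conditional): it tracks
     the current logical iteration, kept strictly positive either by
     counting from 1 or by mirroring the iteration variable when N1 is a
     known positive constant and the loop counts upwards, so later code can
     tell which iteration performed the last conditional store.  */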
5345 if (cond_var)
5346 {
5347 if (POINTER_TYPE_P (type)
5348 || TREE_CODE (n1) != INTEGER_CST
5349 || fd->loop.cond_code != LT_EXPR
5350 || tree_int_cst_sgn (n1) != 1)
5351 expand_omp_build_assign (&gsi, cond_var,
5352 build_one_cst (TREE_TYPE (cond_var)));
5353 else
5354 expand_omp_build_assign (&gsi, cond_var,
5355 fold_convert (TREE_TYPE (cond_var), n1));
5356 }
5357
5358 /* Remove the GIMPLE_OMP_FOR statement. */
5359 gsi_remove (&gsi, true);
5360
5361 if (!broken_loop)
5362 {
5363 /* Code to control the increment goes in the CONT_BB. */
5364 gsi = gsi_last_nondebug_bb (cont_bb);
5365 stmt = gsi_stmt (gsi);
5366 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5367
5368 if (POINTER_TYPE_P (type))
5369 t = fold_build_pointer_plus (fd->loop.v, step);
5370 else
5371 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5372 expand_omp_build_assign (&gsi, fd->loop.v, t);
5373
5374 if (fd->collapse > 1)
5375 {
5376 i = fd->collapse - 1;
5377 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5378 {
5379 t = fold_convert (sizetype, fd->loops[i].step);
5380 t = fold_build_pointer_plus (fd->loops[i].v, t);
5381 }
5382 else
5383 {
5384 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5385 fd->loops[i].step);
5386 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5387 fd->loops[i].v, t);
5388 }
5389 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5390
5391 for (i = fd->collapse - 1; i > 0; i--)
5392 {
5393 tree itype = TREE_TYPE (fd->loops[i].v);
5394 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5395 if (POINTER_TYPE_P (itype2))
5396 itype2 = signed_type_for (itype2);
5397 t = fold_convert (itype2, fd->loops[i - 1].step);
5398 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5399 GSI_SAME_STMT);
5400 t = build3 (COND_EXPR, itype2,
5401 build2 (fd->loops[i].cond_code, boolean_type_node,
5402 fd->loops[i].v,
5403 fold_convert (itype, fd->loops[i].n2)),
5404 build_int_cst (itype2, 0), t);
5405 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5406 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5407 else
5408 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5409 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5410
5411 t = fold_convert (itype, fd->loops[i].n1);
5412 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5413 GSI_SAME_STMT);
5414 t = build3 (COND_EXPR, itype,
5415 build2 (fd->loops[i].cond_code, boolean_type_node,
5416 fd->loops[i].v,
5417 fold_convert (itype, fd->loops[i].n2)),
5418 fd->loops[i].v, t);
5419 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5420 }
5421 }
5422 if (cond_var)
5423 {
5424 if (POINTER_TYPE_P (type)
5425 || TREE_CODE (n1) != INTEGER_CST
5426 || fd->loop.cond_code != LT_EXPR
5427 || tree_int_cst_sgn (n1) != 1)
5428 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5429 build_one_cst (TREE_TYPE (cond_var)));
5430 else
5431 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5432 fold_convert (TREE_TYPE (cond_var), step));
5433 expand_omp_build_assign (&gsi, cond_var, t);
5434 }
5435
5436 /* Remove GIMPLE_OMP_CONTINUE. */
5437 gsi_remove (&gsi, true);
5438 }
5439
5440 /* Emit the condition in L1_BB. */
5441 gsi = gsi_start_bb (l1_bb);
5442
5443 t = fold_convert (type, n2);
5444 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5445 false, GSI_CONTINUE_LINKING);
5446 tree v = fd->loop.v;
5447 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5448 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5449 false, GSI_CONTINUE_LINKING);
5450 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5451 cond_stmt = gimple_build_cond_empty (t);
5452 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5453 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5454 NULL, NULL)
5455 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5456 NULL, NULL))
5457 {
5458 gsi = gsi_for_stmt (cond_stmt);
5459 gimple_regimplify_operands (cond_stmt, &gsi);
5460 }
5461
5462 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5463 if (is_simt)
5464 {
5465 gsi = gsi_start_bb (l2_bb);
5466 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5467 if (POINTER_TYPE_P (type))
5468 t = fold_build_pointer_plus (fd->loop.v, step);
5469 else
5470 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5471 expand_omp_build_assign (&gsi, fd->loop.v, t);
5472 }
5473
5474 /* Remove GIMPLE_OMP_RETURN. */
5475 gsi = gsi_last_nondebug_bb (exit_bb);
5476 gsi_remove (&gsi, true);
5477
5478 /* Connect the new blocks. */
5479 remove_edge (FALLTHRU_EDGE (entry_bb));
5480
5481 if (!broken_loop)
5482 {
5483 remove_edge (BRANCH_EDGE (entry_bb));
5484 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5485
5486 e = BRANCH_EDGE (l1_bb);
5487 ne = FALLTHRU_EDGE (l1_bb);
5488 e->flags = EDGE_TRUE_VALUE;
5489 }
5490 else
5491 {
5492 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5493
5494 ne = single_succ_edge (l1_bb);
5495 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5496
5497 }
5498 ne->flags = EDGE_FALSE_VALUE;
5499 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5500 ne->probability = e->probability.invert ();
5501
5502 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5503 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5504
5505 if (simt_maxlane)
5506 {
5507 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5508 NULL_TREE, NULL_TREE);
5509 gsi = gsi_last_bb (entry_bb);
5510 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5511 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5512 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5513 FALLTHRU_EDGE (entry_bb)->probability
5514 = profile_probability::guessed_always ().apply_scale (7, 8);
5515 BRANCH_EDGE (entry_bb)->probability
5516 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5517 l2_dom_bb = entry_bb;
5518 }
5519 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5520
5521 if (!broken_loop)
5522 {
5523 struct loop *loop = alloc_loop ();
5524 loop->header = l1_bb;
5525 loop->latch = cont_bb;
5526 add_loop (loop, l1_bb->loop_father);
5527 loop->safelen = safelen_int;
5528 if (simduid)
5529 {
5530 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5531 cfun->has_simduid_loops = true;
5532 }
5533 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5534 the loop. */
5535 if ((flag_tree_loop_vectorize
5536 || !global_options_set.x_flag_tree_loop_vectorize)
5537 && flag_tree_loop_optimize
5538 && loop->safelen > 1)
5539 {
5540 loop->force_vectorize = true;
5541 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5542 {
5543 unsigned HOST_WIDE_INT v
5544 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5545 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5546 loop->simdlen = v;
5547 }
5548 cfun->has_force_vectorize_loops = true;
5549 }
5550 else if (dont_vectorize)
5551 loop->dont_vectorize = true;
5552 }
5553 else if (simduid)
5554 cfun->has_simduid_loops = true;
5555 }
5556
5557 /* Taskloop construct is represented after gimplification with
5558 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5559 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5560 which should just compute all the needed loop temporaries
5561 for GIMPLE_OMP_TASK. */
5562
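/* A rough sketch of what this amounts to (the temporary names below are
   hypothetical; the real decls come from the _looptemp_ clauses): for

     #pragma omp taskloop
     for (i = 0; i < n; i++) BODY;

   the expansion here only evaluates the loop bounds into the first two
   _looptemp_ temporaries of the enclosed GIMPLE_OMP_TASK, conceptually

     __looptemp_1 = 0;   (start)
     __looptemp_2 = n;   (end)

   possibly biased by the iterator type's minimum value as described below;
   the GOMP_taskloop{,_ull} call emitted for the task elsewhere then hands
   each created task its own sub-range through those temporaries.  */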
5563 static void
5564 expand_omp_taskloop_for_outer (struct omp_region *region,
5565 struct omp_for_data *fd,
5566 gimple *inner_stmt)
5567 {
5568 tree type, bias = NULL_TREE;
5569 basic_block entry_bb, cont_bb, exit_bb;
5570 gimple_stmt_iterator gsi;
5571 gassign *assign_stmt;
5572 tree *counts = NULL;
5573 int i;
5574
5575 gcc_assert (inner_stmt);
5576 gcc_assert (region->cont);
5577 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5578 && gimple_omp_task_taskloop_p (inner_stmt));
5579 type = TREE_TYPE (fd->loop.v);
5580
5581 /* See if we need to bias by LLONG_MIN. */
5582 if (fd->iter_type == long_long_unsigned_type_node
5583 && TREE_CODE (type) == INTEGER_TYPE
5584 && !TYPE_UNSIGNED (type))
5585 {
5586 tree n1, n2;
5587
5588 if (fd->loop.cond_code == LT_EXPR)
5589 {
5590 n1 = fd->loop.n1;
5591 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5592 }
5593 else
5594 {
5595 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5596 n2 = fd->loop.n1;
5597 }
5598 if (TREE_CODE (n1) != INTEGER_CST
5599 || TREE_CODE (n2) != INTEGER_CST
5600 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5601 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5602 }
5603
5604 entry_bb = region->entry;
5605 cont_bb = region->cont;
5606 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5607 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5608 exit_bb = region->exit;
5609
5610 gsi = gsi_last_nondebug_bb (entry_bb);
5611 gimple *for_stmt = gsi_stmt (gsi);
5612 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5613 if (fd->collapse > 1)
5614 {
5615 int first_zero_iter = -1, dummy = -1;
5616 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5617
5618 counts = XALLOCAVEC (tree, fd->collapse);
5619 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5620 zero_iter_bb, first_zero_iter,
5621 dummy_bb, dummy, l2_dom_bb);
5622
5623 if (zero_iter_bb)
5624 {
5625 /* Some counts[i] vars might be uninitialized if
5626 some loop has zero iterations. But the body shouldn't
5627 be executed in that case, so just avoid uninit warnings. */
5628 for (i = first_zero_iter; i < fd->collapse; i++)
5629 if (SSA_VAR_P (counts[i]))
5630 TREE_NO_WARNING (counts[i]) = 1;
5631 gsi_prev (&gsi);
5632 edge e = split_block (entry_bb, gsi_stmt (gsi));
5633 entry_bb = e->dest;
5634 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5635 gsi = gsi_last_bb (entry_bb);
5636 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5637 get_immediate_dominator (CDI_DOMINATORS,
5638 zero_iter_bb));
5639 }
5640 }
5641
5642 tree t0, t1;
5643 t1 = fd->loop.n2;
5644 t0 = fd->loop.n1;
5645 if (POINTER_TYPE_P (TREE_TYPE (t0))
5646 && TYPE_PRECISION (TREE_TYPE (t0))
5647 != TYPE_PRECISION (fd->iter_type))
5648 {
5649 /* Avoid casting pointers to integer of a different size. */
5650 tree itype = signed_type_for (type);
5651 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5652 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5653 }
5654 else
5655 {
5656 t1 = fold_convert (fd->iter_type, t1);
5657 t0 = fold_convert (fd->iter_type, t0);
5658 }
5659 if (bias)
5660 {
5661 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5662 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5663 }
5664
5665 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5666 OMP_CLAUSE__LOOPTEMP_);
5667 gcc_assert (innerc);
5668 tree startvar = OMP_CLAUSE_DECL (innerc);
5669 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5670 gcc_assert (innerc);
5671 tree endvar = OMP_CLAUSE_DECL (innerc);
5672 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5673 {
5674 gcc_assert (innerc);
5675 for (i = 1; i < fd->collapse; i++)
5676 {
5677 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5678 OMP_CLAUSE__LOOPTEMP_);
5679 gcc_assert (innerc);
5680 }
5681 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5682 OMP_CLAUSE__LOOPTEMP_);
5683 if (innerc)
5684 {
5685 /* If needed (inner taskloop has lastprivate clause), propagate
5686 down the total number of iterations. */
5687 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5688 NULL_TREE, false,
5689 GSI_CONTINUE_LINKING);
5690 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5691 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5692 }
5693 }
5694
5695 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5696 GSI_CONTINUE_LINKING);
5697 assign_stmt = gimple_build_assign (startvar, t0);
5698 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5699
5700 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5701 GSI_CONTINUE_LINKING);
5702 assign_stmt = gimple_build_assign (endvar, t1);
5703 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5704 if (fd->collapse > 1)
5705 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5706
5707 /* Remove the GIMPLE_OMP_FOR statement. */
5708 gsi = gsi_for_stmt (for_stmt);
5709 gsi_remove (&gsi, true);
5710
5711 gsi = gsi_last_nondebug_bb (cont_bb);
5712 gsi_remove (&gsi, true);
5713
5714 gsi = gsi_last_nondebug_bb (exit_bb);
5715 gsi_remove (&gsi, true);
5716
5717 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5718 remove_edge (BRANCH_EDGE (entry_bb));
5719 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5720 remove_edge (BRANCH_EDGE (cont_bb));
5721 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5722 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5723 recompute_dominator (CDI_DOMINATORS, region->entry));
5724 }
5725
5726 /* Taskloop construct is represented after gimplification with
5727 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5728 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5729 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5730 a single range of iterations. */
5731
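/* A simplified sketch of the result: within each task the construct
   expanded here becomes an ordinary sequential loop over the sub-range the
   runtime stored in the task's _looptemp_ temporaries (hypothetical names):

     V = __looptemp_1;
     while (V cond __looptemp_2)
       {
	 BODY;
	 V += STEP;
       }

   after compensating for any LLONG_MIN bias applied by the outer
   expansion.  */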
5732 static void
5733 expand_omp_taskloop_for_inner (struct omp_region *region,
5734 struct omp_for_data *fd,
5735 gimple *inner_stmt)
5736 {
5737 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5738 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5739 basic_block fin_bb;
5740 gimple_stmt_iterator gsi;
5741 edge ep;
5742 bool broken_loop = region->cont == NULL;
5743 tree *counts = NULL;
5744 tree n1, n2, step;
5745
5746 itype = type = TREE_TYPE (fd->loop.v);
5747 if (POINTER_TYPE_P (type))
5748 itype = signed_type_for (type);
5749
5750 /* See if we need to bias by LLONG_MIN. */
5751 if (fd->iter_type == long_long_unsigned_type_node
5752 && TREE_CODE (type) == INTEGER_TYPE
5753 && !TYPE_UNSIGNED (type))
5754 {
5755 tree n1, n2;
5756
5757 if (fd->loop.cond_code == LT_EXPR)
5758 {
5759 n1 = fd->loop.n1;
5760 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5761 }
5762 else
5763 {
5764 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5765 n2 = fd->loop.n1;
5766 }
5767 if (TREE_CODE (n1) != INTEGER_CST
5768 || TREE_CODE (n2) != INTEGER_CST
5769 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5770 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5771 }
5772
5773 entry_bb = region->entry;
5774 cont_bb = region->cont;
5775 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5776 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5777 gcc_assert (broken_loop
5778 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5779 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5780 if (!broken_loop)
5781 {
5782 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5783 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5784 }
5785 exit_bb = region->exit;
5786
5787 /* Iteration space partitioning goes in ENTRY_BB. */
5788 gsi = gsi_last_nondebug_bb (entry_bb);
5789 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5790
5791 if (fd->collapse > 1)
5792 {
5793 int first_zero_iter = -1, dummy = -1;
5794 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5795
5796 counts = XALLOCAVEC (tree, fd->collapse);
5797 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5798 fin_bb, first_zero_iter,
5799 dummy_bb, dummy, l2_dom_bb);
5800 t = NULL_TREE;
5801 }
5802 else
5803 t = integer_one_node;
5804
5805 step = fd->loop.step;
5806 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5807 OMP_CLAUSE__LOOPTEMP_);
5808 gcc_assert (innerc);
5809 n1 = OMP_CLAUSE_DECL (innerc);
5810 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5811 gcc_assert (innerc);
5812 n2 = OMP_CLAUSE_DECL (innerc);
5813 if (bias)
5814 {
5815 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5816 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5817 }
5818 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5819 true, NULL_TREE, true, GSI_SAME_STMT);
5820 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5821 true, NULL_TREE, true, GSI_SAME_STMT);
5822 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5823 true, NULL_TREE, true, GSI_SAME_STMT);
5824
5825 tree startvar = fd->loop.v;
5826 tree endvar = NULL_TREE;
5827
5828 if (gimple_omp_for_combined_p (fd->for_stmt))
5829 {
5830 tree clauses = gimple_omp_for_clauses (inner_stmt);
5831 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5832 gcc_assert (innerc);
5833 startvar = OMP_CLAUSE_DECL (innerc);
5834 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5835 OMP_CLAUSE__LOOPTEMP_);
5836 gcc_assert (innerc);
5837 endvar = OMP_CLAUSE_DECL (innerc);
5838 }
5839 t = fold_convert (TREE_TYPE (startvar), n1);
5840 t = force_gimple_operand_gsi (&gsi, t,
5841 DECL_P (startvar)
5842 && TREE_ADDRESSABLE (startvar),
5843 NULL_TREE, false, GSI_CONTINUE_LINKING);
5844 gimple *assign_stmt = gimple_build_assign (startvar, t);
5845 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5846
5847 t = fold_convert (TREE_TYPE (startvar), n2);
5848 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5849 false, GSI_CONTINUE_LINKING);
5850 if (endvar)
5851 {
5852 assign_stmt = gimple_build_assign (endvar, e);
5853 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5854 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5855 assign_stmt = gimple_build_assign (fd->loop.v, e);
5856 else
5857 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5858 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5859 }
5860 if (fd->collapse > 1)
5861 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5862
5863 if (!broken_loop)
5864 {
5865 /* The code controlling the sequential loop replaces the
5866 GIMPLE_OMP_CONTINUE. */
5867 gsi = gsi_last_nondebug_bb (cont_bb);
5868 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5869 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5870 vmain = gimple_omp_continue_control_use (cont_stmt);
5871 vback = gimple_omp_continue_control_def (cont_stmt);
5872
5873 if (!gimple_omp_for_combined_p (fd->for_stmt))
5874 {
5875 if (POINTER_TYPE_P (type))
5876 t = fold_build_pointer_plus (vmain, step);
5877 else
5878 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5879 t = force_gimple_operand_gsi (&gsi, t,
5880 DECL_P (vback)
5881 && TREE_ADDRESSABLE (vback),
5882 NULL_TREE, true, GSI_SAME_STMT);
5883 assign_stmt = gimple_build_assign (vback, t);
5884 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5885
5886 t = build2 (fd->loop.cond_code, boolean_type_node,
5887 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5888 ? t : vback, e);
5889 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5890 }
5891
5892 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5893 gsi_remove (&gsi, true);
5894
5895 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5896 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5897 }
5898
5899 /* Remove the GIMPLE_OMP_FOR statement. */
5900 gsi = gsi_for_stmt (fd->for_stmt);
5901 gsi_remove (&gsi, true);
5902
5903 /* Remove the GIMPLE_OMP_RETURN statement. */
5904 gsi = gsi_last_nondebug_bb (exit_bb);
5905 gsi_remove (&gsi, true);
5906
5907 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5908 if (!broken_loop)
5909 remove_edge (BRANCH_EDGE (entry_bb));
5910 else
5911 {
5912 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5913 region->outer->cont = NULL;
5914 }
5915
5916 /* Connect all the blocks. */
5917 if (!broken_loop)
5918 {
5919 ep = find_edge (cont_bb, body_bb);
5920 if (gimple_omp_for_combined_p (fd->for_stmt))
5921 {
5922 remove_edge (ep);
5923 ep = NULL;
5924 }
5925 else if (fd->collapse > 1)
5926 {
5927 remove_edge (ep);
5928 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5929 }
5930 else
5931 ep->flags = EDGE_TRUE_VALUE;
5932 find_edge (cont_bb, fin_bb)->flags
5933 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5934 }
5935
5936 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5937 recompute_dominator (CDI_DOMINATORS, body_bb));
5938 if (!broken_loop)
5939 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5940 recompute_dominator (CDI_DOMINATORS, fin_bb));
5941
5942 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5943 {
5944 struct loop *loop = alloc_loop ();
5945 loop->header = body_bb;
5946 if (collapse_bb == NULL)
5947 loop->latch = cont_bb;
5948 add_loop (loop, body_bb->loop_father);
5949 }
5950 }
5951
5952 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5953 partitioned loop. The lowering here is abstracted, in that the
5954 loop parameters are passed through internal functions, which are
5955 further lowered by oacc_device_lower, once we get to the target
5956 compiler. The loop is of the form:
5957
5958 for (V = B; V LTGT E; V += S) {BODY}
5959
5960 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5961 (constant 0 for no chunking) and we will have a GWV partitioning
5962 mask, specifying dimensions over which the loop is to be
5963 partitioned (see note below). We generate code that looks like
5964 (this ignores tiling):
5965
5966 <entry_bb> [incoming FALL->body, BRANCH->exit]
5967 typedef signedintify (typeof (V)) T; // underlying signed integral type
5968 T range = E - B;
5969 T chunk_no = 0;
5970 T DIR = LTGT == '<' ? +1 : -1;
5971 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5972 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5973
5974 <head_bb> [created by splitting end of entry_bb]
5975 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5976 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5977 if (!(offset LTGT bound)) goto bottom_bb;
5978
5979 <body_bb> [incoming]
5980 V = B + offset;
5981 {BODY}
5982
5983 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5984 offset += step;
5985 if (offset LTGT bound) goto body_bb; [*]
5986
5987 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5988 chunk_no++;
5989 if (chunk_no < chunk_max) goto head_bb;
5990
5991 <exit_bb> [incoming]
5992 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5993
5994 [*] Needed if V live at end of loop. */
5995
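/* When tiling is requested the schema above gains an element loop: the
   outer step is scaled by the tile size, and a second set of GOACC_LOOP
   queries, issued with a GWV argument of -1 to mark them as element-loop
   queries, computes the per-tile bounds.  A simplified sketch of the code
   emitted below (not a separate interface):

     e_range  = MIN (bound - offset, tile_size * element_s);
     e_offset = GOACC_LOOP_OFFSET (dir, e_range, element_s, 0, -1, 0);
     e_bound  = GOACC_LOOP_BOUND  (dir, e_range, element_s, 0, -1, e_offset);
     e_step   = GOACC_LOOP_STEP   (dir, e_range, element_s, 0, -1);

   and the user's loop variables are then initialized from e_offset inside
   the element body block.  */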
5996 static void
5997 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5998 {
5999 tree v = fd->loop.v;
6000 enum tree_code cond_code = fd->loop.cond_code;
6001 enum tree_code plus_code = PLUS_EXPR;
6002
6003 tree chunk_size = integer_minus_one_node;
6004 tree gwv = integer_zero_node;
6005 tree iter_type = TREE_TYPE (v);
6006 tree diff_type = iter_type;
6007 tree plus_type = iter_type;
6008 struct oacc_collapse *counts = NULL;
6009
6010 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6011 == GF_OMP_FOR_KIND_OACC_LOOP);
6012 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6013 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6014
6015 if (POINTER_TYPE_P (iter_type))
6016 {
6017 plus_code = POINTER_PLUS_EXPR;
6018 plus_type = sizetype;
6019 }
6020 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6021 diff_type = signed_type_for (diff_type);
6022 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6023 diff_type = integer_type_node;
6024
6025 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6026 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6027 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6028 basic_block bottom_bb = NULL;
6029
6030 /* entry_bb has two successors; the branch edge is to the exit
6031 block, the fallthrough edge to the body. */
6032 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6033 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6034
6035 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6036 body_bb, or to a block whose only successor is the body_bb. Its
6037 fallthrough successor is the final block (same as the branch
6038 successor of the entry_bb). */
6039 if (cont_bb)
6040 {
6041 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6042 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6043
6044 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6045 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6046 }
6047 else
6048 gcc_assert (!gimple_in_ssa_p (cfun));
6049
6050 /* The exit block only has entry_bb and cont_bb as predecessors. */
6051 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6052
6053 tree chunk_no;
6054 tree chunk_max = NULL_TREE;
6055 tree bound, offset;
6056 tree step = create_tmp_var (diff_type, ".step");
6057 bool up = cond_code == LT_EXPR;
6058 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6059 bool chunking = !gimple_in_ssa_p (cfun);
6060 bool negating;
6061
6062 /* Tiling vars. */
6063 tree tile_size = NULL_TREE;
6064 tree element_s = NULL_TREE;
6065 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6066 basic_block elem_body_bb = NULL;
6067 basic_block elem_cont_bb = NULL;
6068
6069 /* SSA instances. */
6070 tree offset_incr = NULL_TREE;
6071 tree offset_init = NULL_TREE;
6072
6073 gimple_stmt_iterator gsi;
6074 gassign *ass;
6075 gcall *call;
6076 gimple *stmt;
6077 tree expr;
6078 location_t loc;
6079 edge split, be, fte;
6080
6081 /* Split the end of entry_bb to create head_bb. */
6082 split = split_block (entry_bb, last_stmt (entry_bb));
6083 basic_block head_bb = split->dest;
6084 entry_bb = split->src;
6085
6086 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6087 gsi = gsi_last_nondebug_bb (entry_bb);
6088 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6089 loc = gimple_location (for_stmt);
6090
6091 if (gimple_in_ssa_p (cfun))
6092 {
6093 offset_init = gimple_omp_for_index (for_stmt, 0);
6094 gcc_assert (integer_zerop (fd->loop.n1));
6095 /* The SSA parallelizer does gang parallelism. */
6096 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6097 }
6098
6099 if (fd->collapse > 1 || fd->tiling)
6100 {
6101 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6102 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6103 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6104 TREE_TYPE (fd->loop.n2), loc);
6105
6106 if (SSA_VAR_P (fd->loop.n2))
6107 {
6108 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6109 true, GSI_SAME_STMT);
6110 ass = gimple_build_assign (fd->loop.n2, total);
6111 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6112 }
6113 }
6114
6115 tree b = fd->loop.n1;
6116 tree e = fd->loop.n2;
6117 tree s = fd->loop.step;
6118
6119 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6120 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6121
6122 /* Convert the step, avoiding possible unsigned->signed overflow. */
6123 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6124 if (negating)
6125 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6126 s = fold_convert (diff_type, s);
6127 if (negating)
6128 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6129 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6130
6131 if (!chunking)
6132 chunk_size = integer_zero_node;
6133 expr = fold_convert (diff_type, chunk_size);
6134 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6135 NULL_TREE, true, GSI_SAME_STMT);
6136
6137 if (fd->tiling)
6138 {
6139 /* Determine the tile size and element step,
6140 modify the outer loop step size. */
6141 tile_size = create_tmp_var (diff_type, ".tile_size");
6142 expr = build_int_cst (diff_type, 1);
6143 for (int ix = 0; ix < fd->collapse; ix++)
6144 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6145 expr = force_gimple_operand_gsi (&gsi, expr, true,
6146 NULL_TREE, true, GSI_SAME_STMT);
6147 ass = gimple_build_assign (tile_size, expr);
6148 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6149
6150 element_s = create_tmp_var (diff_type, ".element_s");
6151 ass = gimple_build_assign (element_s, s);
6152 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6153
6154 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6155 s = force_gimple_operand_gsi (&gsi, expr, true,
6156 NULL_TREE, true, GSI_SAME_STMT);
6157 }
6158
6159 /* Determine the range, avoiding possible unsigned->signed overflow. */
6160 negating = !up && TYPE_UNSIGNED (iter_type);
6161 expr = fold_build2 (MINUS_EXPR, plus_type,
6162 fold_convert (plus_type, negating ? b : e),
6163 fold_convert (plus_type, negating ? e : b));
6164 expr = fold_convert (diff_type, expr);
6165 if (negating)
6166 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6167 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6168 NULL_TREE, true, GSI_SAME_STMT);
6169
6170 chunk_no = build_int_cst (diff_type, 0);
6171 if (chunking)
6172 {
6173 gcc_assert (!gimple_in_ssa_p (cfun));
6174
6175 expr = chunk_no;
6176 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6177 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6178
6179 ass = gimple_build_assign (chunk_no, expr);
6180 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6181
6182 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6183 build_int_cst (integer_type_node,
6184 IFN_GOACC_LOOP_CHUNKS),
6185 dir, range, s, chunk_size, gwv);
6186 gimple_call_set_lhs (call, chunk_max);
6187 gimple_set_location (call, loc);
6188 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6189 }
6190 else
6191 chunk_size = chunk_no;
6192
6193 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6194 build_int_cst (integer_type_node,
6195 IFN_GOACC_LOOP_STEP),
6196 dir, range, s, chunk_size, gwv);
6197 gimple_call_set_lhs (call, step);
6198 gimple_set_location (call, loc);
6199 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6200
6201 /* Remove the GIMPLE_OMP_FOR. */
6202 gsi_remove (&gsi, true);
6203
6204 /* Fixup edges from head_bb. */
6205 be = BRANCH_EDGE (head_bb);
6206 fte = FALLTHRU_EDGE (head_bb);
6207 be->flags |= EDGE_FALSE_VALUE;
6208 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6209
6210 basic_block body_bb = fte->dest;
6211
6212 if (gimple_in_ssa_p (cfun))
6213 {
6214 gsi = gsi_last_nondebug_bb (cont_bb);
6215 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6216
6217 offset = gimple_omp_continue_control_use (cont_stmt);
6218 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6219 }
6220 else
6221 {
6222 offset = create_tmp_var (diff_type, ".offset");
6223 offset_init = offset_incr = offset;
6224 }
6225 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6226
6227 /* Loop offset & bound go into head_bb. */
6228 gsi = gsi_start_bb (head_bb);
6229
6230 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6231 build_int_cst (integer_type_node,
6232 IFN_GOACC_LOOP_OFFSET),
6233 dir, range, s,
6234 chunk_size, gwv, chunk_no);
6235 gimple_call_set_lhs (call, offset_init);
6236 gimple_set_location (call, loc);
6237 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6238
6239 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6240 build_int_cst (integer_type_node,
6241 IFN_GOACC_LOOP_BOUND),
6242 dir, range, s,
6243 chunk_size, gwv, offset_init);
6244 gimple_call_set_lhs (call, bound);
6245 gimple_set_location (call, loc);
6246 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6247
6248 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6249 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6250 GSI_CONTINUE_LINKING);
6251
6252 /* V assignment goes into body_bb. */
6253 if (!gimple_in_ssa_p (cfun))
6254 {
6255 gsi = gsi_start_bb (body_bb);
6256
6257 expr = build2 (plus_code, iter_type, b,
6258 fold_convert (plus_type, offset));
6259 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6260 true, GSI_SAME_STMT);
6261 ass = gimple_build_assign (v, expr);
6262 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6263
6264 if (fd->collapse > 1 || fd->tiling)
6265 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
6266
6267 if (fd->tiling)
6268 {
6269 /* Determine the range of the element loop -- usually simply
6270 the tile_size, but could be smaller if the final
6271 iteration of the outer loop is a partial tile. */
6272 tree e_range = create_tmp_var (diff_type, ".e_range");
6273
6274 expr = build2 (MIN_EXPR, diff_type,
6275 build2 (MINUS_EXPR, diff_type, bound, offset),
6276 build2 (MULT_EXPR, diff_type, tile_size,
6277 element_s));
6278 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6279 true, GSI_SAME_STMT);
6280 ass = gimple_build_assign (e_range, expr);
6281 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6282
6283 /* Determine bound, offset & step of inner loop. */
6284 e_bound = create_tmp_var (diff_type, ".e_bound");
6285 e_offset = create_tmp_var (diff_type, ".e_offset");
6286 e_step = create_tmp_var (diff_type, ".e_step");
6287
6288 /* Mark these as element loops. */
6289 tree t, e_gwv = integer_minus_one_node;
6290 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6291
6292 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6293 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6294 element_s, chunk, e_gwv, chunk);
6295 gimple_call_set_lhs (call, e_offset);
6296 gimple_set_location (call, loc);
6297 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6298
6299 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6300 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6301 element_s, chunk, e_gwv, e_offset);
6302 gimple_call_set_lhs (call, e_bound);
6303 gimple_set_location (call, loc);
6304 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6305
6306 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6307 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6308 element_s, chunk, e_gwv);
6309 gimple_call_set_lhs (call, e_step);
6310 gimple_set_location (call, loc);
6311 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6312
6313 /* Add test and split block. */
6314 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6315 stmt = gimple_build_cond_empty (expr);
6316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6317 split = split_block (body_bb, stmt);
6318 elem_body_bb = split->dest;
6319 if (cont_bb == body_bb)
6320 cont_bb = elem_body_bb;
6321 body_bb = split->src;
6322
6323 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6324
6325 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6326 if (cont_bb == NULL)
6327 {
6328 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6329 e->probability = profile_probability::even ();
6330 split->probability = profile_probability::even ();
6331 }
6332
6333 /* Initialize the user's loop vars. */
6334 gsi = gsi_start_bb (elem_body_bb);
6335 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6336 }
6337 }
6338
6339 /* Loop increment goes into cont_bb. If this is not a loop, we
6340 will have spawned threads as if it was, and each one will
6341 execute one iteration. The specification is not explicit about
6342 whether such constructs are ill-formed or not, and they can
6343 occur, especially when noreturn routines are involved. */
6344 if (cont_bb)
6345 {
6346 gsi = gsi_last_nondebug_bb (cont_bb);
6347 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6348 loc = gimple_location (cont_stmt);
6349
6350 if (fd->tiling)
6351 {
6352 /* Insert element loop increment and test. */
6353 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6354 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6355 true, GSI_SAME_STMT);
6356 ass = gimple_build_assign (e_offset, expr);
6357 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6358 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6359
6360 stmt = gimple_build_cond_empty (expr);
6361 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6362 split = split_block (cont_bb, stmt);
6363 elem_cont_bb = split->src;
6364 cont_bb = split->dest;
6365
6366 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6367 split->probability = profile_probability::unlikely ().guessed ();
6368 edge latch_edge
6369 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6370 latch_edge->probability = profile_probability::likely ().guessed ();
6371
6372 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6373 skip_edge->probability = profile_probability::unlikely ().guessed ();
6374 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6375 loop_entry_edge->probability
6376 = profile_probability::likely ().guessed ();
6377
6378 gsi = gsi_for_stmt (cont_stmt);
6379 }
6380
6381 /* Increment offset. */
6382 if (gimple_in_ssa_p (cfun))
6383 expr = build2 (plus_code, iter_type, offset,
6384 fold_convert (plus_type, step));
6385 else
6386 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6387 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6388 true, GSI_SAME_STMT);
6389 ass = gimple_build_assign (offset_incr, expr);
6390 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6391 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6392 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6393
6394 /* Remove the GIMPLE_OMP_CONTINUE. */
6395 gsi_remove (&gsi, true);
6396
6397 /* Fixup edges from cont_bb. */
6398 be = BRANCH_EDGE (cont_bb);
6399 fte = FALLTHRU_EDGE (cont_bb);
6400 be->flags |= EDGE_TRUE_VALUE;
6401 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6402
6403 if (chunking)
6404 {
6405 /* Split the beginning of exit_bb to make bottom_bb. We
6406 need to insert a nop at the start, because splitting is
6407 after a stmt, not before. */
6408 gsi = gsi_start_bb (exit_bb);
6409 stmt = gimple_build_nop ();
6410 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6411 split = split_block (exit_bb, stmt);
6412 bottom_bb = split->src;
6413 exit_bb = split->dest;
6414 gsi = gsi_last_bb (bottom_bb);
6415
6416 /* Chunk increment and test goes into bottom_bb. */
6417 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6418 build_int_cst (diff_type, 1));
6419 ass = gimple_build_assign (chunk_no, expr);
6420 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6421
6422 /* Chunk test at end of bottom_bb. */
6423 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6424 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6425 GSI_CONTINUE_LINKING);
6426
6427 /* Fixup edges from bottom_bb. */
6428 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6429 split->probability = profile_probability::unlikely ().guessed ();
6430 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6431 latch_edge->probability = profile_probability::likely ().guessed ();
6432 }
6433 }
6434
6435 gsi = gsi_last_nondebug_bb (exit_bb);
6436 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6437 loc = gimple_location (gsi_stmt (gsi));
6438
6439 if (!gimple_in_ssa_p (cfun))
6440 {
6441 /* Insert the final value of V, in case it is live. This is the
6442 value for the only thread that survives past the join. */
6443 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6444 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6445 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6446 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6447 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6448 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6449 true, GSI_SAME_STMT);
6450 ass = gimple_build_assign (v, expr);
6451 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6452 }
6453
6454 /* Remove the OMP_RETURN. */
6455 gsi_remove (&gsi, true);
6456
6457 if (cont_bb)
6458 {
6459 /* We now have one, two or three nested loops. Update the loop
6460 structures. */
6461 struct loop *parent = entry_bb->loop_father;
6462 struct loop *body = body_bb->loop_father;
6463
6464 if (chunking)
6465 {
6466 struct loop *chunk_loop = alloc_loop ();
6467 chunk_loop->header = head_bb;
6468 chunk_loop->latch = bottom_bb;
6469 add_loop (chunk_loop, parent);
6470 parent = chunk_loop;
6471 }
6472 else if (parent != body)
6473 {
6474 gcc_assert (body->header == body_bb);
6475 gcc_assert (body->latch == cont_bb
6476 || single_pred (body->latch) == cont_bb);
6477 parent = NULL;
6478 }
6479
6480 if (parent)
6481 {
6482 struct loop *body_loop = alloc_loop ();
6483 body_loop->header = body_bb;
6484 body_loop->latch = cont_bb;
6485 add_loop (body_loop, parent);
6486
6487 if (fd->tiling)
6488 {
6489 /* Insert tiling's element loop. */
6490 struct loop *inner_loop = alloc_loop ();
6491 inner_loop->header = elem_body_bb;
6492 inner_loop->latch = elem_cont_bb;
6493 add_loop (inner_loop, body_loop);
6494 }
6495 }
6496 }
6497 }
6498
6499 /* Expand the OMP loop defined by REGION. */
6500
6501 static void
6502 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6503 {
6504 struct omp_for_data fd;
6505 struct omp_for_data_loop *loops;
6506
6507 loops
6508 = (struct omp_for_data_loop *)
6509 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6510 * sizeof (struct omp_for_data_loop));
6511 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6512 &fd, loops);
6513 region->sched_kind = fd.sched_kind;
6514 region->sched_modifiers = fd.sched_modifiers;
6515 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6516
6517 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6518 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6519 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6520 if (region->cont)
6521 {
6522 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6523 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6524 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6525 }
6526 else
6527 /* If there isn't a continue then this is a degenerate case where
6528 the introduction of abnormal edges during lowering will prevent
6529 original loops from being detected. Fix that up. */
6530 loops_state_set (LOOPS_NEED_FIXUP);
6531
6532 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6533 expand_omp_simd (region, &fd);
6534 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6535 {
6536 gcc_assert (!inner_stmt);
6537 expand_oacc_for (region, &fd);
6538 }
6539 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6540 {
6541 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6542 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6543 else
6544 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6545 }
6546 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6547 && !fd.have_ordered)
6548 {
6549 if (fd.chunk_size == NULL)
6550 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6551 else
6552 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6553 }
6554 else
6555 {
6556 int fn_index, start_ix, next_ix;
6557 unsigned HOST_WIDE_INT sched = 0;
6558 tree sched_arg = NULL_TREE;
6559
6560 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6561 == GF_OMP_FOR_KIND_FOR);
6562 if (fd.chunk_size == NULL
6563 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6564 fd.chunk_size = integer_zero_node;
6565 switch (fd.sched_kind)
6566 {
6567 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6568 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6569 && fd.lastprivate_conditional == 0)
6570 {
6571 gcc_assert (!fd.have_ordered);
6572 fn_index = 6;
6573 sched = 4;
6574 }
6575 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6576 && !fd.have_ordered
6577 && fd.lastprivate_conditional == 0)
6578 fn_index = 7;
6579 else
6580 {
6581 fn_index = 3;
6582 sched = (HOST_WIDE_INT_1U << 31);
6583 }
6584 break;
6585 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6586 case OMP_CLAUSE_SCHEDULE_GUIDED:
6587 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6588 && !fd.have_ordered
6589 && fd.lastprivate_conditional == 0)
6590 {
6591 fn_index = 3 + fd.sched_kind;
6592 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6593 break;
6594 }
6595 fn_index = fd.sched_kind;
6596 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6597 sched += (HOST_WIDE_INT_1U << 31);
6598 break;
6599 case OMP_CLAUSE_SCHEDULE_STATIC:
6600 gcc_assert (fd.have_ordered);
6601 fn_index = 0;
6602 sched = (HOST_WIDE_INT_1U << 31) + 1;
6603 break;
6604 default:
6605 gcc_unreachable ();
6606 }
6607 if (!fd.ordered)
6608 fn_index += fd.have_ordered * 8;
6609 if (fd.ordered)
6610 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6611 else
6612 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6613 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6614 if (fd.have_reductemp || fd.have_pointer_condtemp)
6615 {
6616 if (fd.ordered)
6617 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6618 else if (fd.have_ordered)
6619 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6620 else
6621 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6622 sched_arg = build_int_cstu (long_integer_type_node, sched);
6623 if (!fd.chunk_size)
6624 fd.chunk_size = integer_zero_node;
6625 }
6626 if (fd.iter_type == long_long_unsigned_type_node)
6627 {
6628 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6629 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6630 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6631 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6632 }
6633 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6634 (enum built_in_function) next_ix, sched_arg,
6635 inner_stmt);
6636 }
6637
6638 if (gimple_in_ssa_p (cfun))
6639 update_ssa (TODO_update_ssa_only_virtuals);
6640 }
6641
6642 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6643
6644 v = GOMP_sections_start (n);
6645 L0:
6646 switch (v)
6647 {
6648 case 0:
6649 goto L2;
6650 case 1:
6651 section 1;
6652 goto L1;
6653 case 2:
6654 ...
6655 case n:
6656 ...
6657 default:
6658 abort ();
6659 }
6660 L1:
6661 v = GOMP_sections_next ();
6662 goto L0;
6663 L2:
6664 reduction;
6665
6666 If this is a combined parallel sections, replace the call to
6667 GOMP_sections_start with a call to GOMP_sections_next. */
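/* Illustrative sketch (added for exposition; foo and bar are placeholder
   calls, not taken from this file): a user-level

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   is driven by the switch above, with case 1 running foo (), case 2 running
   bar (), and the 0 returned by GOMP_sections_start or GOMP_sections_next
   once no section is left branching to L2.  */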
6668
6669 static void
6670 expand_omp_sections (struct omp_region *region)
6671 {
6672 tree t, u, vin = NULL, vmain, vnext, l2;
6673 unsigned len;
6674 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6675 gimple_stmt_iterator si, switch_si;
6676 gomp_sections *sections_stmt;
6677 gimple *stmt;
6678 gomp_continue *cont;
6679 edge_iterator ei;
6680 edge e;
6681 struct omp_region *inner;
6682 unsigned i, casei;
6683 bool exit_reachable = region->cont != NULL;
6684
6685 gcc_assert (region->exit != NULL);
6686 entry_bb = region->entry;
6687 l0_bb = single_succ (entry_bb);
6688 l1_bb = region->cont;
6689 l2_bb = region->exit;
6690 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6691 l2 = gimple_block_label (l2_bb);
6692 else
6693 {
6694 /* This can happen if there are reductions. */
6695 len = EDGE_COUNT (l0_bb->succs);
6696 gcc_assert (len > 0);
6697 e = EDGE_SUCC (l0_bb, len - 1);
6698 si = gsi_last_nondebug_bb (e->dest);
6699 l2 = NULL_TREE;
6700 if (gsi_end_p (si)
6701 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6702 l2 = gimple_block_label (e->dest);
6703 else
6704 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6705 {
6706 si = gsi_last_nondebug_bb (e->dest);
6707 if (gsi_end_p (si)
6708 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6709 {
6710 l2 = gimple_block_label (e->dest);
6711 break;
6712 }
6713 }
6714 }
6715 if (exit_reachable)
6716 default_bb = create_empty_bb (l1_bb->prev_bb);
6717 else
6718 default_bb = create_empty_bb (l0_bb);
6719
6720 /* We will build a switch() with enough cases for all the
6721 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of the work
6722 and a default case to abort if something goes wrong. */
6723 len = EDGE_COUNT (l0_bb->succs);
6724
6725 /* Use vec::quick_push on label_vec throughout, since we know the size
6726 in advance. */
6727 auto_vec<tree> label_vec (len);
6728
6729 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6730 GIMPLE_OMP_SECTIONS statement. */
6731 si = gsi_last_nondebug_bb (entry_bb);
6732 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6733 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6734 vin = gimple_omp_sections_control (sections_stmt);
6735 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6736 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6737 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6738 tree cond_var = NULL_TREE;
6739 if (reductmp || condtmp)
6740 {
6741 tree reductions = null_pointer_node, mem = null_pointer_node;
6742 tree memv = NULL_TREE, condtemp = NULL_TREE;
6743 gimple_stmt_iterator gsi = gsi_none ();
6744 gimple *g = NULL;
6745 if (reductmp)
6746 {
6747 reductions = OMP_CLAUSE_DECL (reductmp);
6748 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6749 g = SSA_NAME_DEF_STMT (reductions);
6750 reductions = gimple_assign_rhs1 (g);
6751 OMP_CLAUSE_DECL (reductmp) = reductions;
6752 gsi = gsi_for_stmt (g);
6753 }
6754 else
6755 gsi = si;
6756 if (condtmp)
6757 {
6758 condtemp = OMP_CLAUSE_DECL (condtmp);
6759 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6760 OMP_CLAUSE__CONDTEMP_);
6761 cond_var = OMP_CLAUSE_DECL (c);
6762 tree type = TREE_TYPE (condtemp);
6763 memv = create_tmp_var (type);
6764 TREE_ADDRESSABLE (memv) = 1;
6765 unsigned cnt = 0;
6766 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6767 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6768 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6769 ++cnt;
6770 unsigned HOST_WIDE_INT sz
6771 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6772 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6773 false);
6774 mem = build_fold_addr_expr (memv);
6775 }
6776 t = build_int_cst (unsigned_type_node, len - 1);
6777 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6778 stmt = gimple_build_call (u, 3, t, reductions, mem);
6779 gimple_call_set_lhs (stmt, vin);
6780 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6781 if (condtmp)
6782 {
6783 expand_omp_build_assign (&gsi, condtemp, memv, false);
6784 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6785 vin, build_one_cst (TREE_TYPE (cond_var)));
6786 expand_omp_build_assign (&gsi, cond_var, t, false);
6787 }
6788 if (reductmp)
6789 {
6790 gsi_remove (&gsi, true);
6791 release_ssa_name (gimple_assign_lhs (g));
6792 }
6793 }
6794 else if (!is_combined_parallel (region))
6795 {
6796 /* If we are not inside a combined parallel+sections region,
6797 call GOMP_sections_start. */
6798 t = build_int_cst (unsigned_type_node, len - 1);
6799 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6800 stmt = gimple_build_call (u, 1, t);
6801 }
6802 else
6803 {
6804 /* Otherwise, call GOMP_sections_next. */
6805 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6806 stmt = gimple_build_call (u, 0);
6807 }
6808 if (!reductmp && !condtmp)
6809 {
6810 gimple_call_set_lhs (stmt, vin);
6811 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6812 }
6813 gsi_remove (&si, true);
6814
6815 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6816 L0_BB. */
6817 switch_si = gsi_last_nondebug_bb (l0_bb);
6818 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6819 if (exit_reachable)
6820 {
6821 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6822 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6823 vmain = gimple_omp_continue_control_use (cont);
6824 vnext = gimple_omp_continue_control_def (cont);
6825 }
6826 else
6827 {
6828 vmain = vin;
6829 vnext = NULL_TREE;
6830 }
6831
6832 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6833 label_vec.quick_push (t);
6834 i = 1;
6835
6836 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6837 for (inner = region->inner, casei = 1;
6838 inner;
6839 inner = inner->next, i++, casei++)
6840 {
6841 basic_block s_entry_bb, s_exit_bb;
6842
6843 /* Skip optional reduction region. */
6844 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6845 {
6846 --i;
6847 --casei;
6848 continue;
6849 }
6850
6851 s_entry_bb = inner->entry;
6852 s_exit_bb = inner->exit;
6853
6854 t = gimple_block_label (s_entry_bb);
6855 u = build_int_cst (unsigned_type_node, casei);
6856 u = build_case_label (u, NULL, t);
6857 label_vec.quick_push (u);
6858
6859 si = gsi_last_nondebug_bb (s_entry_bb);
6860 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6861 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6862 gsi_remove (&si, true);
6863 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6864
6865 if (s_exit_bb == NULL)
6866 continue;
6867
6868 si = gsi_last_nondebug_bb (s_exit_bb);
6869 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6870 gsi_remove (&si, true);
6871
6872 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6873 }
6874
6875 /* Error handling code goes in DEFAULT_BB. */
6876 t = gimple_block_label (default_bb);
6877 u = build_case_label (NULL, NULL, t);
6878 make_edge (l0_bb, default_bb, 0);
6879 add_bb_to_loop (default_bb, current_loops->tree_root);
6880
6881 stmt = gimple_build_switch (vmain, u, label_vec);
6882 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6883 gsi_remove (&switch_si, true);
6884
6885 si = gsi_start_bb (default_bb);
6886 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6887 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6888
6889 if (exit_reachable)
6890 {
6891 tree bfn_decl;
6892
6893 /* Code to get the next section goes in L1_BB. */
6894 si = gsi_last_nondebug_bb (l1_bb);
6895 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6896
6897 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6898 stmt = gimple_build_call (bfn_decl, 0);
6899 gimple_call_set_lhs (stmt, vnext);
6900 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6901 if (cond_var)
6902 {
6903 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6904 vnext, build_one_cst (TREE_TYPE (cond_var)));
6905 expand_omp_build_assign (&si, cond_var, t, false);
6906 }
6907 gsi_remove (&si, true);
6908
6909 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6910 }
6911
6912 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6913 si = gsi_last_nondebug_bb (l2_bb);
6914 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6915 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6916 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6917 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6918 else
6919 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6920 stmt = gimple_build_call (t, 0);
6921 if (gimple_omp_return_lhs (gsi_stmt (si)))
6922 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6923 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6924 gsi_remove (&si, true);
6925
6926 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6927 }
6928
6929 /* Expand code for an OpenMP single directive. We've already expanded
6930 much of the code; here we simply place the GOMP_barrier call. */
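/* Illustrative sketch (added for exposition; foo is a placeholder call):
   for

     #pragma omp single
       foo ();

   the entry and exit markers are simply removed and, unless a nowait
   clause was present, the barrier built by omp_build_barrier is placed
   where the GIMPLE_OMP_RETURN stood.  */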
6931
6932 static void
6933 expand_omp_single (struct omp_region *region)
6934 {
6935 basic_block entry_bb, exit_bb;
6936 gimple_stmt_iterator si;
6937
6938 entry_bb = region->entry;
6939 exit_bb = region->exit;
6940
6941 si = gsi_last_nondebug_bb (entry_bb);
6942 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6943 gsi_remove (&si, true);
6944 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6945
6946 si = gsi_last_nondebug_bb (exit_bb);
6947 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6948 {
6949 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6950 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6951 }
6952 gsi_remove (&si, true);
6953 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6954 }
6955
6956 /* Generic expansion for OpenMP synchronization directives: master,
6957 taskgroup, ordered and critical. All we need to do here is remove the entry
6958 and exit markers for REGION. */
6959
6960 static void
6961 expand_omp_synch (struct omp_region *region)
6962 {
6963 basic_block entry_bb, exit_bb;
6964 gimple_stmt_iterator si;
6965
6966 entry_bb = region->entry;
6967 exit_bb = region->exit;
6968
6969 si = gsi_last_nondebug_bb (entry_bb);
6970 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6971 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6972 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6973 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6976 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6977 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6978 {
6979 expand_omp_taskreg (region);
6980 return;
6981 }
6982 gsi_remove (&si, true);
6983 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6984
6985 if (exit_bb)
6986 {
6987 si = gsi_last_nondebug_bb (exit_bb);
6988 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6989 gsi_remove (&si, true);
6990 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6991 }
6992 }
6993
6994 /* Translate enum omp_memory_order to enum memmodel. The two enums
6995 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6996 is 0. */
6997
6998 static enum memmodel
6999 omp_memory_order_to_memmodel (enum omp_memory_order mo)
7000 {
7001 switch (mo)
7002 {
7003 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7004 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7005 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7006 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7007 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7008 default: gcc_unreachable ();
7009 }
7010 }
7011
7012 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7013 operation as a normal volatile load. */
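/* Illustrative sketch (added for exposition; v and x are placeholders):
   for a 4-byte int, a user-level

     #pragma omp atomic read
       v = x;

   is expected to become roughly

     v = __atomic_load_4 (&x, mo);

   where mo is the memmodel derived from the directive by
   omp_memory_order_to_memmodel above.  */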
7014
7015 static bool
7016 expand_omp_atomic_load (basic_block load_bb, tree addr,
7017 tree loaded_val, int index)
7018 {
7019 enum built_in_function tmpbase;
7020 gimple_stmt_iterator gsi;
7021 basic_block store_bb;
7022 location_t loc;
7023 gimple *stmt;
7024 tree decl, call, type, itype;
7025
7026 gsi = gsi_last_nondebug_bb (load_bb);
7027 stmt = gsi_stmt (gsi);
7028 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7029 loc = gimple_location (stmt);
7030
7031 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7032 is smaller than word size, then expand_atomic_load assumes that the load
7033 is atomic. We could avoid the builtin entirely in this case. */
7034
7035 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7036 decl = builtin_decl_explicit (tmpbase);
7037 if (decl == NULL_TREE)
7038 return false;
7039
7040 type = TREE_TYPE (loaded_val);
7041 itype = TREE_TYPE (TREE_TYPE (decl));
7042
7043 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7044 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7045 call = build_call_expr_loc (loc, decl, 2, addr, mo);
7046 if (!useless_type_conversion_p (type, itype))
7047 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7048 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7049
7050 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7051 gsi_remove (&gsi, true);
7052
7053 store_bb = single_succ (load_bb);
7054 gsi = gsi_last_nondebug_bb (store_bb);
7055 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7056 gsi_remove (&gsi, true);
7057
7058 if (gimple_in_ssa_p (cfun))
7059 update_ssa (TODO_update_ssa_no_phi);
7060
7061 return true;
7062 }
7063
7064 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7065 operation as a normal volatile store. */
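/* Illustrative sketch (added for exposition; x and expr are placeholders):
   for a 4-byte int, a user-level

     #pragma omp atomic write
       x = expr;

   is expected to become roughly __atomic_store_4 (&x, expr, mo), and, when
   the old value is also needed (a capture), __atomic_exchange_4 instead,
   as selected by the EXCHANGE test below.  */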
7066
7067 static bool
7068 expand_omp_atomic_store (basic_block load_bb, tree addr,
7069 tree loaded_val, tree stored_val, int index)
7070 {
7071 enum built_in_function tmpbase;
7072 gimple_stmt_iterator gsi;
7073 basic_block store_bb = single_succ (load_bb);
7074 location_t loc;
7075 gimple *stmt;
7076 tree decl, call, type, itype;
7077 machine_mode imode;
7078 bool exchange;
7079
7080 gsi = gsi_last_nondebug_bb (load_bb);
7081 stmt = gsi_stmt (gsi);
7082 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7083
7084 /* If the load value is needed, then this isn't a store but an exchange. */
7085 exchange = gimple_omp_atomic_need_value_p (stmt);
7086
7087 gsi = gsi_last_nondebug_bb (store_bb);
7088 stmt = gsi_stmt (gsi);
7089 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7090 loc = gimple_location (stmt);
7091
7092 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7093 is smaller than word size, then expand_atomic_store assumes that the store
7094 is atomic. We could avoid the builtin entirely in this case. */
7095
7096 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7097 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7098 decl = builtin_decl_explicit (tmpbase);
7099 if (decl == NULL_TREE)
7100 return false;
7101
7102 type = TREE_TYPE (stored_val);
7103
7104 /* Dig out the type of the function's second argument. */
7105 itype = TREE_TYPE (decl);
7106 itype = TYPE_ARG_TYPES (itype);
7107 itype = TREE_CHAIN (itype);
7108 itype = TREE_VALUE (itype);
7109 imode = TYPE_MODE (itype);
7110
7111 if (exchange && !can_atomic_exchange_p (imode, true))
7112 return false;
7113
7114 if (!useless_type_conversion_p (itype, type))
7115 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7116 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7117 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7118 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7119 if (exchange)
7120 {
7121 if (!useless_type_conversion_p (type, itype))
7122 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7123 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7124 }
7125
7126 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7127 gsi_remove (&gsi, true);
7128
7129 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
7130 gsi = gsi_last_nondebug_bb (load_bb);
7131 gsi_remove (&gsi, true);
7132
7133 if (gimple_in_ssa_p (cfun))
7134 update_ssa (TODO_update_ssa_no_phi);
7135
7136 return true;
7137 }
7138
7139 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7140 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
7141 size of the data type, and thus usable to find the index of the builtin
7142 decl. Returns false if the expression is not of the proper form. */
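/* Illustrative sketch (added for exposition; x and n are placeholders):
   for a 4-byte int, a user-level

     #pragma omp atomic
       x += n;

   matches the PLUS_EXPR case below and is expected to become roughly
   __atomic_fetch_add_4 (&x, n, mo); when the updated value is needed, the
   __atomic_add_fetch_4 form is used instead.  */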
7143
7144 static bool
7145 expand_omp_atomic_fetch_op (basic_block load_bb,
7146 tree addr, tree loaded_val,
7147 tree stored_val, int index)
7148 {
7149 enum built_in_function oldbase, newbase, tmpbase;
7150 tree decl, itype, call;
7151 tree lhs, rhs;
7152 basic_block store_bb = single_succ (load_bb);
7153 gimple_stmt_iterator gsi;
7154 gimple *stmt;
7155 location_t loc;
7156 enum tree_code code;
7157 bool need_old, need_new;
7158 machine_mode imode;
7159
7160 /* We expect to find the following sequences:
7161
7162 load_bb:
7163 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7164
7165 store_bb:
7166 val = tmp OP something; (or: something OP tmp)
7167 GIMPLE_OMP_ATOMIC_STORE (val)
7168
7169 ???FIXME: Allow a more flexible sequence.
7170 Perhaps use data flow to pick the statements.
7171
7172 */
7173
7174 gsi = gsi_after_labels (store_bb);
7175 stmt = gsi_stmt (gsi);
7176 if (is_gimple_debug (stmt))
7177 {
7178 gsi_next_nondebug (&gsi);
7179 if (gsi_end_p (gsi))
7180 return false;
7181 stmt = gsi_stmt (gsi);
7182 }
7183 loc = gimple_location (stmt);
7184 if (!is_gimple_assign (stmt))
7185 return false;
7186 gsi_next_nondebug (&gsi);
7187 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7188 return false;
7189 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7190 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7191 enum omp_memory_order omo
7192 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7193 enum memmodel mo = omp_memory_order_to_memmodel (omo);
7194 gcc_checking_assert (!need_old || !need_new);
7195
7196 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7197 return false;
7198
7199 /* Check for one of the supported fetch-op operations. */
7200 code = gimple_assign_rhs_code (stmt);
7201 switch (code)
7202 {
7203 case PLUS_EXPR:
7204 case POINTER_PLUS_EXPR:
7205 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7206 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7207 break;
7208 case MINUS_EXPR:
7209 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7210 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7211 break;
7212 case BIT_AND_EXPR:
7213 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7214 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7215 break;
7216 case BIT_IOR_EXPR:
7217 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7218 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7219 break;
7220 case BIT_XOR_EXPR:
7221 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7222 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7223 break;
7224 default:
7225 return false;
7226 }
7227
7228 /* Make sure the expression is of the proper form. */
7229 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7230 rhs = gimple_assign_rhs2 (stmt);
7231 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7232 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7233 rhs = gimple_assign_rhs1 (stmt);
7234 else
7235 return false;
7236
7237 tmpbase = ((enum built_in_function)
7238 ((need_new ? newbase : oldbase) + index + 1));
7239 decl = builtin_decl_explicit (tmpbase);
7240 if (decl == NULL_TREE)
7241 return false;
7242 itype = TREE_TYPE (TREE_TYPE (decl));
7243 imode = TYPE_MODE (itype);
7244
7245 /* We could test all of the various optabs involved, but the fact of the
7246 matter is that (with the exception of i486 vs i586 and xadd) all targets
7247 that support any atomic operation optab also implement compare-and-swap.
7248 Let optabs.c take care of expanding any compare-and-swap loop. */
7249 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
7250 return false;
7251
7252 gsi = gsi_last_nondebug_bb (load_bb);
7253 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7254
7255 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
7256 It only requires that the operation happen atomically. Thus we can
7257 use the RELAXED memory model. */
7258 call = build_call_expr_loc (loc, decl, 3, addr,
7259 fold_convert_loc (loc, itype, rhs),
7260 build_int_cst (NULL, mo));
7261
7262 if (need_old || need_new)
7263 {
7264 lhs = need_old ? loaded_val : stored_val;
7265 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7266 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7267 }
7268 else
7269 call = fold_convert_loc (loc, void_type_node, call);
7270 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7271 gsi_remove (&gsi, true);
7272
7273 gsi = gsi_last_nondebug_bb (store_bb);
7274 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7275 gsi_remove (&gsi, true);
7276 gsi = gsi_last_nondebug_bb (store_bb);
7277 stmt = gsi_stmt (gsi);
7278 gsi_remove (&gsi, true);
7279
7280 if (gimple_in_ssa_p (cfun))
7281 {
7282 release_defs (stmt);
7283 update_ssa (TODO_update_ssa_no_phi);
7284 }
7285
7286 return true;
7287 }
7288
7289 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7290
7291 oldval = *addr;
7292 repeat:
7293 newval = rhs; // with oldval replacing *addr in rhs
7294 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7295 if (oldval != newval)
7296 goto repeat;
7297
7298 INDEX is log2 of the size of the data type, and thus usable to find the
7299 index of the builtin decl. */
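/* Illustrative sketch (added for exposition; x is a placeholder): this
   path covers updates with no fetch-op builtin, e.g. for a 4-byte float

     #pragma omp atomic
       x *= 2.0f;

   the value is view-converted to a same-sized integer and the loop above
   retries __sync_val_compare_and_swap_4 until the swap succeeds.  */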
7300
7301 static bool
7302 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7303 tree addr, tree loaded_val, tree stored_val,
7304 int index)
7305 {
7306 tree loadedi, storedi, initial, new_storedi, old_vali;
7307 tree type, itype, cmpxchg, iaddr, atype;
7308 gimple_stmt_iterator si;
7309 basic_block loop_header = single_succ (load_bb);
7310 gimple *phi, *stmt;
7311 edge e;
7312 enum built_in_function fncode;
7313
7314 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7315 order to use the RELAXED memory model effectively. */
7316 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7317 + index + 1);
7318 cmpxchg = builtin_decl_explicit (fncode);
7319 if (cmpxchg == NULL_TREE)
7320 return false;
7321 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7322 atype = type;
7323 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7324
7325 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7326 || !can_atomic_load_p (TYPE_MODE (itype)))
7327 return false;
7328
7329 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
7330 si = gsi_last_nondebug_bb (load_bb);
7331 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7332
7333 /* For floating-point values, we'll need to view-convert them to integers
7334 so that we can perform the atomic compare and swap. Simplify the
7335 following code by always setting up the "i"ntegral variables. */
7336 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7337 {
7338 tree iaddr_val;
7339
7340 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7341 true));
7342 atype = itype;
7343 iaddr_val
7344 = force_gimple_operand_gsi (&si,
7345 fold_convert (TREE_TYPE (iaddr), addr),
7346 false, NULL_TREE, true, GSI_SAME_STMT);
7347 stmt = gimple_build_assign (iaddr, iaddr_val);
7348 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7349 loadedi = create_tmp_var (itype);
7350 if (gimple_in_ssa_p (cfun))
7351 loadedi = make_ssa_name (loadedi);
7352 }
7353 else
7354 {
7355 iaddr = addr;
7356 loadedi = loaded_val;
7357 }
7358
7359 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7360 tree loaddecl = builtin_decl_explicit (fncode);
7361 if (loaddecl)
7362 initial
7363 = fold_convert (atype,
7364 build_call_expr (loaddecl, 2, iaddr,
7365 build_int_cst (NULL_TREE,
7366 MEMMODEL_RELAXED)));
7367 else
7368 {
7369 tree off
7370 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7371 true), 0);
7372 initial = build2 (MEM_REF, atype, iaddr, off);
7373 }
7374
7375 initial
7376 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7377 GSI_SAME_STMT);
7378
7379 /* Move the value to the LOADEDI temporary. */
7380 if (gimple_in_ssa_p (cfun))
7381 {
7382 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7383 phi = create_phi_node (loadedi, loop_header);
7384 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7385 initial);
7386 }
7387 else
7388 gsi_insert_before (&si,
7389 gimple_build_assign (loadedi, initial),
7390 GSI_SAME_STMT);
7391 if (loadedi != loaded_val)
7392 {
7393 gimple_stmt_iterator gsi2;
7394 tree x;
7395
7396 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7397 gsi2 = gsi_start_bb (loop_header);
7398 if (gimple_in_ssa_p (cfun))
7399 {
7400 gassign *stmt;
7401 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7402 true, GSI_SAME_STMT);
7403 stmt = gimple_build_assign (loaded_val, x);
7404 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7405 }
7406 else
7407 {
7408 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7409 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7410 true, GSI_SAME_STMT);
7411 }
7412 }
7413 gsi_remove (&si, true);
7414
7415 si = gsi_last_nondebug_bb (store_bb);
7416 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7417
7418 if (iaddr == addr)
7419 storedi = stored_val;
7420 else
7421 storedi
7422 = force_gimple_operand_gsi (&si,
7423 build1 (VIEW_CONVERT_EXPR, itype,
7424 stored_val), true, NULL_TREE, true,
7425 GSI_SAME_STMT);
7426
7427 /* Build the compare&swap statement. */
7428 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7429 new_storedi = force_gimple_operand_gsi (&si,
7430 fold_convert (TREE_TYPE (loadedi),
7431 new_storedi),
7432 true, NULL_TREE,
7433 true, GSI_SAME_STMT);
7434
7435 if (gimple_in_ssa_p (cfun))
7436 old_vali = loadedi;
7437 else
7438 {
7439 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7440 stmt = gimple_build_assign (old_vali, loadedi);
7441 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7442
7443 stmt = gimple_build_assign (loadedi, new_storedi);
7444 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7445 }
7446
7447 /* Note that we always perform the comparison as an integer, even for
7448 floating point. This allows the atomic operation to properly
7449 succeed even with NaNs and -0.0. */
7450 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7451 stmt = gimple_build_cond_empty (ne);
7452 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7453
7454 /* Update cfg. */
7455 e = single_succ_edge (store_bb);
7456 e->flags &= ~EDGE_FALLTHRU;
7457 e->flags |= EDGE_FALSE_VALUE;
7458 /* Expect no looping. */
7459 e->probability = profile_probability::guessed_always ();
7460
7461 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7462 e->probability = profile_probability::guessed_never ();
7463
7464 /* Copy the new value to loadedi (we already did that before the condition
7465 if we are not in SSA). */
7466 if (gimple_in_ssa_p (cfun))
7467 {
7468 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7469 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7470 }
7471
7472 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7473 gsi_remove (&si, true);
7474
7475 struct loop *loop = alloc_loop ();
7476 loop->header = loop_header;
7477 loop->latch = store_bb;
7478 add_loop (loop, loop_header->loop_father);
7479
7480 if (gimple_in_ssa_p (cfun))
7481 update_ssa (TODO_update_ssa_no_phi);
7482
7483 return true;
7484 }
7485
7486 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7487
7488 GOMP_atomic_start ();
7489 *addr = rhs;
7490 GOMP_atomic_end ();
7491
7492 The result is not globally atomic, but works so long as all parallel
7493 references are within #pragma omp atomic directives. According to
7494 responses received from omp@openmp.org, this appears to be within spec.
7495 Which makes sense, since that's how several other compilers handle
7496 this situation as well.
7497 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7498 expanding. STORED_VAL is the operand of the matching
7499 GIMPLE_OMP_ATOMIC_STORE.
7500
7501 We replace
7502 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7503 loaded_val = *addr;
7504
7505 and replace
7506 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7507 *addr = stored_val;
7508 */
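/* Illustrative sketch (added for exposition; x and y are placeholders):
   when no suitable compare-and-swap exists, e.g. for an unsupported size,

     #pragma omp atomic
       x += y;

   ends up bracketed by the library calls:

     GOMP_atomic_start ();
     x = x + y;
     GOMP_atomic_end ();  */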
7509
7510 static bool
7511 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7512 tree addr, tree loaded_val, tree stored_val)
7513 {
7514 gimple_stmt_iterator si;
7515 gassign *stmt;
7516 tree t;
7517
7518 si = gsi_last_nondebug_bb (load_bb);
7519 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7520
7521 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7522 t = build_call_expr (t, 0);
7523 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7524
7525 tree mem = build_simple_mem_ref (addr);
7526 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7527 TREE_OPERAND (mem, 1)
7528 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7529 true),
7530 TREE_OPERAND (mem, 1));
7531 stmt = gimple_build_assign (loaded_val, mem);
7532 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7533 gsi_remove (&si, true);
7534
7535 si = gsi_last_nondebug_bb (store_bb);
7536 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7537
7538 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7539 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7540
7541 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7542 t = build_call_expr (t, 0);
7543 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7544 gsi_remove (&si, true);
7545
7546 if (gimple_in_ssa_p (cfun))
7547 update_ssa (TODO_update_ssa_no_phi);
7548 return true;
7549 }
7550
7551 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
7552 using expand_omp_atomic_fetch_op. If that fails, we try to
7553 call expand_omp_atomic_pipeline, and if it fails too, the
7554 ultimate fallback is wrapping the operation in a mutex
7555 (expand_omp_atomic_mutex). REGION is the atomic region built
7556 by build_omp_regions_1(). */
7557
7558 static void
7559 expand_omp_atomic (struct omp_region *region)
7560 {
7561 basic_block load_bb = region->entry, store_bb = region->exit;
7562 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7563 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7564 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7565 tree addr = gimple_omp_atomic_load_rhs (load);
7566 tree stored_val = gimple_omp_atomic_store_val (store);
7567 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7568 HOST_WIDE_INT index;
7569
7570 /* Make sure the type is one of the supported sizes. */
7571 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7572 index = exact_log2 (index);
7573 if (index >= 0 && index <= 4)
7574 {
7575 unsigned int align = TYPE_ALIGN_UNIT (type);
7576
7577 /* __sync builtins require strict data alignment. */
7578 if (exact_log2 (align) >= index)
7579 {
7580 /* Atomic load. */
7581 scalar_mode smode;
7582 if (loaded_val == stored_val
7583 && (is_int_mode (TYPE_MODE (type), &smode)
7584 || is_float_mode (TYPE_MODE (type), &smode))
7585 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7586 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7587 return;
7588
7589 /* Atomic store. */
7590 if ((is_int_mode (TYPE_MODE (type), &smode)
7591 || is_float_mode (TYPE_MODE (type), &smode))
7592 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7593 && store_bb == single_succ (load_bb)
7594 && first_stmt (store_bb) == store
7595 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7596 stored_val, index))
7597 return;
7598
7599 /* When possible, use specialized atomic update functions. */
7600 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7601 && store_bb == single_succ (load_bb)
7602 && expand_omp_atomic_fetch_op (load_bb, addr,
7603 loaded_val, stored_val, index))
7604 return;
7605
7606 /* If we don't have specialized __sync builtins, try to implement
7607 as a compare and swap loop. */
7608 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7609 loaded_val, stored_val, index))
7610 return;
7611 }
7612 }
7613
7614 /* The ultimate fallback is wrapping the operation in a mutex. */
7615 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7616 }
7617
7618 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7619 at REGION_EXIT. */
7620
7621 static void
7622 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7623 basic_block region_exit)
7624 {
7625 struct loop *outer = region_entry->loop_father;
7626 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7627
7628 /* Don't parallelize the kernels region if it contains more than one outer
7629 loop. */
7630 unsigned int nr_outer_loops = 0;
7631 struct loop *single_outer = NULL;
7632 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7633 {
7634 gcc_assert (loop_outer (loop) == outer);
7635
7636 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7637 continue;
7638
7639 if (region_exit != NULL
7640 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7641 continue;
7642
7643 nr_outer_loops++;
7644 single_outer = loop;
7645 }
7646 if (nr_outer_loops != 1)
7647 return;
7648
7649 for (struct loop *loop = single_outer->inner;
7650 loop != NULL;
7651 loop = loop->inner)
7652 if (loop->next)
7653 return;
7654
7655 /* Mark the loops in the region. */
7656 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7657 loop->in_oacc_kernels_region = true;
7658 }
7659
7660 /* Types used to pass grid and workgroup sizes to kernel invocation. */
7661
7662 struct GTY(()) grid_launch_attributes_trees
7663 {
7664 tree kernel_dim_array_type;
7665 tree kernel_lattrs_dimnum_decl;
7666 tree kernel_lattrs_grid_decl;
7667 tree kernel_lattrs_group_decl;
7668 tree kernel_launch_attributes_type;
7669 };
7670
7671 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7672
7673 /* Create types used to pass kernel launch attributes to target. */
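/* As a hedged illustration, the record built below corresponds roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   with the three-element arrays coming from the 0..2 index type created
   in the function.  */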
7674
7675 static void
7676 grid_create_kernel_launch_attr_types (void)
7677 {
7678 if (grid_attr_trees)
7679 return;
7680 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7681
7682 tree dim_arr_index_type
7683 = build_index_type (build_int_cst (integer_type_node, 2));
7684 grid_attr_trees->kernel_dim_array_type
7685 = build_array_type (uint32_type_node, dim_arr_index_type);
7686
7687 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7688 grid_attr_trees->kernel_lattrs_dimnum_decl
7689 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7690 uint32_type_node);
7691 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7692
7693 grid_attr_trees->kernel_lattrs_grid_decl
7694 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7695 grid_attr_trees->kernel_dim_array_type);
7696 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7697 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7698 grid_attr_trees->kernel_lattrs_group_decl
7699 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7700 grid_attr_trees->kernel_dim_array_type);
7701 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7702 = grid_attr_trees->kernel_lattrs_grid_decl;
7703 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7704 "__gomp_kernel_launch_attributes",
7705 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7706 }
7707
7708 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7709 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7710 of type uint32_type_node. */
7711
7712 static void
7713 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7714 tree fld_decl, int index, tree value)
7715 {
7716 tree ref = build4 (ARRAY_REF, uint32_type_node,
7717 build3 (COMPONENT_REF,
7718 grid_attr_trees->kernel_dim_array_type,
7719 range_var, fld_decl, NULL_TREE),
7720 build_int_cst (integer_type_node, index),
7721 NULL_TREE, NULL_TREE);
7722 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7723 }
7724
7725 /* Return a tree representation of a pointer to a structure with grid and
7726 work-group size information. Statements filling that information will be
7727 inserted before GSI. TGT_STMT is the target statement which has the
7728 necessary information in it. */
7729
7730 static tree
7731 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7732 gomp_target *tgt_stmt)
7733 {
7734 grid_create_kernel_launch_attr_types ();
7735 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7736 "__kernel_launch_attrs");
7737
7738 unsigned max_dim = 0;
7739 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7740 clause;
7741 clause = OMP_CLAUSE_CHAIN (clause))
7742 {
7743 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7744 continue;
7745
7746 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7747 max_dim = MAX (dim, max_dim);
7748
7749 grid_insert_store_range_dim (gsi, lattrs,
7750 grid_attr_trees->kernel_lattrs_grid_decl,
7751 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7752 grid_insert_store_range_dim (gsi, lattrs,
7753 grid_attr_trees->kernel_lattrs_group_decl,
7754 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7755 }
7756
7757 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7758 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7759 gcc_checking_assert (max_dim <= 2);
7760 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7761 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7762 GSI_SAME_STMT);
7763 TREE_ADDRESSABLE (lattrs) = 1;
7764 return build_fold_addr_expr (lattrs);
7765 }
7766
7767 /* Build target argument identifier from the DEVICE identifier, value
7768 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7769
7770 static tree
7771 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7772 {
7773 tree t = build_int_cst (integer_type_node, device);
7774 if (subseqent_param)
7775 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7776 build_int_cst (integer_type_node,
7777 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7778 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7779 build_int_cst (integer_type_node, id));
7780 return t;
7781 }
7782
7783 /* Like above but return it in a type that can be directly stored as an element
7784 of the argument array. */
7785
7786 static tree
7787 get_target_argument_identifier (int device, bool subseqent_param, int id)
7788 {
7789 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7790 return fold_convert (ptr_type_node, t);
7791 }
7792
7793 /* Return a target argument consisting of DEVICE identifier, value identifier
7794 ID, and the actual VALUE. */
7795
7796 static tree
7797 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7798 tree value)
7799 {
7800 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7801 fold_convert (integer_type_node, value),
7802 build_int_cst (unsigned_type_node,
7803 GOMP_TARGET_ARG_VALUE_SHIFT));
7804 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7805 get_target_argument_identifier_1 (device, false, id));
7806 t = fold_convert (ptr_type_node, t);
7807 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7808 }
7809
7810 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7811 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
7812 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7813 arguments. */
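/* A hedged worked example: a constant num_teams (4) fits in the +-2^15
   window, so a single pointer-sized argument is pushed, roughly

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   cast to ptr_type_node; a value outside that range instead takes two
   slots, the first of which carries GOMP_TARGET_ARG_SUBSEQUENT_PARAM.  */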
7814
7815 static void
7816 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7817 int id, tree value, vec <tree> *args)
7818 {
7819 if (tree_fits_shwi_p (value)
7820 && tree_to_shwi (value) > -(1 << 15)
7821 && tree_to_shwi (value) < (1 << 15))
7822 args->quick_push (get_target_argument_value (gsi, device, id, value));
7823 else
7824 {
7825 args->quick_push (get_target_argument_identifier (device, true, id));
7826 value = fold_convert (ptr_type_node, value);
7827 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7828 GSI_SAME_STMT);
7829 args->quick_push (value);
7830 }
7831 }
7832
7833 /* Create an array of arguments that is then passed to GOMP_target. */
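/* A hedged sketch of the ".omp_target_args" array built below:

     { <encoded num_teams>, <encoded thread_limit>,
       <HSA identifier>, <&kernel launch attrs>,   (only with _griddim_)
       NULL }

   whose address is what the function returns.  */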
7834
7835 static tree
7836 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7837 {
7838 auto_vec <tree, 6> args;
7839 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7840 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7841 if (c)
7842 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7843 else
7844 t = integer_minus_one_node;
7845 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7846 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7847
7848 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7849 if (c)
7850 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7851 else
7852 t = integer_minus_one_node;
7853 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7854 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7855 &args);
7856
7857 /* Add HSA-specific grid sizes, if available. */
7858 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7859 OMP_CLAUSE__GRIDDIM_))
7860 {
7861 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7862 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7863 args.quick_push (t);
7864 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7865 }
7866
7867 /* Produce more, perhaps device specific, arguments here. */
7868
7869 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7870 args.length () + 1),
7871 ".omp_target_args");
7872 for (unsigned i = 0; i < args.length (); i++)
7873 {
7874 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7875 build_int_cst (integer_type_node, i),
7876 NULL_TREE, NULL_TREE);
7877 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7878 GSI_SAME_STMT);
7879 }
7880 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7881 build_int_cst (integer_type_node, args.length ()),
7882 NULL_TREE, NULL_TREE);
7883 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7884 GSI_SAME_STMT);
7885 TREE_ADDRESSABLE (argarray) = 1;
7886 return build_fold_addr_expr (argarray);
7887 }
7888
7889 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7890
7891 static void
7892 expand_omp_target (struct omp_region *region)
7893 {
7894 basic_block entry_bb, exit_bb, new_bb;
7895 struct function *child_cfun;
7896 tree child_fn, block, t;
7897 gimple_stmt_iterator gsi;
7898 gomp_target *entry_stmt;
7899 gimple *stmt;
7900 edge e;
7901 bool offloaded, data_region;
7902
7903 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7904 new_bb = region->entry;
7905
7906 offloaded = is_gimple_omp_offloaded (entry_stmt);
7907 switch (gimple_omp_target_kind (entry_stmt))
7908 {
7909 case GF_OMP_TARGET_KIND_REGION:
7910 case GF_OMP_TARGET_KIND_UPDATE:
7911 case GF_OMP_TARGET_KIND_ENTER_DATA:
7912 case GF_OMP_TARGET_KIND_EXIT_DATA:
7913 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7914 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7915 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7916 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7917 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7918 data_region = false;
7919 break;
7920 case GF_OMP_TARGET_KIND_DATA:
7921 case GF_OMP_TARGET_KIND_OACC_DATA:
7922 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7923 data_region = true;
7924 break;
7925 default:
7926 gcc_unreachable ();
7927 }
7928
7929 child_fn = NULL_TREE;
7930 child_cfun = NULL;
7931 if (offloaded)
7932 {
7933 child_fn = gimple_omp_target_child_fn (entry_stmt);
7934 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7935 }
7936
7937 /* Supported by expand_omp_taskreg, but not here. */
7938 if (child_cfun != NULL)
7939 gcc_checking_assert (!child_cfun->cfg);
7940 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7941
7942 entry_bb = region->entry;
7943 exit_bb = region->exit;
7944
7945 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7946 {
7947 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7948
7949 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7950 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7951 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7952 DECL_ATTRIBUTES (child_fn)
7953 = tree_cons (get_identifier ("oacc kernels"),
7954 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7955 }
7956
7957 if (offloaded)
7958 {
7959 unsigned srcidx, dstidx, num;
7960
7961 /* If the offloading region needs data sent from the parent
7962 function, then the very first statement (except possible
7963 tree profile counter updates) of the offloading body
7964 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7965 &.OMP_DATA_O is passed as an argument to the child function,
7966 we need to replace it with the argument as seen by the child
7967 function.
7968
7969 In most cases, this will end up being the identity assignment
7970 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7971 a function call that has been inlined, the original PARM_DECL
7972 .OMP_DATA_I may have been converted into a different local
7973 variable. In which case, we need to keep the assignment. */
7974 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7975 if (data_arg)
7976 {
7977 basic_block entry_succ_bb = single_succ (entry_bb);
7978 gimple_stmt_iterator gsi;
7979 tree arg;
7980 gimple *tgtcopy_stmt = NULL;
7981 tree sender = TREE_VEC_ELT (data_arg, 0);
7982
7983 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7984 {
7985 gcc_assert (!gsi_end_p (gsi));
7986 stmt = gsi_stmt (gsi);
7987 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7988 continue;
7989
7990 if (gimple_num_ops (stmt) == 2)
7991 {
7992 tree arg = gimple_assign_rhs1 (stmt);
7993
7994 /* We're ignoring the subcode because we're
7995 effectively doing a STRIP_NOPS. */
7996
7997 if (TREE_CODE (arg) == ADDR_EXPR
7998 && TREE_OPERAND (arg, 0) == sender)
7999 {
8000 tgtcopy_stmt = stmt;
8001 break;
8002 }
8003 }
8004 }
8005
8006 gcc_assert (tgtcopy_stmt != NULL);
8007 arg = DECL_ARGUMENTS (child_fn);
8008
8009 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8010 gsi_remove (&gsi, true);
8011 }
8012
8013 /* Declare local variables needed in CHILD_CFUN. */
8014 block = DECL_INITIAL (child_fn);
8015 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8016 /* The gimplifier could record temporaries in the offloading block
8017 rather than in the containing function's local_decls chain,
8018 which would mean cgraph missed finalizing them. Do it now. */
8019 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8020 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8021 varpool_node::finalize_decl (t);
8022 DECL_SAVED_TREE (child_fn) = NULL;
8023 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8024 gimple_set_body (child_fn, NULL);
8025 TREE_USED (block) = 1;
8026
8027 /* Reset DECL_CONTEXT on function arguments. */
8028 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8029 DECL_CONTEXT (t) = child_fn;
8030
8031 /* Split ENTRY_BB at GIMPLE_*,
8032 so that it can be moved to the child function. */
8033 gsi = gsi_last_nondebug_bb (entry_bb);
8034 stmt = gsi_stmt (gsi);
8035 gcc_assert (stmt
8036 && gimple_code (stmt) == gimple_code (entry_stmt));
8037 e = split_block (entry_bb, stmt);
8038 gsi_remove (&gsi, true);
8039 entry_bb = e->dest;
8040 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8041
8042 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8043 if (exit_bb)
8044 {
8045 gsi = gsi_last_nondebug_bb (exit_bb);
8046 gcc_assert (!gsi_end_p (gsi)
8047 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8048 stmt = gimple_build_return (NULL);
8049 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8050 gsi_remove (&gsi, true);
8051 }
8052
8053 /* Move the offloading region into CHILD_CFUN. */
8054
8055 block = gimple_block (entry_stmt);
8056
8057 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8058 if (exit_bb)
8059 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8060 /* When the OMP expansion process cannot guarantee an up-to-date
8061 loop tree, arrange for the child function to fix up loops. */
8062 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8063 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8064
8065 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8066 num = vec_safe_length (child_cfun->local_decls);
8067 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8068 {
8069 t = (*child_cfun->local_decls)[srcidx];
8070 if (DECL_CONTEXT (t) == cfun->decl)
8071 continue;
8072 if (srcidx != dstidx)
8073 (*child_cfun->local_decls)[dstidx] = t;
8074 dstidx++;
8075 }
8076 if (dstidx != num)
8077 vec_safe_truncate (child_cfun->local_decls, dstidx);
8078
8079 /* Inform the callgraph about the new function. */
8080 child_cfun->curr_properties = cfun->curr_properties;
8081 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8082 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8083 cgraph_node *node = cgraph_node::get_create (child_fn);
8084 node->parallelized_function = 1;
8085 cgraph_node::add_new_function (child_fn, true);
8086
8087 /* Add the new function to the offload table. */
8088 if (ENABLE_OFFLOADING)
8089 {
8090 if (in_lto_p)
8091 DECL_PRESERVE_P (child_fn) = 1;
8092 vec_safe_push (offload_funcs, child_fn);
8093 }
8094
8095 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8096 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8097
8098 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8099 fixed in a following pass. */
8100 push_cfun (child_cfun);
8101 if (need_asm)
8102 assign_assembler_name_if_needed (child_fn);
8103 cgraph_edge::rebuild_edges ();
8104
8105 /* Some EH regions might become dead, see PR34608. If
8106 pass_cleanup_cfg isn't the first pass to happen with the
8107 new child, these dead EH edges might cause problems.
8108 Clean them up now. */
8109 if (flag_exceptions)
8110 {
8111 basic_block bb;
8112 bool changed = false;
8113
8114 FOR_EACH_BB_FN (bb, cfun)
8115 changed |= gimple_purge_dead_eh_edges (bb);
8116 if (changed)
8117 cleanup_tree_cfg ();
8118 }
8119 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8120 verify_loop_structure ();
8121 pop_cfun ();
8122
8123 if (dump_file && !gimple_in_ssa_p (cfun))
8124 {
8125 omp_any_child_fn_dumped = true;
8126 dump_function_header (dump_file, child_fn, dump_flags);
8127 dump_function_to_file (child_fn, dump_file, dump_flags);
8128 }
8129
8130 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
8131 }
8132
8133 /* Emit a library call to launch the offloading region, or do data
8134 transfers. */
8135 tree t1, t2, t3, t4, depend, c, clauses;
8136 enum built_in_function start_ix;
8137 unsigned int flags_i = 0;
8138
8139 switch (gimple_omp_target_kind (entry_stmt))
8140 {
8141 case GF_OMP_TARGET_KIND_REGION:
8142 start_ix = BUILT_IN_GOMP_TARGET;
8143 break;
8144 case GF_OMP_TARGET_KIND_DATA:
8145 start_ix = BUILT_IN_GOMP_TARGET_DATA;
8146 break;
8147 case GF_OMP_TARGET_KIND_UPDATE:
8148 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8149 break;
8150 case GF_OMP_TARGET_KIND_ENTER_DATA:
8151 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8152 break;
8153 case GF_OMP_TARGET_KIND_EXIT_DATA:
8154 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8155 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8156 break;
8157 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8158 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8159 start_ix = BUILT_IN_GOACC_PARALLEL;
8160 break;
8161 case GF_OMP_TARGET_KIND_OACC_DATA:
8162 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8163 start_ix = BUILT_IN_GOACC_DATA_START;
8164 break;
8165 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8166 start_ix = BUILT_IN_GOACC_UPDATE;
8167 break;
8168 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8169 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8170 break;
8171 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8172 start_ix = BUILT_IN_GOACC_DECLARE;
8173 break;
8174 default:
8175 gcc_unreachable ();
8176 }
8177
8178 clauses = gimple_omp_target_clauses (entry_stmt);
8179
8180 tree device = NULL_TREE;
8181 location_t device_loc = UNKNOWN_LOCATION;
8182 tree goacc_flags = NULL_TREE;
8183 if (is_gimple_omp_oacc (entry_stmt))
8184 {
8185 /* By default, no GOACC_FLAGs are set. */
8186 goacc_flags = integer_zero_node;
8187 }
8188 else
8189 {
8190 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8191 if (c)
8192 {
8193 device = OMP_CLAUSE_DEVICE_ID (c);
8194 device_loc = OMP_CLAUSE_LOCATION (c);
8195 }
8196 else
8197 {
8198 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8199 library choose). */
8200 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8201 device_loc = gimple_location (entry_stmt);
8202 }
8203
8204 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8205 if (c)
8206 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8207 }
8208
8209 /* By default, there is no conditional. */
8210 tree cond = NULL_TREE;
8211 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8212 if (c)
8213 cond = OMP_CLAUSE_IF_EXPR (c);
8214 /* If we found the clause 'if (cond)', build:
8215 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
8216 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
8217 if (cond)
8218 {
8219 tree *tp;
8220 if (is_gimple_omp_oacc (entry_stmt))
8221 tp = &goacc_flags;
8222 else
8223 {
8224 /* Ensure 'device' is of the correct type. */
8225 device = fold_convert_loc (device_loc, integer_type_node, device);
8226
8227 tp = &device;
8228 }
8229
8230 cond = gimple_boolify (cond);
8231
8232 basic_block cond_bb, then_bb, else_bb;
8233 edge e;
8234 tree tmp_var;
8235
8236 tmp_var = create_tmp_var (TREE_TYPE (*tp));
8237 if (offloaded)
8238 e = split_block_after_labels (new_bb);
8239 else
8240 {
8241 gsi = gsi_last_nondebug_bb (new_bb);
8242 gsi_prev (&gsi);
8243 e = split_block (new_bb, gsi_stmt (gsi));
8244 }
8245 cond_bb = e->src;
8246 new_bb = e->dest;
8247 remove_edge (e);
8248
8249 then_bb = create_empty_bb (cond_bb);
8250 else_bb = create_empty_bb (then_bb);
8251 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8252 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8253
8254 stmt = gimple_build_cond_empty (cond);
8255 gsi = gsi_last_bb (cond_bb);
8256 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8257
8258 gsi = gsi_start_bb (then_bb);
8259 stmt = gimple_build_assign (tmp_var, *tp);
8260 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8261
8262 gsi = gsi_start_bb (else_bb);
8263 if (is_gimple_omp_oacc (entry_stmt))
8264 stmt = gimple_build_assign (tmp_var,
8265 BIT_IOR_EXPR,
8266 *tp,
8267 build_int_cst (integer_type_node,
8268 GOACC_FLAG_HOST_FALLBACK));
8269 else
8270 stmt = gimple_build_assign (tmp_var,
8271 build_int_cst (integer_type_node,
8272 GOMP_DEVICE_HOST_FALLBACK));
8273 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8274
8275 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8276 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8277 add_bb_to_loop (then_bb, cond_bb->loop_father);
8278 add_bb_to_loop (else_bb, cond_bb->loop_father);
8279 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8280 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8281
8282 *tp = tmp_var;
8283
8284 gsi = gsi_last_nondebug_bb (new_bb);
8285 }
8286 else
8287 {
8288 gsi = gsi_last_nondebug_bb (new_bb);
8289
8290 if (device != NULL_TREE)
8291 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8292 true, GSI_SAME_STMT);
8293 }
8294
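/* Gather the arguments that describe the mapped data: T1 is the number of
map entries, and T2, T3 and T4 are the addresses of the data, sizes and
kinds arrays produced by target lowering (or zero when there is nothing
to map). */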
8295 t = gimple_omp_target_data_arg (entry_stmt);
8296 if (t == NULL)
8297 {
8298 t1 = size_zero_node;
8299 t2 = build_zero_cst (ptr_type_node);
8300 t3 = t2;
8301 t4 = t2;
8302 }
8303 else
8304 {
8305 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8306 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8307 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8308 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8309 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8310 }
8311
8312 gimple *g;
8313 bool tagging = false;
8314 /* The maximum number of arguments used by any start_ix, without varargs. */
8315 auto_vec<tree, 11> args;
8316 if (is_gimple_omp_oacc (entry_stmt))
8317 {
8318 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8319 TREE_TYPE (goacc_flags), goacc_flags);
8320 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8321 NULL_TREE, true,
8322 GSI_SAME_STMT);
8323 args.quick_push (goacc_flags_m);
8324 }
8325 else
8326 args.quick_push (device);
8327 if (offloaded)
8328 args.quick_push (build_fold_addr_expr (child_fn));
8329 args.quick_push (t1);
8330 args.quick_push (t2);
8331 args.quick_push (t3);
8332 args.quick_push (t4);
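/* The remaining arguments depend on the runtime entry point selected
above. For the OpenACC offload launch, TAGGING is set and the trailing
arguments are encoded as GOMP_LAUNCH_* tag words built by oacc_launch_pack
and terminated by the zero marker pushed after this switch. */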
8333 switch (start_ix)
8334 {
8335 case BUILT_IN_GOACC_DATA_START:
8336 case BUILT_IN_GOACC_DECLARE:
8337 case BUILT_IN_GOMP_TARGET_DATA:
8338 break;
8339 case BUILT_IN_GOMP_TARGET:
8340 case BUILT_IN_GOMP_TARGET_UPDATE:
8341 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8342 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8343 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8344 if (c)
8345 depend = OMP_CLAUSE_DECL (c);
8346 else
8347 depend = build_int_cst (ptr_type_node, 0);
8348 args.quick_push (depend);
8349 if (start_ix == BUILT_IN_GOMP_TARGET)
8350 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8351 break;
8352 case BUILT_IN_GOACC_PARALLEL:
8353 oacc_set_fn_attrib (child_fn, clauses, &args);
8354 tagging = true;
8355 /* FALLTHRU */
8356 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8357 case BUILT_IN_GOACC_UPDATE:
8358 {
8359 tree t_async = NULL_TREE;
8360
8361 /* If present, use the value specified by the respective
8362 clause, making sure that it is of the correct type. */
8363 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8364 if (c)
8365 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8366 integer_type_node,
8367 OMP_CLAUSE_ASYNC_EXPR (c));
8368 else if (!tagging)
8369 /* Default value for t_async. */
8370 t_async = fold_convert_loc (gimple_location (entry_stmt),
8371 integer_type_node,
8372 build_int_cst (integer_type_node,
8373 GOMP_ASYNC_SYNC));
8374 if (tagging && t_async)
8375 {
8376 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8377
8378 if (TREE_CODE (t_async) == INTEGER_CST)
8379 {
8380 /* See if we can pack the async arg into the tag's
8381 operand. */
8382 i_async = TREE_INT_CST_LOW (t_async);
8383 if (i_async < GOMP_LAUNCH_OP_MAX)
8384 t_async = NULL_TREE;
8385 else
8386 i_async = GOMP_LAUNCH_OP_MAX;
8387 }
8388 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8389 i_async));
8390 }
8391 if (t_async)
8392 args.safe_push (t_async);
8393
8394 /* Save the argument index, and ... */
8395 unsigned t_wait_idx = args.length ();
8396 unsigned num_waits = 0;
8397 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8398 if (!tagging || c)
8399 /* ... push a placeholder. */
8400 args.safe_push (integer_zero_node);
8401
8402 for (; c; c = OMP_CLAUSE_CHAIN (c))
8403 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8404 {
8405 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8406 integer_type_node,
8407 OMP_CLAUSE_WAIT_EXPR (c)));
8408 num_waits++;
8409 }
8410
8411 if (!tagging || num_waits)
8412 {
8413 tree len;
8414
8415 /* Now that we know the number, update the placeholder. */
8416 if (tagging)
8417 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8418 else
8419 len = build_int_cst (integer_type_node, num_waits);
8420 len = fold_convert_loc (gimple_location (entry_stmt),
8421 unsigned_type_node, len);
8422 args[t_wait_idx] = len;
8423 }
8424 }
8425 break;
8426 default:
8427 gcc_unreachable ();
8428 }
8429 if (tagging)
8430 /* Push terminal marker - zero. */
8431 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8432
8433 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8434 gimple_set_location (g, gimple_location (entry_stmt));
8435 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8436 if (!offloaded)
8437 {
8438 g = gsi_stmt (gsi);
8439 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8440 gsi_remove (&gsi, true);
8441 }
8442 if (data_region && region->exit)
8443 {
8444 gsi = gsi_last_nondebug_bb (region->exit);
8445 g = gsi_stmt (gsi);
8446 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8447 gsi_remove (&gsi, true);
8448 }
8449 }
8450
8451 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
8452 the iteration variable derived from the thread number. INTRA_GROUP means this
8453 is an expansion of a loop iterating over work-items within a separate
8454 iteration over groups. */
8455
8456 static void
8457 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8458 {
8459 gimple_stmt_iterator gsi;
8460 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8461 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8462 == GF_OMP_FOR_KIND_GRID_LOOP);
8463 size_t collapse = gimple_omp_for_collapse (for_stmt);
8464 struct omp_for_data_loop *loops
8465 = XALLOCAVEC (struct omp_for_data_loop,
8466 gimple_omp_for_collapse (for_stmt));
8467 struct omp_for_data fd;
8468
8469 remove_edge (BRANCH_EDGE (kfor->entry));
8470 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8471
8472 gcc_assert (kfor->cont);
8473 omp_extract_for_data (for_stmt, &fd, loops);
8474
8475 gsi = gsi_start_bb (body_bb);
8476
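/* For each collapsed dimension, initialize the loop's iteration variable
directly from the HSA work-group or work-item id: V = N1 + ID * STEP, so
the body is executed once per thread with that value. */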
8477 for (size_t dim = 0; dim < collapse; dim++)
8478 {
8479 tree type, itype;
8480 itype = type = TREE_TYPE (fd.loops[dim].v);
8481 if (POINTER_TYPE_P (type))
8482 itype = signed_type_for (type);
8483
8484 tree n1 = fd.loops[dim].n1;
8485 tree step = fd.loops[dim].step;
8486 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8487 true, NULL_TREE, true, GSI_SAME_STMT);
8488 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8489 true, NULL_TREE, true, GSI_SAME_STMT);
8490 tree threadid;
8491 if (gimple_omp_for_grid_group_iter (for_stmt))
8492 {
8493 gcc_checking_assert (!intra_group);
8494 threadid = build_call_expr (builtin_decl_explicit
8495 (BUILT_IN_HSA_WORKGROUPID), 1,
8496 build_int_cstu (unsigned_type_node, dim));
8497 }
8498 else if (intra_group)
8499 threadid = build_call_expr (builtin_decl_explicit
8500 (BUILT_IN_HSA_WORKITEMID), 1,
8501 build_int_cstu (unsigned_type_node, dim));
8502 else
8503 threadid = build_call_expr (builtin_decl_explicit
8504 (BUILT_IN_HSA_WORKITEMABSID), 1,
8505 build_int_cstu (unsigned_type_node, dim));
8506 threadid = fold_convert (itype, threadid);
8507 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8508 true, GSI_SAME_STMT);
8509
8510 tree startvar = fd.loops[dim].v;
8511 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8512 if (POINTER_TYPE_P (type))
8513 t = fold_build_pointer_plus (n1, t);
8514 else
8515 t = fold_build2 (PLUS_EXPR, type, t, n1);
8516 t = fold_convert (type, t);
8517 t = force_gimple_operand_gsi (&gsi, t,
8518 DECL_P (startvar)
8519 && TREE_ADDRESSABLE (startvar),
8520 NULL_TREE, true, GSI_SAME_STMT);
8521 gassign *assign_stmt = gimple_build_assign (startvar, t);
8522 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8523 }
8524 /* Remove the omp for statement. */
8525 gsi = gsi_last_nondebug_bb (kfor->entry);
8526 gsi_remove (&gsi, true);
8527
8528 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8529 gsi = gsi_last_nondebug_bb (kfor->cont);
8530 gcc_assert (!gsi_end_p (gsi)
8531 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8532 gsi_remove (&gsi, true);
8533
8534 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8535 gsi = gsi_last_nondebug_bb (kfor->exit);
8536 gcc_assert (!gsi_end_p (gsi)
8537 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8538 if (intra_group)
8539 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8540 gsi_remove (&gsi, true);
8541
8542 /* Fix up the now much simpler CFG. */
8543 remove_edge (find_edge (kfor->cont, body_bb));
8544
8545 if (kfor->cont != body_bb)
8546 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8547 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8548 }
8549
8550 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8551 the kernel argument decl. */
8552
8553 struct grid_arg_decl_map
8554 {
8555 tree old_arg;
8556 tree new_arg;
8557 };
8558
8559 /* Invoked through walk_gimple_op, will remap references to the original
8560 PARM_DECL to the one pertaining to the kernel function. */
8561
8562 static tree
8563 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8564 {
8565 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8566 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8567 tree t = *tp;
8568
8569 if (t == adm->old_arg)
8570 *tp = adm->new_arg;
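/* Stop at types and decls; only walk further into expressions. */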
8571 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8572 return NULL_TREE;
8573 }
8574
8575 /* If the TARGET region contains a gridified kernel body for loop, remove its
8576 region from the TARGET and expand it in HSA gridified kernel fashion. */
8577
8578 static void
8579 grid_expand_target_grid_body (struct omp_region *target)
8580 {
8581 if (!hsa_gen_requested_p ())
8582 return;
8583
8584 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8585 struct omp_region **pp;
8586
8587 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8588 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8589 break;
8590
8591 struct omp_region *gpukernel = *pp;
8592
8593 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8594 if (!gpukernel)
8595 {
8596 /* HSA cannot handle OpenACC constructs. */
8597 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8598 return;
8599 gcc_checking_assert (orig_child_fndecl);
8600 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8601 OMP_CLAUSE__GRIDDIM_));
8602 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8603
8604 hsa_register_kernel (n);
8605 return;
8606 }
8607
8608 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8609 OMP_CLAUSE__GRIDDIM_));
8610 tree inside_block
8611 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8612 *pp = gpukernel->next;
8613 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8614 if ((*pp)->type == GIMPLE_OMP_FOR)
8615 break;
8616
8617 struct omp_region *kfor = *pp;
8618 gcc_assert (kfor);
8619 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8620 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8621 *pp = kfor->next;
8622 if (kfor->inner)
8623 {
8624 if (gimple_omp_for_grid_group_iter (for_stmt))
8625 {
8626 struct omp_region **next_pp;
8627 for (pp = &kfor->inner; *pp; pp = next_pp)
8628 {
8629 next_pp = &(*pp)->next;
8630 if ((*pp)->type != GIMPLE_OMP_FOR)
8631 continue;
8632 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8633 gcc_assert (gimple_omp_for_kind (inner)
8634 == GF_OMP_FOR_KIND_GRID_LOOP);
8635 grid_expand_omp_for_loop (*pp, true);
8636 *pp = (*pp)->next;
8637 next_pp = pp;
8638 }
8639 }
8640 expand_omp (kfor->inner);
8641 }
8642 if (gpukernel->inner)
8643 expand_omp (gpukernel->inner);
8644
8645 tree kern_fndecl = copy_node (orig_child_fndecl);
8646 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8647 "kernel");
8648 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8649 tree tgtblock = gimple_block (tgt_stmt);
8650 tree fniniblock = make_node (BLOCK);
8651 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8652 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8653 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8654 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8655 DECL_INITIAL (kern_fndecl) = fniniblock;
8656 push_struct_function (kern_fndecl);
8657 cfun->function_end_locus = gimple_location (tgt_stmt);
8658 init_tree_ssa (cfun);
8659 pop_cfun ();
8660
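/* The child function of a target region takes a single data-block pointer
argument; give the kernel clone its own copy of that PARM_DECL and of the
void RESULT_DECL so that their DECL_CONTEXT is the new fndecl. */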
8661 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8662 gcc_assert (!DECL_CHAIN (old_parm_decl));
8663 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8664 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8665 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8666 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8667 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8668 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8669 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8670 kern_cfun->curr_properties = cfun->curr_properties;
8671
8672 grid_expand_omp_for_loop (kfor, false);
8673
8674 /* Remove the omp for statement. */
8675 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8676 gsi_remove (&gsi, true);
8677 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8678 return. */
8679 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8680 gcc_assert (!gsi_end_p (gsi)
8681 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8682 gimple *ret_stmt = gimple_build_return (NULL);
8683 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8684 gsi_remove (&gsi, true);
8685
8686 /* Statements in the first BB in the target construct have been produced by
8687 target lowering and must be copied inside the GPUKERNEL, with the two
8688 exceptions of the first OMP statement and the OMP_DATA assignment
8689 statement. */
8690 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8691 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8692 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8693 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8694 !gsi_end_p (tsi); gsi_next (&tsi))
8695 {
8696 gimple *stmt = gsi_stmt (tsi);
8697 if (is_gimple_omp (stmt))
8698 break;
8699 if (sender
8700 && is_gimple_assign (stmt)
8701 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8702 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8703 continue;
8704 gimple *copy = gimple_copy (stmt);
8705 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8706 gimple_set_block (copy, fniniblock);
8707 }
8708
8709 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8710 gpukernel->exit, inside_block);
8711
8712 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8713 kcn->mark_force_output ();
8714 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8715
8716 hsa_register_kernel (kcn, orig_child);
8717
8718 cgraph_node::add_new_function (kern_fndecl, true);
8719 push_cfun (kern_cfun);
8720 cgraph_edge::rebuild_edges ();
8721
8722 /* Re-map any mention of the PARM_DECL of the original function to the
8723 PARM_DECL of the new one.
8724
8725 TODO: It would be great if lowering produced references into the GPU
8726 kernel decl straight away and we did not have to do this. */
8727 struct grid_arg_decl_map adm;
8728 adm.old_arg = old_parm_decl;
8729 adm.new_arg = new_parm_decl;
8730 basic_block bb;
8731 FOR_EACH_BB_FN (bb, kern_cfun)
8732 {
8733 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8734 {
8735 gimple *stmt = gsi_stmt (gsi);
8736 struct walk_stmt_info wi;
8737 memset (&wi, 0, sizeof (wi));
8738 wi.info = &adm;
8739 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8740 }
8741 }
8742 pop_cfun ();
8743
8744 return;
8745 }
8746
8747 /* Expand the parallel region tree rooted at REGION. Expansion
8748 proceeds in depth-first order. Innermost regions are expanded
8749 first. This way, parallel regions that require a new function to
8750 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8751 internal dependencies in their body. */
8752
8753 static void
8754 expand_omp (struct omp_region *region)
8755 {
8756 omp_any_child_fn_dumped = false;
8757 while (region)
8758 {
8759 location_t saved_location;
8760 gimple *inner_stmt = NULL;
8761
8762 /* First, determine whether this is a combined parallel+workshare
8763 region. */
8764 if (region->type == GIMPLE_OMP_PARALLEL)
8765 determine_parallel_type (region);
8766 else if (region->type == GIMPLE_OMP_TARGET)
8767 grid_expand_target_grid_body (region);
8768
8769 if (region->type == GIMPLE_OMP_FOR
8770 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8771 inner_stmt = last_stmt (region->inner->entry);
8772
8773 if (region->inner)
8774 expand_omp (region->inner);
8775
8776 saved_location = input_location;
8777 if (gimple_has_location (last_stmt (region->entry)))
8778 input_location = gimple_location (last_stmt (region->entry));
8779
8780 switch (region->type)
8781 {
8782 case GIMPLE_OMP_PARALLEL:
8783 case GIMPLE_OMP_TASK:
8784 expand_omp_taskreg (region);
8785 break;
8786
8787 case GIMPLE_OMP_FOR:
8788 expand_omp_for (region, inner_stmt);
8789 break;
8790
8791 case GIMPLE_OMP_SECTIONS:
8792 expand_omp_sections (region);
8793 break;
8794
8795 case GIMPLE_OMP_SECTION:
8796 /* Individual omp sections are handled together with their
8797 parent GIMPLE_OMP_SECTIONS region. */
8798 break;
8799
8800 case GIMPLE_OMP_SINGLE:
8801 expand_omp_single (region);
8802 break;
8803
8804 case GIMPLE_OMP_ORDERED:
8805 {
8806 gomp_ordered *ord_stmt
8807 = as_a <gomp_ordered *> (last_stmt (region->entry));
8808 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8809 OMP_CLAUSE_DEPEND))
8810 {
8811 /* We'll expand these when expanding corresponding
8812 worksharing region with ordered(n) clause. */
8813 gcc_assert (region->outer
8814 && region->outer->type == GIMPLE_OMP_FOR);
8815 region->ord_stmt = ord_stmt;
8816 break;
8817 }
8818 }
8819 /* FALLTHRU */
8820 case GIMPLE_OMP_MASTER:
8821 case GIMPLE_OMP_TASKGROUP:
8822 case GIMPLE_OMP_CRITICAL:
8823 case GIMPLE_OMP_TEAMS:
8824 expand_omp_synch (region);
8825 break;
8826
8827 case GIMPLE_OMP_ATOMIC_LOAD:
8828 expand_omp_atomic (region);
8829 break;
8830
8831 case GIMPLE_OMP_TARGET:
8832 expand_omp_target (region);
8833 break;
8834
8835 default:
8836 gcc_unreachable ();
8837 }
8838
8839 input_location = saved_location;
8840 region = region->next;
8841 }
8842 if (omp_any_child_fn_dumped)
8843 {
8844 if (dump_file)
8845 dump_function_header (dump_file, current_function_decl, dump_flags);
8846 omp_any_child_fn_dumped = false;
8847 }
8848 }
8849
8850 /* Helper for build_omp_regions. Scan the dominator tree starting at
8851 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8852 true, the function ends once a single tree is built (otherwise, a whole
8853 forest of OMP constructs may be built). */
8854
8855 static void
8856 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8857 bool single_tree)
8858 {
8859 gimple_stmt_iterator gsi;
8860 gimple *stmt;
8861 basic_block son;
8862
8863 gsi = gsi_last_nondebug_bb (bb);
8864 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8865 {
8866 struct omp_region *region;
8867 enum gimple_code code;
8868
8869 stmt = gsi_stmt (gsi);
8870 code = gimple_code (stmt);
8871 if (code == GIMPLE_OMP_RETURN)
8872 {
8873 /* STMT is the return point out of region PARENT. Mark it
8874 as the exit point and make PARENT the immediately
8875 enclosing region. */
8876 gcc_assert (parent);
8877 region = parent;
8878 region->exit = bb;
8879 parent = parent->outer;
8880 }
8881 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8882 {
8883 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8884 GIMPLE_OMP_RETURN, but matches with
8885 GIMPLE_OMP_ATOMIC_LOAD. */
8886 gcc_assert (parent);
8887 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8888 region = parent;
8889 region->exit = bb;
8890 parent = parent->outer;
8891 }
8892 else if (code == GIMPLE_OMP_CONTINUE)
8893 {
8894 gcc_assert (parent);
8895 parent->cont = bb;
8896 }
8897 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8898 {
8899 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8900 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8901 }
8902 else
8903 {
8904 region = new_omp_region (bb, code, parent);
8905 /* Otherwise... */
8906 if (code == GIMPLE_OMP_TARGET)
8907 {
8908 switch (gimple_omp_target_kind (stmt))
8909 {
8910 case GF_OMP_TARGET_KIND_REGION:
8911 case GF_OMP_TARGET_KIND_DATA:
8912 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8913 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8914 case GF_OMP_TARGET_KIND_OACC_DATA:
8915 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8916 break;
8917 case GF_OMP_TARGET_KIND_UPDATE:
8918 case GF_OMP_TARGET_KIND_ENTER_DATA:
8919 case GF_OMP_TARGET_KIND_EXIT_DATA:
8920 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8921 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8922 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8923 /* ..., other than for those stand-alone directives... */
8924 region = NULL;
8925 break;
8926 default:
8927 gcc_unreachable ();
8928 }
8929 }
8930 else if (code == GIMPLE_OMP_ORDERED
8931 && omp_find_clause (gimple_omp_ordered_clauses
8932 (as_a <gomp_ordered *> (stmt)),
8933 OMP_CLAUSE_DEPEND))
8934 /* #pragma omp ordered depend is also just a stand-alone
8935 directive. */
8936 region = NULL;
8937 else if (code == GIMPLE_OMP_TASK
8938 && gimple_omp_task_taskwait_p (stmt))
8939 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8940 region = NULL;
8941 /* ..., this directive becomes the parent for a new region. */
8942 if (region)
8943 parent = region;
8944 }
8945 }
8946
8947 if (single_tree && !parent)
8948 return;
8949
8950 for (son = first_dom_son (CDI_DOMINATORS, bb);
8951 son;
8952 son = next_dom_son (CDI_DOMINATORS, son))
8953 build_omp_regions_1 (son, parent, single_tree);
8954 }
8955
8956 /* Builds the tree of OMP regions rooted at ROOT, storing it in
8957 root_omp_region. */
8958
8959 static void
8960 build_omp_regions_root (basic_block root)
8961 {
8962 gcc_assert (root_omp_region == NULL);
8963 build_omp_regions_1 (root, NULL, true);
8964 gcc_assert (root_omp_region != NULL);
8965 }
8966
8967 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
8968
8969 void
8970 omp_expand_local (basic_block head)
8971 {
8972 build_omp_regions_root (head);
8973 if (dump_file && (dump_flags & TDF_DETAILS))
8974 {
8975 fprintf (dump_file, "\nOMP region tree\n\n");
8976 dump_omp_region (dump_file, root_omp_region, 0);
8977 fprintf (dump_file, "\n");
8978 }
8979
8980 remove_exit_barriers (root_omp_region);
8981 expand_omp (root_omp_region);
8982
8983 omp_free_regions ();
8984 }
8985
8986 /* Scan the CFG and build a tree of OMP regions, storing the root in
8987 root_omp_region. */
8988
8989 static void
8990 build_omp_regions (void)
8991 {
8992 gcc_assert (root_omp_region == NULL);
8993 calculate_dominance_info (CDI_DOMINATORS);
8994 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8995 }
8996
8997 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8998
8999 static unsigned int
9000 execute_expand_omp (void)
9001 {
9002 build_omp_regions ();
9003
9004 if (!root_omp_region)
9005 return 0;
9006
9007 if (dump_file)
9008 {
9009 fprintf (dump_file, "\nOMP region tree\n\n");
9010 dump_omp_region (dump_file, root_omp_region, 0);
9011 fprintf (dump_file, "\n");
9012 }
9013
9014 remove_exit_barriers (root_omp_region);
9015
9016 expand_omp (root_omp_region);
9017
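/* When checking is enabled, verify the loop structures, unless they are
already marked as needing a fixup. */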
9018 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9019 verify_loop_structure ();
9020 cleanup_tree_cfg ();
9021
9022 omp_free_regions ();
9023
9024 return 0;
9025 }
9026
9027 /* OMP expansion -- the default pass, run before creation of SSA form. */
9028
9029 namespace {
9030
9031 const pass_data pass_data_expand_omp =
9032 {
9033 GIMPLE_PASS, /* type */
9034 "ompexp", /* name */
9035 OPTGROUP_OMP, /* optinfo_flags */
9036 TV_NONE, /* tv_id */
9037 PROP_gimple_any, /* properties_required */
9038 PROP_gimple_eomp, /* properties_provided */
9039 0, /* properties_destroyed */
9040 0, /* todo_flags_start */
9041 0, /* todo_flags_finish */
9042 };
9043
9044 class pass_expand_omp : public gimple_opt_pass
9045 {
9046 public:
9047 pass_expand_omp (gcc::context *ctxt)
9048 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9049 {}
9050
9051 /* opt_pass methods: */
9052 virtual unsigned int execute (function *)
9053 {
9054 bool gate = ((flag_openacc != 0 || flag_openmp != 0
9055 || flag_openmp_simd != 0)
9056 && !seen_error ());
9057
9058 /* This pass always runs, to provide PROP_gimple_eomp.
9059 But often, there is nothing to do. */
9060 if (!gate)
9061 return 0;
9062
9063 return execute_expand_omp ();
9064 }
9065
9066 }; // class pass_expand_omp
9067
9068 } // anon namespace
9069
9070 gimple_opt_pass *
9071 make_pass_expand_omp (gcc::context *ctxt)
9072 {
9073 return new pass_expand_omp (ctxt);
9074 }
9075
9076 namespace {
9077
9078 const pass_data pass_data_expand_omp_ssa =
9079 {
9080 GIMPLE_PASS, /* type */
9081 "ompexpssa", /* name */
9082 OPTGROUP_OMP, /* optinfo_flags */
9083 TV_NONE, /* tv_id */
9084 PROP_cfg | PROP_ssa, /* properties_required */
9085 PROP_gimple_eomp, /* properties_provided */
9086 0, /* properties_destroyed */
9087 0, /* todo_flags_start */
9088 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
9089 };
9090
9091 class pass_expand_omp_ssa : public gimple_opt_pass
9092 {
9093 public:
9094 pass_expand_omp_ssa (gcc::context *ctxt)
9095 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
9096 {}
9097
9098 /* opt_pass methods: */
9099 virtual bool gate (function *fun)
9100 {
9101 return !(fun->curr_properties & PROP_gimple_eomp);
9102 }
9103 virtual unsigned int execute (function *) { return execute_expand_omp (); }
9104 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
9105
9106 }; // class pass_expand_omp_ssa
9107
9108 } // anon namespace
9109
9110 gimple_opt_pass *
9111 make_pass_expand_omp_ssa (gcc::context *ctxt)
9112 {
9113 return new pass_expand_omp_ssa (ctxt);
9114 }
9115
9116 /* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
9117 GIMPLE_* codes. Return true if the caller should add a fallthru edge. */
9118
9119 bool
9120 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
9121 int *region_idx)
9122 {
9123 gimple *last = last_stmt (bb);
9124 enum gimple_code code = gimple_code (last);
9125 struct omp_region *cur_region = *region;
9126 bool fallthru = false;
9127
9128 switch (code)
9129 {
9130 case GIMPLE_OMP_PARALLEL:
9131 case GIMPLE_OMP_FOR:
9132 case GIMPLE_OMP_SINGLE:
9133 case GIMPLE_OMP_TEAMS:
9134 case GIMPLE_OMP_MASTER:
9135 case GIMPLE_OMP_TASKGROUP:
9136 case GIMPLE_OMP_CRITICAL:
9137 case GIMPLE_OMP_SECTION:
9138 case GIMPLE_OMP_GRID_BODY:
9139 cur_region = new_omp_region (bb, code, cur_region);
9140 fallthru = true;
9141 break;
9142
9143 case GIMPLE_OMP_TASK:
9144 cur_region = new_omp_region (bb, code, cur_region);
9145 fallthru = true;
9146 if (gimple_omp_task_taskwait_p (last))
9147 cur_region = cur_region->outer;
9148 break;
9149
9150 case GIMPLE_OMP_ORDERED:
9151 cur_region = new_omp_region (bb, code, cur_region);
9152 fallthru = true;
9153 if (omp_find_clause (gimple_omp_ordered_clauses
9154 (as_a <gomp_ordered *> (last)),
9155 OMP_CLAUSE_DEPEND))
9156 cur_region = cur_region->outer;
9157 break;
9158
9159 case GIMPLE_OMP_TARGET:
9160 cur_region = new_omp_region (bb, code, cur_region);
9161 fallthru = true;
9162 switch (gimple_omp_target_kind (last))
9163 {
9164 case GF_OMP_TARGET_KIND_REGION:
9165 case GF_OMP_TARGET_KIND_DATA:
9166 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9167 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9168 case GF_OMP_TARGET_KIND_OACC_DATA:
9169 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9170 break;
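/* Stand-alone target directives have no region body; close the region
created above right away. */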
9171 case GF_OMP_TARGET_KIND_UPDATE:
9172 case GF_OMP_TARGET_KIND_ENTER_DATA:
9173 case GF_OMP_TARGET_KIND_EXIT_DATA:
9174 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9175 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9176 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9177 cur_region = cur_region->outer;
9178 break;
9179 default:
9180 gcc_unreachable ();
9181 }
9182 break;
9183
9184 case GIMPLE_OMP_SECTIONS:
9185 cur_region = new_omp_region (bb, code, cur_region);
9186 fallthru = true;
9187 break;
9188
9189 case GIMPLE_OMP_SECTIONS_SWITCH:
9190 fallthru = false;
9191 break;
9192
9193 case GIMPLE_OMP_ATOMIC_LOAD:
9194 case GIMPLE_OMP_ATOMIC_STORE:
9195 fallthru = true;
9196 break;
9197
9198 case GIMPLE_OMP_RETURN:
9199 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
9200 somewhere other than the next block. This will be
9201 created later. */
9202 cur_region->exit = bb;
9203 if (cur_region->type == GIMPLE_OMP_TASK)
9204 /* Add an edge corresponding to not scheduling the task
9205 immediately. */
9206 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
9207 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
9208 cur_region = cur_region->outer;
9209 break;
9210
9211 case GIMPLE_OMP_CONTINUE:
9212 cur_region->cont = bb;
9213 switch (cur_region->type)
9214 {
9215 case GIMPLE_OMP_FOR:
9216 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
9217 successor edges as abnormal to prevent splitting
9218 them. */
9219 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
9220 /* Make the loopback edge. */
9221 make_edge (bb, single_succ (cur_region->entry),
9222 EDGE_ABNORMAL);
9223
9224 /* Create an edge from GIMPLE_OMP_FOR to exit, which
9225 corresponds to the case that the body of the loop
9226 is not executed at all. */
9227 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
9228 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
9229 fallthru = false;
9230 break;
9231
9232 case GIMPLE_OMP_SECTIONS:
9233 /* Wire up the edges into and out of the nested sections. */
9234 {
9235 basic_block switch_bb = single_succ (cur_region->entry);
9236
9237 struct omp_region *i;
9238 for (i = cur_region->inner; i ; i = i->next)
9239 {
9240 gcc_assert (i->type == GIMPLE_OMP_SECTION);
9241 make_edge (switch_bb, i->entry, 0);
9242 make_edge (i->exit, bb, EDGE_FALLTHRU);
9243 }
9244
9245 /* Make the loopback edge to the block with
9246 GIMPLE_OMP_SECTIONS_SWITCH. */
9247 make_edge (bb, switch_bb, 0);
9248
9249 /* Make the edge from the switch to exit. */
9250 make_edge (switch_bb, bb->next_bb, 0);
9251 fallthru = false;
9252 }
9253 break;
9254
9255 case GIMPLE_OMP_TASK:
9256 fallthru = true;
9257 break;
9258
9259 default:
9260 gcc_unreachable ();
9261 }
9262 break;
9263
9264 default:
9265 gcc_unreachable ();
9266 }
9267
9268 if (*region != cur_region)
9269 {
9270 *region = cur_region;
9271 if (cur_region)
9272 *region_idx = cur_region->entry->index;
9273 else
9274 *region_idx = 0;
9275 }
9276
9277 return fallthru;
9278 }
9279
9280 #include "gt-omp-expand.h"