1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "stringpool.h"
61 #include "attribs.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
106
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
110 };
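
/* Illustrative example (not part of the original source): for

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       ...

   there is a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region; the inner region's OUTER field points back to
   the parallel region, and constructs at the same nesting depth are
   chained through NEXT.  */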
111
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
114
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
119
120 /* Return true if REGION is a combined parallel+workshare region. */
121
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
124 {
125 return region->is_combined_parallel;
126 }
127
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
132
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
138
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
141
142 Is lowered into:
143
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
147
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
153
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
158
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
164
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
169
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
172 {
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
175
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
178
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
182
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
184
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
189
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
200
201 return true;
202 }
203
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
206
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
212
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
216
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
222 }
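
/* Illustrative note (not part of the original source): with a
   vectorization factor of 8 and a chunk size of 5, the expression above
   computes (5 + 7) & -8 == 8, i.e. the chunk size is rounded up to the
   next multiple of the vectorization factor so that each schedule chunk
   covers a whole number of simd vectors.  */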
223
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded. */
227
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
230 {
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
234
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
236 {
237 struct omp_for_data fd;
238 tree n1, n2;
239
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
243
244 if (gimple_omp_for_combined_into_p (for_stmt))
245 {
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
255 }
256
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
258
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
261
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
264
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
267
268 if (fd.chunk_size)
269 {
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
273 }
274
275 return ws_args;
276 }
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
278 {
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
287 }
288
289 gcc_unreachable ();
290 }
291
292 /* Discover whether REGION is a combined parallel+workshare region. */
293
294 static void
295 determine_parallel_type (struct omp_region *region)
296 {
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
299
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
304
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
310
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
317
318 /* Give up for task reductions on the parallel; while it is implementable,
319 adding another big set of APIs or slowing down the normal paths is
320 not acceptable. */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
324
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
331 {
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
334
335 if (region->inner->type == GIMPLE_OMP_FOR)
336 {
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
356 }
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
363
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
367 }
368 }
369
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374
375 /* Dump the parallel region tree rooted at REGION. */
376
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
382
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
385
386 if (region->cont)
387 {
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
390 }
391
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
397
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
400 }
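
/* Illustrative note (not part of the original source): for a parallel
   region containing a single workshare loop the dump looks roughly like

     bb 2: gimple_omp_parallel
         bb 3: gimple_omp_for
         bb 5: GIMPLE_OMP_CONTINUE
         bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN

   where indentation reflects nesting and the block numbers are whatever
   the CFG happens to assign.  */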
401
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
404 {
405 dump_omp_region (stderr, region, 0);
406 }
407
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
410 {
411 dump_omp_region (stderr, root_omp_region, 0);
412 }
413
414 /* Create a new parallel region starting at STMT inside region PARENT. */
415
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
419 {
420 struct omp_region *region = XCNEW (struct omp_region);
421
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
425
426 if (parent)
427 {
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
432 }
433 else
434 {
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
439 }
440
441 return region;
442 }
443
444 /* Release the memory associated with the region tree rooted at REGION. */
445
446 static void
447 free_omp_region_1 (struct omp_region *region)
448 {
449 struct omp_region *i, *n;
450
451 for (i = region->inner; i ; i = n)
452 {
453 n = i->next;
454 free_omp_region_1 (i);
455 }
456
457 free (region);
458 }
459
460 /* Release the memory for the entire omp region tree. */
461
462 void
463 omp_free_regions (void)
464 {
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
467 {
468 n = r->next;
469 free_omp_region_1 (r);
470 }
471 root_omp_region = NULL;
472 }
473
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
476
477 static gcond *
478 gimple_build_cond_empty (tree cond)
479 {
480 enum tree_code pred_code;
481 tree lhs, rhs;
482
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
485 }
486
487 /* Return true if a parallel REGION is within a declare target function or
488 within a target region and is not a part of a gridified target. */
489
490 static bool
491 parallel_needs_hsa_kernel_p (struct omp_region *region)
492 {
493 bool indirect = false;
494 for (region = region->outer; region; region = region->outer)
495 {
496 if (region->type == GIMPLE_OMP_PARALLEL)
497 indirect = true;
498 else if (region->type == GIMPLE_OMP_TARGET)
499 {
500 gomp_target *tgt_stmt
501 = as_a <gomp_target *> (last_stmt (region->entry));
502
503 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
504 OMP_CLAUSE__GRIDDIM_))
505 return indirect;
506 else
507 return true;
508 }
509 }
510
511 if (lookup_attribute ("omp declare target",
512 DECL_ATTRIBUTES (current_function_decl)))
513 return true;
514
515 return false;
516 }
517
518 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
519 Add CHILD_FNDECL to decl chain of the supercontext of the block
520 ENTRY_BLOCK - this is the block which originally contained the
521 code from which CHILD_FNDECL was created.
522
523 Together, these actions ensure that the debug info for the outlined
524 function will be emitted with the correct lexical scope. */
525
526 static void
527 adjust_context_and_scope (struct omp_region *region, tree entry_block,
528 tree child_fndecl)
529 {
530 tree parent_fndecl = NULL_TREE;
531 gimple *entry_stmt;
532 /* OMP expansion expands inner regions before outer ones, so if,
533 e.g., an explicit task region is nested in a parallel region, then when
534 expanding the task region current_function_decl will be the original
535 source function, but we actually want to use the child function of
536 the parallel as the context. */
537 for (region = region->outer;
538 region && parent_fndecl == NULL_TREE; region = region->outer)
539 switch (region->type)
540 {
541 case GIMPLE_OMP_PARALLEL:
542 case GIMPLE_OMP_TASK:
543 case GIMPLE_OMP_TEAMS:
544 entry_stmt = last_stmt (region->entry);
545 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
546 break;
547 case GIMPLE_OMP_TARGET:
548 entry_stmt = last_stmt (region->entry);
549 parent_fndecl
550 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
551 break;
552 default:
553 break;
554 }
555
556 if (parent_fndecl == NULL_TREE)
557 parent_fndecl = current_function_decl;
558 DECL_CONTEXT (child_fndecl) = parent_fndecl;
559
560 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
561 {
562 tree b = BLOCK_SUPERCONTEXT (entry_block);
563 if (TREE_CODE (b) == BLOCK)
564 {
565 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
566 BLOCK_VARS (b) = child_fndecl;
567 }
568 }
569 }
570
571 /* Build the function calls to GOMP_parallel etc to actually
572 generate the parallel operation. REGION is the parallel region
573 being expanded. BB is the block where to insert the code. WS_ARGS
574 will be set if this is a call to a combined parallel+workshare
575 construct, it contains the list of additional arguments needed by
576 the workshare construct. */
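
/* Illustrative sketch (not part of the original source; the libgomp
   signatures shown are assumptions): a plain parallel region ends up as
   roughly

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   while a combined parallel+for with e.g. schedule(dynamic,4) calls one
   of the GOMP_parallel_loop_* entry points, with the extra WS_ARGS
   (start, end, step, chunk) spliced in between NUM_THREADS and
   FLAGS.  */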
577
578 static void
579 expand_parallel_call (struct omp_region *region, basic_block bb,
580 gomp_parallel *entry_stmt,
581 vec<tree, va_gc> *ws_args)
582 {
583 tree t, t1, t2, val, cond, c, clauses, flags;
584 gimple_stmt_iterator gsi;
585 gimple *stmt;
586 enum built_in_function start_ix;
587 int start_ix2;
588 location_t clause_loc;
589 vec<tree, va_gc> *args;
590
591 clauses = gimple_omp_parallel_clauses (entry_stmt);
592
593 /* Determine what flavor of GOMP_parallel we will be
594 emitting. */
595 start_ix = BUILT_IN_GOMP_PARALLEL;
596 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
597 if (rtmp)
598 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
599 else if (is_combined_parallel (region))
600 {
601 switch (region->inner->type)
602 {
603 case GIMPLE_OMP_FOR:
604 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
605 switch (region->inner->sched_kind)
606 {
607 case OMP_CLAUSE_SCHEDULE_RUNTIME:
608 /* For lastprivate(conditional:), our implementation
609 requires monotonic behavior. */
610 if (region->inner->has_lastprivate_conditional != 0)
611 start_ix2 = 3;
612 else if ((region->inner->sched_modifiers
613 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
614 start_ix2 = 6;
615 else if ((region->inner->sched_modifiers
616 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
617 start_ix2 = 7;
618 else
619 start_ix2 = 3;
620 break;
621 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
622 case OMP_CLAUSE_SCHEDULE_GUIDED:
623 if ((region->inner->sched_modifiers
624 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
625 && !region->inner->has_lastprivate_conditional)
626 {
627 start_ix2 = 3 + region->inner->sched_kind;
628 break;
629 }
630 /* FALLTHRU */
631 default:
632 start_ix2 = region->inner->sched_kind;
633 break;
634 }
635 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
636 start_ix = (enum built_in_function) start_ix2;
637 break;
638 case GIMPLE_OMP_SECTIONS:
639 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
640 break;
641 default:
642 gcc_unreachable ();
643 }
644 }
645
646 /* By default, the value of NUM_THREADS is zero (selected at run time)
647 and there is no conditional. */
648 cond = NULL_TREE;
649 val = build_int_cst (unsigned_type_node, 0);
650 flags = build_int_cst (unsigned_type_node, 0);
651
652 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
653 if (c)
654 cond = OMP_CLAUSE_IF_EXPR (c);
655
656 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
657 if (c)
658 {
659 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
660 clause_loc = OMP_CLAUSE_LOCATION (c);
661 }
662 else
663 clause_loc = gimple_location (entry_stmt);
664
665 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
666 if (c)
667 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
668
669 /* Ensure 'val' is of the correct type. */
670 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
671
672 /* If we found the clause 'if (cond)', build either
673 (cond != 0) or (cond ? val : 1u). */
674 if (cond)
675 {
676 cond = gimple_boolify (cond);
677
678 if (integer_zerop (val))
679 val = fold_build2_loc (clause_loc,
680 EQ_EXPR, unsigned_type_node, cond,
681 build_int_cst (TREE_TYPE (cond), 0));
682 else
683 {
684 basic_block cond_bb, then_bb, else_bb;
685 edge e, e_then, e_else;
686 tree tmp_then, tmp_else, tmp_join, tmp_var;
687
688 tmp_var = create_tmp_var (TREE_TYPE (val));
689 if (gimple_in_ssa_p (cfun))
690 {
691 tmp_then = make_ssa_name (tmp_var);
692 tmp_else = make_ssa_name (tmp_var);
693 tmp_join = make_ssa_name (tmp_var);
694 }
695 else
696 {
697 tmp_then = tmp_var;
698 tmp_else = tmp_var;
699 tmp_join = tmp_var;
700 }
701
702 e = split_block_after_labels (bb);
703 cond_bb = e->src;
704 bb = e->dest;
705 remove_edge (e);
706
707 then_bb = create_empty_bb (cond_bb);
708 else_bb = create_empty_bb (then_bb);
709 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
710 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
711
712 stmt = gimple_build_cond_empty (cond);
713 gsi = gsi_start_bb (cond_bb);
714 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
715
716 gsi = gsi_start_bb (then_bb);
717 expand_omp_build_assign (&gsi, tmp_then, val, true);
718
719 gsi = gsi_start_bb (else_bb);
720 expand_omp_build_assign (&gsi, tmp_else,
721 build_int_cst (unsigned_type_node, 1),
722 true);
723
724 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
725 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
726 add_bb_to_loop (then_bb, cond_bb->loop_father);
727 add_bb_to_loop (else_bb, cond_bb->loop_father);
728 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
729 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
730
731 if (gimple_in_ssa_p (cfun))
732 {
733 gphi *phi = create_phi_node (tmp_join, bb);
734 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
735 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
736 }
737
738 val = tmp_join;
739 }
740
741 gsi = gsi_start_bb (bb);
742 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
744 }
745
746 gsi = gsi_last_nondebug_bb (bb);
747 t = gimple_omp_parallel_data_arg (entry_stmt);
748 if (t == NULL)
749 t1 = null_pointer_node;
750 else
751 t1 = build_fold_addr_expr (t);
752 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
753 t2 = build_fold_addr_expr (child_fndecl);
754
755 vec_alloc (args, 4 + vec_safe_length (ws_args));
756 args->quick_push (t2);
757 args->quick_push (t1);
758 args->quick_push (val);
759 if (ws_args)
760 args->splice (*ws_args);
761 args->quick_push (flags);
762
763 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
764 builtin_decl_explicit (start_ix), args);
765
766 if (rtmp)
767 {
768 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
769 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
770 fold_convert (type,
771 fold_convert (pointer_sized_int_node, t)));
772 }
773 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
774 false, GSI_CONTINUE_LINKING);
775
776 if (hsa_gen_requested_p ()
777 && parallel_needs_hsa_kernel_p (region))
778 {
779 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
780 hsa_register_kernel (child_cnode);
781 }
782 }
783
784 /* Build the function call to GOMP_task to actually
785 generate the task operation. BB is the block where to insert the code. */
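
/* Illustrative sketch (not part of the original source): for

     #pragma omp task untied

   the emitted call is roughly

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                true, GOMP_TASK_FLAG_UNTIED, NULL, 0);

   i.e. condition true, only the untied flag set, no depend array and
   priority zero.  Taskloops call GOMP_taskloop{,_ull} instead,
   additionally passing num_tasks (or grainsize) and the loop bounds and
   step.  */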
786
787 static void
788 expand_task_call (struct omp_region *region, basic_block bb,
789 gomp_task *entry_stmt)
790 {
791 tree t1, t2, t3;
792 gimple_stmt_iterator gsi;
793 location_t loc = gimple_location (entry_stmt);
794
795 tree clauses = gimple_omp_task_clauses (entry_stmt);
796
797 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
798 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
799 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
800 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
801 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
802 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
803
804 unsigned int iflags
805 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
806 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
807 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
808
809 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
810 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
811 tree num_tasks = NULL_TREE;
812 bool ull = false;
813 if (taskloop_p)
814 {
815 gimple *g = last_stmt (region->outer->entry);
816 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
817 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
818 struct omp_for_data fd;
819 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
820 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
821 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
822 OMP_CLAUSE__LOOPTEMP_);
823 startvar = OMP_CLAUSE_DECL (startvar);
824 endvar = OMP_CLAUSE_DECL (endvar);
825 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
826 if (fd.loop.cond_code == LT_EXPR)
827 iflags |= GOMP_TASK_FLAG_UP;
828 tree tclauses = gimple_omp_for_clauses (g);
829 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
830 if (num_tasks)
831 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
832 else
833 {
834 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
835 if (num_tasks)
836 {
837 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
838 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
839 }
840 else
841 num_tasks = integer_zero_node;
842 }
843 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
844 if (ifc == NULL_TREE)
845 iflags |= GOMP_TASK_FLAG_IF;
846 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
847 iflags |= GOMP_TASK_FLAG_NOGROUP;
848 ull = fd.iter_type == long_long_unsigned_type_node;
849 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
850 iflags |= GOMP_TASK_FLAG_REDUCTION;
851 }
852 else if (priority)
853 iflags |= GOMP_TASK_FLAG_PRIORITY;
854
855 tree flags = build_int_cst (unsigned_type_node, iflags);
856
857 tree cond = boolean_true_node;
858 if (ifc)
859 {
860 if (taskloop_p)
861 {
862 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
863 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
864 build_int_cst (unsigned_type_node,
865 GOMP_TASK_FLAG_IF),
866 build_int_cst (unsigned_type_node, 0));
867 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
868 flags, t);
869 }
870 else
871 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
872 }
873
874 if (finalc)
875 {
876 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
877 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
878 build_int_cst (unsigned_type_node,
879 GOMP_TASK_FLAG_FINAL),
880 build_int_cst (unsigned_type_node, 0));
881 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
882 }
883 if (depend)
884 depend = OMP_CLAUSE_DECL (depend);
885 else
886 depend = build_int_cst (ptr_type_node, 0);
887 if (priority)
888 priority = fold_convert (integer_type_node,
889 OMP_CLAUSE_PRIORITY_EXPR (priority));
890 else
891 priority = integer_zero_node;
892
893 gsi = gsi_last_nondebug_bb (bb);
894 tree t = gimple_omp_task_data_arg (entry_stmt);
895 if (t == NULL)
896 t2 = null_pointer_node;
897 else
898 t2 = build_fold_addr_expr_loc (loc, t);
899 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
900 t = gimple_omp_task_copy_fn (entry_stmt);
901 if (t == NULL)
902 t3 = null_pointer_node;
903 else
904 t3 = build_fold_addr_expr_loc (loc, t);
905
906 if (taskloop_p)
907 t = build_call_expr (ull
908 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
909 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
910 11, t1, t2, t3,
911 gimple_omp_task_arg_size (entry_stmt),
912 gimple_omp_task_arg_align (entry_stmt), flags,
913 num_tasks, priority, startvar, endvar, step);
914 else
915 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
916 9, t1, t2, t3,
917 gimple_omp_task_arg_size (entry_stmt),
918 gimple_omp_task_arg_align (entry_stmt), cond, flags,
919 depend, priority);
920
921 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
922 false, GSI_CONTINUE_LINKING);
923 }
924
925 /* Build the function call to GOMP_taskwait_depend to actually
926 generate the taskwait operation. BB is the block where to insert the
927 code. */
928
929 static void
930 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
931 {
932 tree clauses = gimple_omp_task_clauses (entry_stmt);
933 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
934 if (depend == NULL_TREE)
935 return;
936
937 depend = OMP_CLAUSE_DECL (depend);
938
939 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
940 tree t
941 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
942 1, depend);
943
944 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
945 false, GSI_CONTINUE_LINKING);
946 }
947
948 /* Build the function call to GOMP_teams_reg to actually
949 generate the host teams operation. BB is the block where to insert
950 the code. */
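
/* Illustrative sketch (not part of the original source; the signature
   shown is an assumption): the host teams construct becomes roughly

     GOMP_teams_reg (child_fn, &.omp_data_o, num_teams, thread_limit, 0);

   where a num_teams or thread_limit of zero means the clause was not
   given and the trailing flags argument is currently always zero.  */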
951
952 static void
953 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
954 {
955 tree clauses = gimple_omp_teams_clauses (entry_stmt);
956 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
957 if (num_teams == NULL_TREE)
958 num_teams = build_int_cst (unsigned_type_node, 0);
959 else
960 {
961 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
962 num_teams = fold_convert (unsigned_type_node, num_teams);
963 }
964 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
965 if (thread_limit == NULL_TREE)
966 thread_limit = build_int_cst (unsigned_type_node, 0);
967 else
968 {
969 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
970 thread_limit = fold_convert (unsigned_type_node, thread_limit);
971 }
972
973 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
974 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
975 if (t == NULL)
976 t1 = null_pointer_node;
977 else
978 t1 = build_fold_addr_expr (t);
979 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
980 tree t2 = build_fold_addr_expr (child_fndecl);
981
982 vec<tree, va_gc> *args;
983 vec_alloc (args, 5);
984 args->quick_push (t2);
985 args->quick_push (t1);
986 args->quick_push (num_teams);
987 args->quick_push (thread_limit);
988 /* For future extensibility. */
989 args->quick_push (build_zero_cst (unsigned_type_node));
990
991 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
992 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
993 args);
994
995 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
996 false, GSI_CONTINUE_LINKING);
997 }
998
999 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
1000
1001 static tree
1002 vec2chain (vec<tree, va_gc> *v)
1003 {
1004 tree chain = NULL_TREE, t;
1005 unsigned ix;
1006
1007 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1008 {
1009 DECL_CHAIN (t) = chain;
1010 chain = t;
1011 }
1012
1013 return chain;
1014 }
1015
1016 /* Remove barriers in REGION->EXIT's block. Note that this is only
1017 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1018 is an implicit barrier, the barrier left at the end of the
1019 GIMPLE_OMP_PARALLEL region by any workshare inside it can now be
1020 removed. */
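
/* Illustrative example (not part of the original source): in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         ...
     }

   the implicit barrier at the end of the for construct is immediately
   followed by the implicit barrier at the end of the parallel region,
   so the inner one can be marked nowait -- unless queued tasks might
   still need addressable locals of the parallel body, which is what the
   checks below guard against.  */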
1021
1022 static void
1023 remove_exit_barrier (struct omp_region *region)
1024 {
1025 gimple_stmt_iterator gsi;
1026 basic_block exit_bb;
1027 edge_iterator ei;
1028 edge e;
1029 gimple *stmt;
1030 int any_addressable_vars = -1;
1031
1032 exit_bb = region->exit;
1033
1034 /* If the parallel region doesn't return, we don't have REGION->EXIT
1035 block at all. */
1036 if (! exit_bb)
1037 return;
1038
1039 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1040 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1041 statements that can appear in between are extremely limited -- no
1042 memory operations at all. Here, we allow nothing at all, so the
1043 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1044 gsi = gsi_last_nondebug_bb (exit_bb);
1045 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1046 gsi_prev_nondebug (&gsi);
1047 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1048 return;
1049
1050 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1051 {
1052 gsi = gsi_last_nondebug_bb (e->src);
1053 if (gsi_end_p (gsi))
1054 continue;
1055 stmt = gsi_stmt (gsi);
1056 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1057 && !gimple_omp_return_nowait_p (stmt))
1058 {
1059 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1060 in many cases. If there could be tasks queued, the barrier
1061 might be needed to let the tasks run before some local
1062 variable of the parallel that the task uses as shared
1063 runs out of scope. The task can be spawned either
1064 from within the current function (this would be easy to check)
1065 or from some function it calls and gets passed an address
1066 of such a variable. */
1067 if (any_addressable_vars < 0)
1068 {
1069 gomp_parallel *parallel_stmt
1070 = as_a <gomp_parallel *> (last_stmt (region->entry));
1071 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1072 tree local_decls, block, decl;
1073 unsigned ix;
1074
1075 any_addressable_vars = 0;
1076 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1077 if (TREE_ADDRESSABLE (decl))
1078 {
1079 any_addressable_vars = 1;
1080 break;
1081 }
1082 for (block = gimple_block (stmt);
1083 !any_addressable_vars
1084 && block
1085 && TREE_CODE (block) == BLOCK;
1086 block = BLOCK_SUPERCONTEXT (block))
1087 {
1088 for (local_decls = BLOCK_VARS (block);
1089 local_decls;
1090 local_decls = DECL_CHAIN (local_decls))
1091 if (TREE_ADDRESSABLE (local_decls))
1092 {
1093 any_addressable_vars = 1;
1094 break;
1095 }
1096 if (block == gimple_block (parallel_stmt))
1097 break;
1098 }
1099 }
1100 if (!any_addressable_vars)
1101 gimple_omp_return_set_nowait (stmt);
1102 }
1103 }
1104 }
1105
1106 static void
1107 remove_exit_barriers (struct omp_region *region)
1108 {
1109 if (region->type == GIMPLE_OMP_PARALLEL)
1110 remove_exit_barrier (region);
1111
1112 if (region->inner)
1113 {
1114 region = region->inner;
1115 remove_exit_barriers (region);
1116 while (region->next)
1117 {
1118 region = region->next;
1119 remove_exit_barriers (region);
1120 }
1121 }
1122 }
1123
1124 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1125 calls. These can't be declared as const functions, but
1126 within one parallel body they are constant, so they can be
1127 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1128 which are declared const. Similarly for task body, except
1129 that in untied task omp_get_thread_num () can change at any task
1130 scheduling point. */
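
/* Illustrative sketch (not part of the original source): inside a
   parallel body a call such as

     tid = omp_get_thread_num ();

   is redirected to the const-declared __builtin_omp_get_thread_num (),
   so repeated calls can be CSEd later; in an untied task the thread
   number may change at task scheduling points, so there the call is
   left untouched.  */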
1131
1132 static void
1133 optimize_omp_library_calls (gimple *entry_stmt)
1134 {
1135 basic_block bb;
1136 gimple_stmt_iterator gsi;
1137 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1138 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1139 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1140 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1141 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1142 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1143 OMP_CLAUSE_UNTIED) != NULL);
1144
1145 FOR_EACH_BB_FN (bb, cfun)
1146 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1147 {
1148 gimple *call = gsi_stmt (gsi);
1149 tree decl;
1150
1151 if (is_gimple_call (call)
1152 && (decl = gimple_call_fndecl (call))
1153 && DECL_EXTERNAL (decl)
1154 && TREE_PUBLIC (decl)
1155 && DECL_INITIAL (decl) == NULL)
1156 {
1157 tree built_in;
1158
1159 if (DECL_NAME (decl) == thr_num_id)
1160 {
1161 /* In #pragma omp task untied omp_get_thread_num () can change
1162 during the execution of the task region. */
1163 if (untied_task)
1164 continue;
1165 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1166 }
1167 else if (DECL_NAME (decl) == num_thr_id)
1168 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1169 else
1170 continue;
1171
1172 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1173 || gimple_call_num_args (call) != 0)
1174 continue;
1175
1176 if (flag_exceptions && !TREE_NOTHROW (decl))
1177 continue;
1178
1179 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1180 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1181 TREE_TYPE (TREE_TYPE (built_in))))
1182 continue;
1183
1184 gimple_call_set_fndecl (call, built_in);
1185 }
1186 }
1187 }
1188
1189 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1190 regimplified. */
1191
1192 static tree
1193 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1194 {
1195 tree t = *tp;
1196
1197 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1198 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1199 return t;
1200
1201 if (TREE_CODE (t) == ADDR_EXPR)
1202 recompute_tree_invariant_for_addr_expr (t);
1203
1204 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1205 return NULL_TREE;
1206 }
1207
1208 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1209
1210 static void
1211 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1212 bool after)
1213 {
1214 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1215 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1216 !after, after ? GSI_CONTINUE_LINKING
1217 : GSI_SAME_STMT);
1218 gimple *stmt = gimple_build_assign (to, from);
1219 if (after)
1220 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1221 else
1222 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1223 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1224 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1225 {
1226 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1227 gimple_regimplify_operands (stmt, &gsi);
1228 }
1229 }
1230
1231 /* Expand the OpenMP parallel or task directive starting at REGION. */
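
/* Illustrative overview (not part of the original source): the body
   between REGION->ENTRY and the matching return is moved into the
   previously created child function with move_sese_region_to_fn, the
   directive statement itself is removed, and a call to the matching
   libgomp entry point (GOMP_parallel, GOMP_task or GOMP_teams_reg) is
   emitted in its place by the expand_*_call helpers above.  */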
1232
1233 static void
1234 expand_omp_taskreg (struct omp_region *region)
1235 {
1236 basic_block entry_bb, exit_bb, new_bb;
1237 struct function *child_cfun;
1238 tree child_fn, block, t;
1239 gimple_stmt_iterator gsi;
1240 gimple *entry_stmt, *stmt;
1241 edge e;
1242 vec<tree, va_gc> *ws_args;
1243
1244 entry_stmt = last_stmt (region->entry);
1245 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1246 && gimple_omp_task_taskwait_p (entry_stmt))
1247 {
1248 new_bb = region->entry;
1249 gsi = gsi_last_nondebug_bb (region->entry);
1250 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1251 gsi_remove (&gsi, true);
1252 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1253 return;
1254 }
1255
1256 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1257 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1258
1259 entry_bb = region->entry;
1260 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1261 exit_bb = region->cont;
1262 else
1263 exit_bb = region->exit;
1264
1265 if (is_combined_parallel (region))
1266 ws_args = region->ws_args;
1267 else
1268 ws_args = NULL;
1269
1270 if (child_cfun->cfg)
1271 {
1272 /* Due to inlining, it may happen that we have already outlined
1273 the region, in which case all we need to do is make the
1274 sub-graph unreachable and emit the parallel call. */
1275 edge entry_succ_e, exit_succ_e;
1276
1277 entry_succ_e = single_succ_edge (entry_bb);
1278
1279 gsi = gsi_last_nondebug_bb (entry_bb);
1280 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1282 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1283 gsi_remove (&gsi, true);
1284
1285 new_bb = entry_bb;
1286 if (exit_bb)
1287 {
1288 exit_succ_e = single_succ_edge (exit_bb);
1289 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1290 }
1291 remove_edge_and_dominated_blocks (entry_succ_e);
1292 }
1293 else
1294 {
1295 unsigned srcidx, dstidx, num;
1296
1297 /* If the parallel region needs data sent from the parent
1298 function, then the very first statement (except possible
1299 tree profile counter updates) of the parallel body
1300 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1301 &.OMP_DATA_O is passed as an argument to the child function,
1302 we need to replace it with the argument as seen by the child
1303 function.
1304
1305 In most cases, this will end up being the identity assignment
1306 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1307 a function call that has been inlined, the original PARM_DECL
1308 .OMP_DATA_I may have been converted into a different local
1309 variable. In which case, we need to keep the assignment. */
1310 if (gimple_omp_taskreg_data_arg (entry_stmt))
1311 {
1312 basic_block entry_succ_bb
1313 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1314 : FALLTHRU_EDGE (entry_bb)->dest;
1315 tree arg;
1316 gimple *parcopy_stmt = NULL;
1317
1318 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1319 {
1320 gimple *stmt;
1321
1322 gcc_assert (!gsi_end_p (gsi));
1323 stmt = gsi_stmt (gsi);
1324 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1325 continue;
1326
1327 if (gimple_num_ops (stmt) == 2)
1328 {
1329 tree arg = gimple_assign_rhs1 (stmt);
1330
1331 /* We're ignoring the subcode because we're
1332 effectively doing a STRIP_NOPS. */
1333
1334 if (TREE_CODE (arg) == ADDR_EXPR
1335 && (TREE_OPERAND (arg, 0)
1336 == gimple_omp_taskreg_data_arg (entry_stmt)))
1337 {
1338 parcopy_stmt = stmt;
1339 break;
1340 }
1341 }
1342 }
1343
1344 gcc_assert (parcopy_stmt != NULL);
1345 arg = DECL_ARGUMENTS (child_fn);
1346
1347 if (!gimple_in_ssa_p (cfun))
1348 {
1349 if (gimple_assign_lhs (parcopy_stmt) == arg)
1350 gsi_remove (&gsi, true);
1351 else
1352 {
1353 /* ?? Is setting the subcode really necessary ?? */
1354 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1355 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 }
1357 }
1358 else
1359 {
1360 tree lhs = gimple_assign_lhs (parcopy_stmt);
1361 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1362 /* We'd like to set the rhs to the default def in the child_fn,
1363 but it's too early to create ssa names in the child_fn.
1364 Instead, we set the rhs to the parm. In
1365 move_sese_region_to_fn, we introduce a default def for the
1366 parm, map the parm to its default def, and once we encounter
1367 this stmt, replace the parm with the default def. */
1368 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1369 update_stmt (parcopy_stmt);
1370 }
1371 }
1372
1373 /* Declare local variables needed in CHILD_CFUN. */
1374 block = DECL_INITIAL (child_fn);
1375 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1376 /* The gimplifier could record temporaries in parallel/task block
1377 rather than in the containing function's local_decls chain,
1378 which would mean cgraph missed finalizing them. Do it now. */
1379 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1380 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1381 varpool_node::finalize_decl (t);
1382 DECL_SAVED_TREE (child_fn) = NULL;
1383 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1384 gimple_set_body (child_fn, NULL);
1385 TREE_USED (block) = 1;
1386
1387 /* Reset DECL_CONTEXT on function arguments. */
1388 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1389 DECL_CONTEXT (t) = child_fn;
1390
1391 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1392 so that it can be moved to the child function. */
1393 gsi = gsi_last_nondebug_bb (entry_bb);
1394 stmt = gsi_stmt (gsi);
1395 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1396 || gimple_code (stmt) == GIMPLE_OMP_TASK
1397 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1398 e = split_block (entry_bb, stmt);
1399 gsi_remove (&gsi, true);
1400 entry_bb = e->dest;
1401 edge e2 = NULL;
1402 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1403 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1404 else
1405 {
1406 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1407 gcc_assert (e2->dest == region->exit);
1408 remove_edge (BRANCH_EDGE (entry_bb));
1409 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1410 gsi = gsi_last_nondebug_bb (region->exit);
1411 gcc_assert (!gsi_end_p (gsi)
1412 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1413 gsi_remove (&gsi, true);
1414 }
1415
1416 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1417 if (exit_bb)
1418 {
1419 gsi = gsi_last_nondebug_bb (exit_bb);
1420 gcc_assert (!gsi_end_p (gsi)
1421 && (gimple_code (gsi_stmt (gsi))
1422 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1423 stmt = gimple_build_return (NULL);
1424 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1425 gsi_remove (&gsi, true);
1426 }
1427
1428 /* Move the parallel region into CHILD_CFUN. */
1429
1430 if (gimple_in_ssa_p (cfun))
1431 {
1432 init_tree_ssa (child_cfun);
1433 init_ssa_operands (child_cfun);
1434 child_cfun->gimple_df->in_ssa_p = true;
1435 block = NULL_TREE;
1436 }
1437 else
1438 block = gimple_block (entry_stmt);
1439
1440 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1441 if (exit_bb)
1442 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1443 if (e2)
1444 {
1445 basic_block dest_bb = e2->dest;
1446 if (!exit_bb)
1447 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1448 remove_edge (e2);
1449 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1450 }
1451 /* When the OMP expansion process cannot guarantee an up-to-date
1452 loop tree, arrange for the child function to fix up loops. */
1453 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1454 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1455
1456 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1457 num = vec_safe_length (child_cfun->local_decls);
1458 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1459 {
1460 t = (*child_cfun->local_decls)[srcidx];
1461 if (DECL_CONTEXT (t) == cfun->decl)
1462 continue;
1463 if (srcidx != dstidx)
1464 (*child_cfun->local_decls)[dstidx] = t;
1465 dstidx++;
1466 }
1467 if (dstidx != num)
1468 vec_safe_truncate (child_cfun->local_decls, dstidx);
1469
1470 /* Inform the callgraph about the new function. */
1471 child_cfun->curr_properties = cfun->curr_properties;
1472 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1473 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1474 cgraph_node *node = cgraph_node::get_create (child_fn);
1475 node->parallelized_function = 1;
1476 cgraph_node::add_new_function (child_fn, true);
1477
1478 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1479 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1480
1481 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1482 fixed in a following pass. */
1483 push_cfun (child_cfun);
1484 if (need_asm)
1485 assign_assembler_name_if_needed (child_fn);
1486
1487 if (optimize)
1488 optimize_omp_library_calls (entry_stmt);
1489 update_max_bb_count ();
1490 cgraph_edge::rebuild_edges ();
1491
1492 /* Some EH regions might become dead, see PR34608. If
1493 pass_cleanup_cfg isn't the first pass to happen with the
1494 new child, these dead EH edges might cause problems.
1495 Clean them up now. */
1496 if (flag_exceptions)
1497 {
1498 basic_block bb;
1499 bool changed = false;
1500
1501 FOR_EACH_BB_FN (bb, cfun)
1502 changed |= gimple_purge_dead_eh_edges (bb);
1503 if (changed)
1504 cleanup_tree_cfg ();
1505 }
1506 if (gimple_in_ssa_p (cfun))
1507 update_ssa (TODO_update_ssa);
1508 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1509 verify_loop_structure ();
1510 pop_cfun ();
1511
1512 if (dump_file && !gimple_in_ssa_p (cfun))
1513 {
1514 omp_any_child_fn_dumped = true;
1515 dump_function_header (dump_file, child_fn, dump_flags);
1516 dump_function_to_file (child_fn, dump_file, dump_flags);
1517 }
1518 }
1519
1520 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1521
1522 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1523 expand_parallel_call (region, new_bb,
1524 as_a <gomp_parallel *> (entry_stmt), ws_args);
1525 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1526 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1527 else
1528 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1529 if (gimple_in_ssa_p (cfun))
1530 update_ssa (TODO_update_ssa_only_virtuals);
1531 }
1532
1533 /* Information about members of an OpenACC collapsed loop nest. */
1534
1535 struct oacc_collapse
1536 {
1537 tree base; /* Base value. */
1538 tree iters; /* Number of steps. */
1539 tree step; /* Step size. */
1540 tree tile; /* Tile increment (if tiled). */
1541 tree outer; /* Tile iterator var. */
1542 };
1543
1544 /* Helper for expand_oacc_for. Determine collapsed loop information.
1545 Fill in COUNTS array. Emit any initialization code before GSI.
1546 Return the calculated outer loop bound of BOUND_TYPE. */
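
/* Illustrative note (not part of the original source): for each member
   loop the iteration count is computed essentially as

     range = e - b;     (in a signed difference type, taking care to
                         avoid unsigned overflow)
     iters = (range - dir + step) / step;     (dir is +1 for <, -1 for >)

   and the returned bound is the product of all the per-loop ITERS
   values, i.e. the size of the collapsed iteration space.  */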
1547
1548 static tree
1549 expand_oacc_collapse_init (const struct omp_for_data *fd,
1550 gimple_stmt_iterator *gsi,
1551 oacc_collapse *counts, tree bound_type,
1552 location_t loc)
1553 {
1554 tree tiling = fd->tiling;
1555 tree total = build_int_cst (bound_type, 1);
1556 int ix;
1557
1558 gcc_assert (integer_onep (fd->loop.step));
1559 gcc_assert (integer_zerop (fd->loop.n1));
1560
1561 /* When tiling, the first operand of the tile clause applies to the
1562 innermost loop, and we work outwards from there. Seems
1563 backwards, but whatever. */
1564 for (ix = fd->collapse; ix--;)
1565 {
1566 const omp_for_data_loop *loop = &fd->loops[ix];
1567
1568 tree iter_type = TREE_TYPE (loop->v);
1569 tree diff_type = iter_type;
1570 tree plus_type = iter_type;
1571
1572 gcc_assert (loop->cond_code == fd->loop.cond_code);
1573
1574 if (POINTER_TYPE_P (iter_type))
1575 plus_type = sizetype;
1576 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1577 diff_type = signed_type_for (diff_type);
1578 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1579 diff_type = integer_type_node;
1580
1581 if (tiling)
1582 {
1583 tree num = build_int_cst (integer_type_node, fd->collapse);
1584 tree loop_no = build_int_cst (integer_type_node, ix);
1585 tree tile = TREE_VALUE (tiling);
1586 gcall *call
1587 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1588 /* gwv-outer=*/integer_zero_node,
1589 /* gwv-inner=*/integer_zero_node);
1590
1591 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1592 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1593 gimple_call_set_lhs (call, counts[ix].tile);
1594 gimple_set_location (call, loc);
1595 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1596
1597 tiling = TREE_CHAIN (tiling);
1598 }
1599 else
1600 {
1601 counts[ix].tile = NULL;
1602 counts[ix].outer = loop->v;
1603 }
1604
1605 tree b = loop->n1;
1606 tree e = loop->n2;
1607 tree s = loop->step;
1608 bool up = loop->cond_code == LT_EXPR;
1609 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1610 bool negating;
1611 tree expr;
1612
1613 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1614 true, GSI_SAME_STMT);
1615 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1616 true, GSI_SAME_STMT);
1617
1618 /* Convert the step, avoiding possible unsigned->signed overflow. */
1619 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1620 if (negating)
1621 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1622 s = fold_convert (diff_type, s);
1623 if (negating)
1624 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1625 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1626 true, GSI_SAME_STMT);
1627
1628 /* Determine the range, avoiding possible unsigned->signed overflow. */
1629 negating = !up && TYPE_UNSIGNED (iter_type);
1630 expr = fold_build2 (MINUS_EXPR, plus_type,
1631 fold_convert (plus_type, negating ? b : e),
1632 fold_convert (plus_type, negating ? e : b));
1633 expr = fold_convert (diff_type, expr);
1634 if (negating)
1635 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1636 tree range = force_gimple_operand_gsi
1637 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1638
1639 /* Determine number of iterations. */
1640 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1641 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1642 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1643
1644 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1645 true, GSI_SAME_STMT);
1646
1647 counts[ix].base = b;
1648 counts[ix].iters = iters;
1649 counts[ix].step = s;
1650
1651 total = fold_build2 (MULT_EXPR, bound_type, total,
1652 fold_convert (bound_type, iters));
1653 }
1654
1655 return total;
1656 }
1657
1658 /* Emit initializers for collapsed loop members. INNER is true if
1659 this is for the element loop of a TILE. IVAR is the outer
1660 loop iteration variable, from which collapsed loop iteration values
1661 are calculated. COUNTS array has been initialized by
1662 expand_oacc_collapse_init. */
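
/* Illustrative note (not part of the original source): this is a
   mixed-radix decomposition of IVAR.  Working from the innermost member
   loop outwards, each loop takes

     this = ivar % iters[ix];   ivar = ivar / iters[ix];
     v[ix] = base[ix] + this * step[ix];

   (the outermost loop just uses the remaining quotient, pointer
   iteration variables use POINTER_PLUS_EXPR, and for the element loop
   of a tile the tile iterator OUTER replaces BASE).  */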
1663
1664 static void
1665 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1666 gimple_stmt_iterator *gsi,
1667 const oacc_collapse *counts, tree ivar)
1668 {
1669 tree ivar_type = TREE_TYPE (ivar);
1670
1671 /* The most rapidly changing iteration variable is the innermost
1672 one. */
1673 for (int ix = fd->collapse; ix--;)
1674 {
1675 const omp_for_data_loop *loop = &fd->loops[ix];
1676 const oacc_collapse *collapse = &counts[ix];
1677 tree v = inner ? loop->v : collapse->outer;
1678 tree iter_type = TREE_TYPE (v);
1679 tree diff_type = TREE_TYPE (collapse->step);
1680 tree plus_type = iter_type;
1681 enum tree_code plus_code = PLUS_EXPR;
1682 tree expr;
1683
1684 if (POINTER_TYPE_P (iter_type))
1685 {
1686 plus_code = POINTER_PLUS_EXPR;
1687 plus_type = sizetype;
1688 }
1689
1690 expr = ivar;
1691 if (ix)
1692 {
1693 tree mod = fold_convert (ivar_type, collapse->iters);
1694 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1695 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1696 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1697 true, GSI_SAME_STMT);
1698 }
1699
1700 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1701 collapse->step);
1702 expr = fold_build2 (plus_code, iter_type,
1703 inner ? collapse->outer : collapse->base,
1704 fold_convert (plus_type, expr));
1705 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1706 true, GSI_SAME_STMT);
1707 gassign *ass = gimple_build_assign (v, expr);
1708 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1709 }
1710 }
1711
1712 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1713 of the combined collapse > 1 loop constructs, generate code like:
1714 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1715 if (cond3 is <)
1716 adj = STEP3 - 1;
1717 else
1718 adj = STEP3 + 1;
1719 count3 = (adj + N32 - N31) / STEP3;
1720 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1721 if (cond2 is <)
1722 adj = STEP2 - 1;
1723 else
1724 adj = STEP2 + 1;
1725 count2 = (adj + N22 - N21) / STEP2;
1726 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1727 if (cond1 is <)
1728 adj = STEP1 - 1;
1729 else
1730 adj = STEP1 + 1;
1731 count1 = (adj + N12 - N11) / STEP1;
1732 count = count1 * count2 * count3;
1733 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1734 count = 0;
1735 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1736 of the combined loop constructs, just initialize COUNTS array
1737 from the _looptemp_ clauses. */
1738
1739 /* NOTE: It *could* be better to moosh all of the BBs together,
1740 creating one larger BB with all the computation and the unexpected
1741 jump at the end. I.e.
1742
1743 bool zero3, zero2, zero1, zero;
1744
1745 zero3 = N32 c3 N31;
1746 count3 = (N32 - N31) /[cl] STEP3;
1747 zero2 = N22 c2 N21;
1748 count2 = (N22 - N21) /[cl] STEP2;
1749 zero1 = N12 c1 N11;
1750 count1 = (N12 - N11) /[cl] STEP1;
1751 zero = zero3 || zero2 || zero1;
1752 count = count1 * count2 * count3;
1753 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1754
1755 After all, we expect zero to be false, and thus we expect to have to
1756 evaluate all of the comparison expressions, so short-circuiting
1757 oughtn't be a win. Since the condition isn't protecting a
1758 denominator, we're not concerned about divide-by-zero, so we can
1759 fully evaluate count even if a numerator turned out to be wrong.
1760
1761 It seems like putting this all together would create much better
1762 scheduling opportunities, and less pressure on the chip's branch
1763 predictor. */
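/* Purely as an illustration of the count computation described above (the
   bounds below are made up, not taken from any testcase): for

     #pragma omp for collapse(2)
     for (i = 0; i < 8; i += 2)
       for (j = 10; j > 1; j -= 3)
	 body;

   the expansion computes

     count1 = (2 - 1 + 8 - 0) / 2   = 4   (cond1 is <, so adj = STEP1 - 1)
     count2 = (-3 + 1 + 1 - 10) / -3 = 3  (cond2 is >, so adj = STEP2 + 1)
     count  = count1 * count2 = 12

   i.e. the two loops are linearized into a single logical iteration
   space [0, 12).  */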
1764
1765 static void
1766 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1767 basic_block &entry_bb, tree *counts,
1768 basic_block &zero_iter1_bb, int &first_zero_iter1,
1769 basic_block &zero_iter2_bb, int &first_zero_iter2,
1770 basic_block &l2_dom_bb)
1771 {
1772 tree t, type = TREE_TYPE (fd->loop.v);
1773 edge e, ne;
1774 int i;
1775
1776 /* Collapsed loops need work for expansion into SSA form. */
1777 gcc_assert (!gimple_in_ssa_p (cfun));
1778
1779 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1780 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1781 {
1782 gcc_assert (fd->ordered == 0);
1783 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1784 isn't supposed to be handled, as the inner loop doesn't
1785 use it. */
1786 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1787 OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1790 {
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 counts[i] = OMP_CLAUSE_DECL (innerc);
1796 else
1797 counts[0] = NULL_TREE;
1798 }
1799 return;
1800 }
1801
1802 for (i = fd->collapse; i < fd->ordered; i++)
1803 {
1804 tree itype = TREE_TYPE (fd->loops[i].v);
1805 counts[i] = NULL_TREE;
1806 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1807 fold_convert (itype, fd->loops[i].n1),
1808 fold_convert (itype, fd->loops[i].n2));
1809 if (t && integer_zerop (t))
1810 {
1811 for (i = fd->collapse; i < fd->ordered; i++)
1812 counts[i] = build_int_cst (type, 0);
1813 break;
1814 }
1815 }
1816 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1817 {
1818 tree itype = TREE_TYPE (fd->loops[i].v);
1819
1820 if (i >= fd->collapse && counts[i])
1821 continue;
1822 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1823 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1824 fold_convert (itype, fd->loops[i].n1),
1825 fold_convert (itype, fd->loops[i].n2)))
1826 == NULL_TREE || !integer_onep (t)))
1827 {
1828 gcond *cond_stmt;
1829 tree n1, n2;
1830 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1831 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1832 true, GSI_SAME_STMT);
1833 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1834 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1835 true, GSI_SAME_STMT);
1836 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1837 NULL_TREE, NULL_TREE);
1838 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1839 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1840 expand_omp_regimplify_p, NULL, NULL)
1841 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1842 expand_omp_regimplify_p, NULL, NULL))
1843 {
1844 *gsi = gsi_for_stmt (cond_stmt);
1845 gimple_regimplify_operands (cond_stmt, gsi);
1846 }
1847 e = split_block (entry_bb, cond_stmt);
1848 basic_block &zero_iter_bb
1849 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1850 int &first_zero_iter
1851 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1852 if (zero_iter_bb == NULL)
1853 {
1854 gassign *assign_stmt;
1855 first_zero_iter = i;
1856 zero_iter_bb = create_empty_bb (entry_bb);
1857 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1858 *gsi = gsi_after_labels (zero_iter_bb);
1859 if (i < fd->collapse)
1860 assign_stmt = gimple_build_assign (fd->loop.n2,
1861 build_zero_cst (type));
1862 else
1863 {
1864 counts[i] = create_tmp_reg (type, ".count");
1865 assign_stmt
1866 = gimple_build_assign (counts[i], build_zero_cst (type));
1867 }
1868 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1869 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1870 entry_bb);
1871 }
1872 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1873 ne->probability = profile_probability::very_unlikely ();
1874 e->flags = EDGE_TRUE_VALUE;
1875 e->probability = ne->probability.invert ();
1876 if (l2_dom_bb == NULL)
1877 l2_dom_bb = entry_bb;
1878 entry_bb = e->dest;
1879 *gsi = gsi_last_nondebug_bb (entry_bb);
1880 }
1881
1882 if (POINTER_TYPE_P (itype))
1883 itype = signed_type_for (itype);
1884 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1885 ? -1 : 1));
1886 t = fold_build2 (PLUS_EXPR, itype,
1887 fold_convert (itype, fd->loops[i].step), t);
1888 t = fold_build2 (PLUS_EXPR, itype, t,
1889 fold_convert (itype, fd->loops[i].n2));
1890 t = fold_build2 (MINUS_EXPR, itype, t,
1891 fold_convert (itype, fd->loops[i].n1));
1892 /* ?? We could probably use CEIL_DIV_EXPR instead of
1893 TRUNC_DIV_EXPR plus the manual adjustment.  Then again, we
1894 might not generate the same code in the end, because
1895 generically we don't know that the values involved must be
1896 negative for GT. ?? */
1897 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1898 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1899 fold_build1 (NEGATE_EXPR, itype, t),
1900 fold_build1 (NEGATE_EXPR, itype,
1901 fold_convert (itype,
1902 fd->loops[i].step)));
1903 else
1904 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1905 fold_convert (itype, fd->loops[i].step));
1906 t = fold_convert (type, t);
1907 if (TREE_CODE (t) == INTEGER_CST)
1908 counts[i] = t;
1909 else
1910 {
1911 if (i < fd->collapse || i != first_zero_iter2)
1912 counts[i] = create_tmp_reg (type, ".count");
1913 expand_omp_build_assign (gsi, counts[i], t);
1914 }
1915 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1916 {
1917 if (i == 0)
1918 t = counts[0];
1919 else
1920 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1921 expand_omp_build_assign (gsi, fd->loop.n2, t);
1922 }
1923 }
1924 }
1925
1926 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1927 T = V;
1928 V3 = N31 + (T % count3) * STEP3;
1929 T = T / count3;
1930 V2 = N21 + (T % count2) * STEP2;
1931 T = T / count2;
1932 V1 = N11 + T * STEP1;
1933 if this loop doesn't have an inner loop construct combined with it.
1934 If it does have an inner loop construct combined with it and the
1935 iteration count isn't known constant, store values from counts array
1936 into its _looptemp_ temporaries instead. */
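/* A small worked example of the recipe above (values are arbitrary and
   purely illustrative): with collapse(3), count3 = 3 and count2 = 4, a
   logical iteration number V = 10 decomposes as

     T = 10;
     V3 = N31 + (10 % 3) * STEP3;   T = 10 / 3 = 3;
     V2 = N21 + (3 % 4) * STEP2;    T = 3 / 4 = 0;
     V1 = N11 + 0 * STEP1;

   i.e. the innermost index varies fastest, exactly undoing the
   count1 * count2 * count3 linearization.  */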
1937
1938 static void
1939 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1940 tree *counts, gimple *inner_stmt, tree startvar)
1941 {
1942 int i;
1943 if (gimple_omp_for_combined_p (fd->for_stmt))
1944 {
1945 /* If fd->loop.n2 is constant, then no propagation of the counts
1946 is needed, they are constant. */
1947 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1948 return;
1949
1950 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1951 ? gimple_omp_taskreg_clauses (inner_stmt)
1952 : gimple_omp_for_clauses (inner_stmt);
1953 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1954 isn't supposed to be handled, as the inner loop doesn't
1955 use it. */
1956 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1957 gcc_assert (innerc);
1958 for (i = 0; i < fd->collapse; i++)
1959 {
1960 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1961 OMP_CLAUSE__LOOPTEMP_);
1962 gcc_assert (innerc);
1963 if (i)
1964 {
1965 tree tem = OMP_CLAUSE_DECL (innerc);
1966 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1967 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1968 false, GSI_CONTINUE_LINKING);
1969 gassign *stmt = gimple_build_assign (tem, t);
1970 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1971 }
1972 }
1973 return;
1974 }
1975
1976 tree type = TREE_TYPE (fd->loop.v);
1977 tree tem = create_tmp_reg (type, ".tem");
1978 gassign *stmt = gimple_build_assign (tem, startvar);
1979 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1980
1981 for (i = fd->collapse - 1; i >= 0; i--)
1982 {
1983 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1984 itype = vtype;
1985 if (POINTER_TYPE_P (vtype))
1986 itype = signed_type_for (vtype);
1987 if (i != 0)
1988 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1989 else
1990 t = tem;
1991 t = fold_convert (itype, t);
1992 t = fold_build2 (MULT_EXPR, itype, t,
1993 fold_convert (itype, fd->loops[i].step));
1994 if (POINTER_TYPE_P (vtype))
1995 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1996 else
1997 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1998 t = force_gimple_operand_gsi (gsi, t,
1999 DECL_P (fd->loops[i].v)
2000 && TREE_ADDRESSABLE (fd->loops[i].v),
2001 NULL_TREE, false,
2002 GSI_CONTINUE_LINKING);
2003 stmt = gimple_build_assign (fd->loops[i].v, t);
2004 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2005 if (i != 0)
2006 {
2007 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2008 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2009 false, GSI_CONTINUE_LINKING);
2010 stmt = gimple_build_assign (tem, t);
2011 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2012 }
2013 }
2014 }
2015
2016 /* Helper function for expand_omp_for_*. Generate code like:
2017 L10:
2018 V3 += STEP3;
2019 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2020 L11:
2021 V3 = N31;
2022 V2 += STEP2;
2023 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2024 L12:
2025 V2 = N21;
2026 V1 += STEP1;
2027 goto BODY_BB; */
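/* For instance (illustrative only), with collapse(2) over

     for (i = 0; i < 2; i++)
       for (j = 0; j < 3; j++)
	 body;

   the generated blocks, entered after the body ran for (i = 0, j = 2),
   first try j += 1; since 3 < 3 fails, control falls through to the
   next block, which resets j = 0, does i += 1 and branches back to
   BODY_BB with (i = 1, j = 0).  */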
2028
2029 static basic_block
2030 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2031 basic_block body_bb)
2032 {
2033 basic_block last_bb, bb, collapse_bb = NULL;
2034 int i;
2035 gimple_stmt_iterator gsi;
2036 edge e;
2037 tree t;
2038 gimple *stmt;
2039
2040 last_bb = cont_bb;
2041 for (i = fd->collapse - 1; i >= 0; i--)
2042 {
2043 tree vtype = TREE_TYPE (fd->loops[i].v);
2044
2045 bb = create_empty_bb (last_bb);
2046 add_bb_to_loop (bb, last_bb->loop_father);
2047 gsi = gsi_start_bb (bb);
2048
2049 if (i < fd->collapse - 1)
2050 {
2051 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2052 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2053
2054 t = fd->loops[i + 1].n1;
2055 t = force_gimple_operand_gsi (&gsi, t,
2056 DECL_P (fd->loops[i + 1].v)
2057 && TREE_ADDRESSABLE (fd->loops[i
2058 + 1].v),
2059 NULL_TREE, false,
2060 GSI_CONTINUE_LINKING);
2061 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2062 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2063 }
2064 else
2065 collapse_bb = bb;
2066
2067 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2068
2069 if (POINTER_TYPE_P (vtype))
2070 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2071 else
2072 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2073 t = force_gimple_operand_gsi (&gsi, t,
2074 DECL_P (fd->loops[i].v)
2075 && TREE_ADDRESSABLE (fd->loops[i].v),
2076 NULL_TREE, false, GSI_CONTINUE_LINKING);
2077 stmt = gimple_build_assign (fd->loops[i].v, t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079
2080 if (i > 0)
2081 {
2082 t = fd->loops[i].n2;
2083 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2084 false, GSI_CONTINUE_LINKING);
2085 tree v = fd->loops[i].v;
2086 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2087 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2088 false, GSI_CONTINUE_LINKING);
2089 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2090 stmt = gimple_build_cond_empty (t);
2091 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2092 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2093 expand_omp_regimplify_p, NULL, NULL)
2094 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2095 expand_omp_regimplify_p, NULL, NULL))
2096 gimple_regimplify_operands (stmt, &gsi);
2097 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2098 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2099 }
2100 else
2101 make_edge (bb, body_bb, EDGE_FALLTHRU);
2102 last_bb = bb;
2103 }
2104
2105 return collapse_bb;
2106 }
2107
2108 /* Expand #pragma omp ordered depend(source). */
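/* For illustration (assuming the usual libgomp entry points behind the
   builtins selected below, GOMP_doacross_post resp. GOMP_doacross_ull_post):
   inside a loop nest under #pragma omp for ordered(1), a

     #pragma omp ordered depend(source)

   directive expands to roughly

     GOMP_doacross_post (&.orditera);

   where .orditera is the counts[fd->ordered] array that holds the current
   logical iteration number(s), kept up to date by the surrounding loop
   expansion.  */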
2109
2110 static void
2111 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2112 tree *counts, location_t loc)
2113 {
2114 enum built_in_function source_ix
2115 = fd->iter_type == long_integer_type_node
2116 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2117 gimple *g
2118 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2119 build_fold_addr_expr (counts[fd->ordered]));
2120 gimple_set_location (g, loc);
2121 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2122 }
2123
2124 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
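/* An illustrative sketch (simplified; lb/ub are made-up names, and the
   libgomp entry points behind the builtins are assumed to be
   GOMP_doacross_wait resp. GOMP_doacross_ull_wait): for

     #pragma omp for ordered(1)
     for (i = lb; i < ub; i++)
       {
	 #pragma omp ordered depend(sink: i - 1)
	 body;
       }

   the code below computes the logical iteration number of the sunk
   iteration, (i - lb) - 1, guards it against falling outside the
   iteration space, and emits roughly

     if (i - 1 >= lb)
       GOMP_doacross_wait ((i - lb) - 1);  */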
2125
2126 static void
2127 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2128 tree *counts, tree c, location_t loc)
2129 {
2130 auto_vec<tree, 10> args;
2131 enum built_in_function sink_ix
2132 = fd->iter_type == long_integer_type_node
2133 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2134 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2135 int i;
2136 gimple_stmt_iterator gsi2 = *gsi;
2137 bool warned_step = false;
2138
2139 for (i = 0; i < fd->ordered; i++)
2140 {
2141 tree step = NULL_TREE;
2142 off = TREE_PURPOSE (deps);
2143 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2144 {
2145 step = TREE_OPERAND (off, 1);
2146 off = TREE_OPERAND (off, 0);
2147 }
2148 if (!integer_zerop (off))
2149 {
2150 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2151 || fd->loops[i].cond_code == GT_EXPR);
2152 bool forward = fd->loops[i].cond_code == LT_EXPR;
2153 if (step)
2154 {
2155 /* Non-simple Fortran DO loops.  If the step is variable,
2156 we don't know even the direction at compile time, so we
2157 cannot warn.  */
2158 if (TREE_CODE (step) != INTEGER_CST)
2159 break;
2160 forward = tree_int_cst_sgn (step) != -1;
2161 }
2162 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2163 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2164 "waiting for lexically later iteration");
2165 break;
2166 }
2167 deps = TREE_CHAIN (deps);
2168 }
2169 /* If all offsets corresponding to the collapsed loops are zero,
2170 this depend clause can be ignored. FIXME: but there is still a
2171 flush needed. We need to emit one __sync_synchronize () for it
2172 though (perhaps conditionally)? Solve this together with the
2173 conservative dependence folding optimization.
2174 if (i >= fd->collapse)
2175 return; */
2176
2177 deps = OMP_CLAUSE_DECL (c);
2178 gsi_prev (&gsi2);
2179 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2180 edge e2 = split_block_after_labels (e1->dest);
2181
2182 gsi2 = gsi_after_labels (e1->dest);
2183 *gsi = gsi_last_bb (e1->src);
2184 for (i = 0; i < fd->ordered; i++)
2185 {
2186 tree itype = TREE_TYPE (fd->loops[i].v);
2187 tree step = NULL_TREE;
2188 tree orig_off = NULL_TREE;
2189 if (POINTER_TYPE_P (itype))
2190 itype = sizetype;
2191 if (i)
2192 deps = TREE_CHAIN (deps);
2193 off = TREE_PURPOSE (deps);
2194 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2195 {
2196 step = TREE_OPERAND (off, 1);
2197 off = TREE_OPERAND (off, 0);
2198 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2199 && integer_onep (fd->loops[i].step)
2200 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2201 }
2202 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2203 if (step)
2204 {
2205 off = fold_convert_loc (loc, itype, off);
2206 orig_off = off;
2207 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2208 }
2209
2210 if (integer_zerop (off))
2211 t = boolean_true_node;
2212 else
2213 {
2214 tree a;
2215 tree co = fold_convert_loc (loc, itype, off);
2216 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2217 {
2218 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2219 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2220 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2221 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2222 co);
2223 }
2224 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2225 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2226 fd->loops[i].v, co);
2227 else
2228 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2229 fd->loops[i].v, co);
2230 if (step)
2231 {
2232 tree t1, t2;
2233 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2234 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2235 fd->loops[i].n1);
2236 else
2237 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2238 fd->loops[i].n2);
2239 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2240 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2241 fd->loops[i].n2);
2242 else
2243 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2244 fd->loops[i].n1);
2245 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2246 step, build_int_cst (TREE_TYPE (step), 0));
2247 if (TREE_CODE (step) != INTEGER_CST)
2248 {
2249 t1 = unshare_expr (t1);
2250 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2251 false, GSI_CONTINUE_LINKING);
2252 t2 = unshare_expr (t2);
2253 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2254 false, GSI_CONTINUE_LINKING);
2255 }
2256 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2257 t, t2, t1);
2258 }
2259 else if (fd->loops[i].cond_code == LT_EXPR)
2260 {
2261 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2262 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2263 fd->loops[i].n1);
2264 else
2265 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2266 fd->loops[i].n2);
2267 }
2268 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2269 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2270 fd->loops[i].n2);
2271 else
2272 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2273 fd->loops[i].n1);
2274 }
2275 if (cond)
2276 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2277 else
2278 cond = t;
2279
2280 off = fold_convert_loc (loc, itype, off);
2281
2282 if (step
2283 || (fd->loops[i].cond_code == LT_EXPR
2284 ? !integer_onep (fd->loops[i].step)
2285 : !integer_minus_onep (fd->loops[i].step)))
2286 {
2287 if (step == NULL_TREE
2288 && TYPE_UNSIGNED (itype)
2289 && fd->loops[i].cond_code == GT_EXPR)
2290 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2291 fold_build1_loc (loc, NEGATE_EXPR, itype,
2292 s));
2293 else
2294 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2295 orig_off ? orig_off : off, s);
2296 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2297 build_int_cst (itype, 0));
2298 if (integer_zerop (t) && !warned_step)
2299 {
2300 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2301 "refers to iteration never in the iteration "
2302 "space");
2303 warned_step = true;
2304 }
2305 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2306 cond, t);
2307 }
2308
2309 if (i <= fd->collapse - 1 && fd->collapse > 1)
2310 t = fd->loop.v;
2311 else if (counts[i])
2312 t = counts[i];
2313 else
2314 {
2315 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2316 fd->loops[i].v, fd->loops[i].n1);
2317 t = fold_convert_loc (loc, fd->iter_type, t);
2318 }
2319 if (step)
2320 /* We have divided off by step already earlier. */;
2321 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2322 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2323 fold_build1_loc (loc, NEGATE_EXPR, itype,
2324 s));
2325 else
2326 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2327 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2328 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2329 off = fold_convert_loc (loc, fd->iter_type, off);
2330 if (i <= fd->collapse - 1 && fd->collapse > 1)
2331 {
2332 if (i)
2333 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2334 off);
2335 if (i < fd->collapse - 1)
2336 {
2337 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2338 counts[i]);
2339 continue;
2340 }
2341 }
2342 off = unshare_expr (off);
2343 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2344 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2345 true, GSI_SAME_STMT);
2346 args.safe_push (t);
2347 }
2348 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2349 gimple_set_location (g, loc);
2350 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2351
2352 cond = unshare_expr (cond);
2353 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2354 GSI_CONTINUE_LINKING);
2355 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2356 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2357 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2358 e1->probability = e3->probability.invert ();
2359 e1->flags = EDGE_TRUE_VALUE;
2360 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2361
2362 *gsi = gsi_after_labels (e2->dest);
2363 }
2364
2365 /* Expand all #pragma omp ordered depend(source) and
2366 #pragma omp ordered depend(sink:...) constructs in the current
2367 #pragma omp for ordered(n) region. */
2368
2369 static void
2370 expand_omp_ordered_source_sink (struct omp_region *region,
2371 struct omp_for_data *fd, tree *counts,
2372 basic_block cont_bb)
2373 {
2374 struct omp_region *inner;
2375 int i;
2376 for (i = fd->collapse - 1; i < fd->ordered; i++)
2377 if (i == fd->collapse - 1 && fd->collapse > 1)
2378 counts[i] = NULL_TREE;
2379 else if (i >= fd->collapse && !cont_bb)
2380 counts[i] = build_zero_cst (fd->iter_type);
2381 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2382 && integer_onep (fd->loops[i].step))
2383 counts[i] = NULL_TREE;
2384 else
2385 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2386 tree atype
2387 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2388 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2389 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2390
2391 for (inner = region->inner; inner; inner = inner->next)
2392 if (inner->type == GIMPLE_OMP_ORDERED)
2393 {
2394 gomp_ordered *ord_stmt = inner->ord_stmt;
2395 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2396 location_t loc = gimple_location (ord_stmt);
2397 tree c;
2398 for (c = gimple_omp_ordered_clauses (ord_stmt);
2399 c; c = OMP_CLAUSE_CHAIN (c))
2400 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2401 break;
2402 if (c)
2403 expand_omp_ordered_source (&gsi, fd, counts, loc);
2404 for (c = gimple_omp_ordered_clauses (ord_stmt);
2405 c; c = OMP_CLAUSE_CHAIN (c))
2406 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2407 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2408 gsi_remove (&gsi, true);
2409 }
2410 }
2411
2412 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2413 collapsed. */
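/* Roughly, for illustration: for

     #pragma omp for ordered(2)
     for (i = ...)		(workshared, fd->collapse == 1)
       for (j = n1; j < n2; j++)	(not collapsed, only ordered)
	 body;

   the body is wrapped here into a sequential loop over j, and each
   iteration stores the current j count into the counts[fd->ordered]
   array so that depend(source)/depend(sink) expansions can see how far
   this thread has progressed in the non-collapsed dimensions.  */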
2414
2415 static basic_block
2416 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2417 basic_block cont_bb, basic_block body_bb,
2418 bool ordered_lastprivate)
2419 {
2420 if (fd->ordered == fd->collapse)
2421 return cont_bb;
2422
2423 if (!cont_bb)
2424 {
2425 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2426 for (int i = fd->collapse; i < fd->ordered; i++)
2427 {
2428 tree type = TREE_TYPE (fd->loops[i].v);
2429 tree n1 = fold_convert (type, fd->loops[i].n1);
2430 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2431 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2432 size_int (i - fd->collapse + 1),
2433 NULL_TREE, NULL_TREE);
2434 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2435 }
2436 return NULL;
2437 }
2438
2439 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2440 {
2441 tree t, type = TREE_TYPE (fd->loops[i].v);
2442 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2443 expand_omp_build_assign (&gsi, fd->loops[i].v,
2444 fold_convert (type, fd->loops[i].n1));
2445 if (counts[i])
2446 expand_omp_build_assign (&gsi, counts[i],
2447 build_zero_cst (fd->iter_type));
2448 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2449 size_int (i - fd->collapse + 1),
2450 NULL_TREE, NULL_TREE);
2451 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2452 if (!gsi_end_p (gsi))
2453 gsi_prev (&gsi);
2454 else
2455 gsi = gsi_last_bb (body_bb);
2456 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2457 basic_block new_body = e1->dest;
2458 if (body_bb == cont_bb)
2459 cont_bb = new_body;
2460 edge e2 = NULL;
2461 basic_block new_header;
2462 if (EDGE_COUNT (cont_bb->preds) > 0)
2463 {
2464 gsi = gsi_last_bb (cont_bb);
2465 if (POINTER_TYPE_P (type))
2466 t = fold_build_pointer_plus (fd->loops[i].v,
2467 fold_convert (sizetype,
2468 fd->loops[i].step));
2469 else
2470 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2471 fold_convert (type, fd->loops[i].step));
2472 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2473 if (counts[i])
2474 {
2475 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2476 build_int_cst (fd->iter_type, 1));
2477 expand_omp_build_assign (&gsi, counts[i], t);
2478 t = counts[i];
2479 }
2480 else
2481 {
2482 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2483 fd->loops[i].v, fd->loops[i].n1);
2484 t = fold_convert (fd->iter_type, t);
2485 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2486 true, GSI_SAME_STMT);
2487 }
2488 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2489 size_int (i - fd->collapse + 1),
2490 NULL_TREE, NULL_TREE);
2491 expand_omp_build_assign (&gsi, aref, t);
2492 gsi_prev (&gsi);
2493 e2 = split_block (cont_bb, gsi_stmt (gsi));
2494 new_header = e2->dest;
2495 }
2496 else
2497 new_header = cont_bb;
2498 gsi = gsi_after_labels (new_header);
2499 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2500 true, GSI_SAME_STMT);
2501 tree n2
2502 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2503 true, NULL_TREE, true, GSI_SAME_STMT);
2504 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2505 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2506 edge e3 = split_block (new_header, gsi_stmt (gsi));
2507 cont_bb = e3->dest;
2508 remove_edge (e1);
2509 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2510 e3->flags = EDGE_FALSE_VALUE;
2511 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2512 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2513 e1->probability = e3->probability.invert ();
2514
2515 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2516 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2517
2518 if (e2)
2519 {
2520 class loop *loop = alloc_loop ();
2521 loop->header = new_header;
2522 loop->latch = e2->src;
2523 add_loop (loop, body_bb->loop_father);
2524 }
2525 }
2526
2527 /* If there are any lastprivate clauses and it is possible some loops
2528 might have zero iterations, ensure all the decls are initialized,
2529 otherwise we could crash evaluating C++ class iterators with lastprivate
2530 clauses. */
2531 bool need_inits = false;
2532 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2533 if (need_inits)
2534 {
2535 tree type = TREE_TYPE (fd->loops[i].v);
2536 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2537 expand_omp_build_assign (&gsi, fd->loops[i].v,
2538 fold_convert (type, fd->loops[i].n1));
2539 }
2540 else
2541 {
2542 tree type = TREE_TYPE (fd->loops[i].v);
2543 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2544 boolean_type_node,
2545 fold_convert (type, fd->loops[i].n1),
2546 fold_convert (type, fd->loops[i].n2));
2547 if (!integer_onep (this_cond))
2548 need_inits = true;
2549 }
2550
2551 return cont_bb;
2552 }
2553
2554 /* A subroutine of expand_omp_for. Generate code for a parallel
2555 loop with any schedule. Given parameters:
2556
2557 for (V = N1; V cond N2; V += STEP) BODY;
2558
2559 where COND is "<" or ">", we generate pseudocode
2560
2561 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2562 if (more) goto L0; else goto L3;
2563 L0:
2564 V = istart0;
2565 iend = iend0;
2566 L1:
2567 BODY;
2568 V += STEP;
2569 if (V cond iend) goto L1; else goto L2;
2570 L2:
2571 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2572 L3:
2573
2574 If this is a combined omp parallel loop, instead of the call to
2575 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2576 If this is gimple_omp_for_combined_p loop, then instead of assigning
2577 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2578 inner GIMPLE_OMP_FOR and V += STEP; and
2579 if (V cond iend) goto L1; else goto L2; are removed.
2580
2581 For collapsed loops, given parameters:
2582 collapse(3)
2583 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2584 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2585 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2586 BODY;
2587
2588 we generate pseudocode
2589
2590 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2591 if (cond3 is <)
2592 adj = STEP3 - 1;
2593 else
2594 adj = STEP3 + 1;
2595 count3 = (adj + N32 - N31) / STEP3;
2596 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2597 if (cond2 is <)
2598 adj = STEP2 - 1;
2599 else
2600 adj = STEP2 + 1;
2601 count2 = (adj + N22 - N21) / STEP2;
2602 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2603 if (cond1 is <)
2604 adj = STEP1 - 1;
2605 else
2606 adj = STEP1 + 1;
2607 count1 = (adj + N12 - N11) / STEP1;
2608 count = count1 * count2 * count3;
2609 goto Z1;
2610 Z0:
2611 count = 0;
2612 Z1:
2613 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2614 if (more) goto L0; else goto L3;
2615 L0:
2616 V = istart0;
2617 T = V;
2618 V3 = N31 + (T % count3) * STEP3;
2619 T = T / count3;
2620 V2 = N21 + (T % count2) * STEP2;
2621 T = T / count2;
2622 V1 = N11 + T * STEP1;
2623 iend = iend0;
2624 L1:
2625 BODY;
2626 V += 1;
2627 if (V < iend) goto L10; else goto L2;
2628 L10:
2629 V3 += STEP3;
2630 if (V3 cond3 N32) goto L1; else goto L11;
2631 L11:
2632 V3 = N31;
2633 V2 += STEP2;
2634 if (V2 cond2 N22) goto L1; else goto L12;
2635 L12:
2636 V2 = N21;
2637 V1 += STEP1;
2638 goto L1;
2639 L2:
2640 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2641 L3:
2642
2643 */
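/* For a concrete flavor (illustrative only; the exact builtins are chosen
   by the caller from the schedule clause and the iterator width):
   depending on the schedule kind, START_FN / NEXT_FN resolve to libgomp
   pairs such as

     GOMP_loop_static_start  / GOMP_loop_static_next
     GOMP_loop_dynamic_start / GOMP_loop_dynamic_next
     GOMP_loop_guided_start  / GOMP_loop_guided_next

   with _ull_ variants when fd->iter_type is unsigned long long, and
   nonmonotonic variants for the corresponding schedule modifiers.  */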
2644
2645 static void
2646 expand_omp_for_generic (struct omp_region *region,
2647 struct omp_for_data *fd,
2648 enum built_in_function start_fn,
2649 enum built_in_function next_fn,
2650 tree sched_arg,
2651 gimple *inner_stmt)
2652 {
2653 tree type, istart0, iend0, iend;
2654 tree t, vmain, vback, bias = NULL_TREE;
2655 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2656 basic_block l2_bb = NULL, l3_bb = NULL;
2657 gimple_stmt_iterator gsi;
2658 gassign *assign_stmt;
2659 bool in_combined_parallel = is_combined_parallel (region);
2660 bool broken_loop = region->cont == NULL;
2661 edge e, ne;
2662 tree *counts = NULL;
2663 int i;
2664 bool ordered_lastprivate = false;
2665
2666 gcc_assert (!broken_loop || !in_combined_parallel);
2667 gcc_assert (fd->iter_type == long_integer_type_node
2668 || !in_combined_parallel);
2669
2670 entry_bb = region->entry;
2671 cont_bb = region->cont;
2672 collapse_bb = NULL;
2673 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2674 gcc_assert (broken_loop
2675 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2676 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2677 l1_bb = single_succ (l0_bb);
2678 if (!broken_loop)
2679 {
2680 l2_bb = create_empty_bb (cont_bb);
2681 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2682 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2683 == l1_bb));
2684 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2685 }
2686 else
2687 l2_bb = NULL;
2688 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2689 exit_bb = region->exit;
2690
2691 gsi = gsi_last_nondebug_bb (entry_bb);
2692
2693 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2694 if (fd->ordered
2695 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2696 OMP_CLAUSE_LASTPRIVATE))
2697 ordered_lastprivate = true;
2698 tree reductions = NULL_TREE;
2699 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2700 tree memv = NULL_TREE;
2701 if (fd->lastprivate_conditional)
2702 {
2703 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2704 OMP_CLAUSE__CONDTEMP_);
2705 if (fd->have_pointer_condtemp)
2706 condtemp = OMP_CLAUSE_DECL (c);
2707 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2708 cond_var = OMP_CLAUSE_DECL (c);
2709 }
2710 if (sched_arg)
2711 {
2712 if (fd->have_reductemp)
2713 {
2714 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2715 OMP_CLAUSE__REDUCTEMP_);
2716 reductions = OMP_CLAUSE_DECL (c);
2717 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2718 gimple *g = SSA_NAME_DEF_STMT (reductions);
2719 reductions = gimple_assign_rhs1 (g);
2720 OMP_CLAUSE_DECL (c) = reductions;
2721 entry_bb = gimple_bb (g);
2722 edge e = split_block (entry_bb, g);
2723 if (region->entry == entry_bb)
2724 region->entry = e->dest;
2725 gsi = gsi_last_bb (entry_bb);
2726 }
2727 else
2728 reductions = null_pointer_node;
2729 if (fd->have_pointer_condtemp)
2730 {
2731 tree type = TREE_TYPE (condtemp);
2732 memv = create_tmp_var (type);
2733 TREE_ADDRESSABLE (memv) = 1;
2734 unsigned HOST_WIDE_INT sz
2735 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2736 sz *= fd->lastprivate_conditional;
2737 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2738 false);
2739 mem = build_fold_addr_expr (memv);
2740 }
2741 else
2742 mem = null_pointer_node;
2743 }
2744 if (fd->collapse > 1 || fd->ordered)
2745 {
2746 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2747 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2748
2749 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2750 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2751 zero_iter1_bb, first_zero_iter1,
2752 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2753
2754 if (zero_iter1_bb)
2755 {
2756 /* Some counts[i] vars might be uninitialized if
2757 some loop has zero iterations. But the body shouldn't
2758 be executed in that case, so just avoid uninit warnings. */
2759 for (i = first_zero_iter1;
2760 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2761 if (SSA_VAR_P (counts[i]))
2762 TREE_NO_WARNING (counts[i]) = 1;
2763 gsi_prev (&gsi);
2764 e = split_block (entry_bb, gsi_stmt (gsi));
2765 entry_bb = e->dest;
2766 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2767 gsi = gsi_last_nondebug_bb (entry_bb);
2768 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2769 get_immediate_dominator (CDI_DOMINATORS,
2770 zero_iter1_bb));
2771 }
2772 if (zero_iter2_bb)
2773 {
2774 /* Some counts[i] vars might be uninitialized if
2775 some loop has zero iterations. But the body shouldn't
2776 be executed in that case, so just avoid uninit warnings. */
2777 for (i = first_zero_iter2; i < fd->ordered; i++)
2778 if (SSA_VAR_P (counts[i]))
2779 TREE_NO_WARNING (counts[i]) = 1;
2780 if (zero_iter1_bb)
2781 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2782 else
2783 {
2784 gsi_prev (&gsi);
2785 e = split_block (entry_bb, gsi_stmt (gsi));
2786 entry_bb = e->dest;
2787 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2788 gsi = gsi_last_nondebug_bb (entry_bb);
2789 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2790 get_immediate_dominator
2791 (CDI_DOMINATORS, zero_iter2_bb));
2792 }
2793 }
2794 if (fd->collapse == 1)
2795 {
2796 counts[0] = fd->loop.n2;
2797 fd->loop = fd->loops[0];
2798 }
2799 }
2800
2801 type = TREE_TYPE (fd->loop.v);
2802 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2803 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2804 TREE_ADDRESSABLE (istart0) = 1;
2805 TREE_ADDRESSABLE (iend0) = 1;
2806
2807 /* See if we need to bias by LLONG_MIN. */
2808 if (fd->iter_type == long_long_unsigned_type_node
2809 && TREE_CODE (type) == INTEGER_TYPE
2810 && !TYPE_UNSIGNED (type)
2811 && fd->ordered == 0)
2812 {
2813 tree n1, n2;
2814
2815 if (fd->loop.cond_code == LT_EXPR)
2816 {
2817 n1 = fd->loop.n1;
2818 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2819 }
2820 else
2821 {
2822 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2823 n2 = fd->loop.n1;
2824 }
2825 if (TREE_CODE (n1) != INTEGER_CST
2826 || TREE_CODE (n2) != INTEGER_CST
2827 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2828 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2829 }
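/* Illustration of the bias (values chosen only for exposition): with a
   signed 64-bit iterator whose range crosses zero, e.g. [-5, 5), both
   bounds are passed to the _ull_ runtime entry points shifted by
   bias = 0x8000000000000000, so -5 becomes 0x7ffffffffffffffb and 5
   becomes 0x8000000000000005; unsigned comparison of the biased values
   then matches the original signed ordering.  */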
2830
2831 gimple_stmt_iterator gsif = gsi;
2832 gsi_prev (&gsif);
2833
2834 tree arr = NULL_TREE;
2835 if (in_combined_parallel)
2836 {
2837 gcc_assert (fd->ordered == 0);
2838 /* In a combined parallel loop, emit a call to
2839 GOMP_loop_foo_next. */
2840 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2841 build_fold_addr_expr (istart0),
2842 build_fold_addr_expr (iend0));
2843 }
2844 else
2845 {
2846 tree t0, t1, t2, t3, t4;
2847 /* If this is not a combined parallel loop, emit a call to
2848 GOMP_loop_foo_start in ENTRY_BB. */
2849 t4 = build_fold_addr_expr (iend0);
2850 t3 = build_fold_addr_expr (istart0);
2851 if (fd->ordered)
2852 {
2853 t0 = build_int_cst (unsigned_type_node,
2854 fd->ordered - fd->collapse + 1);
2855 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2856 fd->ordered
2857 - fd->collapse + 1),
2858 ".omp_counts");
2859 DECL_NAMELESS (arr) = 1;
2860 TREE_ADDRESSABLE (arr) = 1;
2861 TREE_STATIC (arr) = 1;
2862 vec<constructor_elt, va_gc> *v;
2863 vec_alloc (v, fd->ordered - fd->collapse + 1);
2864 int idx;
2865
2866 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2867 {
2868 tree c;
2869 if (idx == 0 && fd->collapse > 1)
2870 c = fd->loop.n2;
2871 else
2872 c = counts[idx + fd->collapse - 1];
2873 tree purpose = size_int (idx);
2874 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2875 if (TREE_CODE (c) != INTEGER_CST)
2876 TREE_STATIC (arr) = 0;
2877 }
2878
2879 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2880 if (!TREE_STATIC (arr))
2881 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2882 void_type_node, arr),
2883 true, NULL_TREE, true, GSI_SAME_STMT);
2884 t1 = build_fold_addr_expr (arr);
2885 t2 = NULL_TREE;
2886 }
2887 else
2888 {
2889 t2 = fold_convert (fd->iter_type, fd->loop.step);
2890 t1 = fd->loop.n2;
2891 t0 = fd->loop.n1;
2892 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2893 {
2894 tree innerc
2895 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2896 OMP_CLAUSE__LOOPTEMP_);
2897 gcc_assert (innerc);
2898 t0 = OMP_CLAUSE_DECL (innerc);
2899 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2900 OMP_CLAUSE__LOOPTEMP_);
2901 gcc_assert (innerc);
2902 t1 = OMP_CLAUSE_DECL (innerc);
2903 }
2904 if (POINTER_TYPE_P (TREE_TYPE (t0))
2905 && TYPE_PRECISION (TREE_TYPE (t0))
2906 != TYPE_PRECISION (fd->iter_type))
2907 {
2908 /* Avoid casting pointers to integer of a different size. */
2909 tree itype = signed_type_for (type);
2910 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2911 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2912 }
2913 else
2914 {
2915 t1 = fold_convert (fd->iter_type, t1);
2916 t0 = fold_convert (fd->iter_type, t0);
2917 }
2918 if (bias)
2919 {
2920 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2921 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2922 }
2923 }
2924 if (fd->iter_type == long_integer_type_node || fd->ordered)
2925 {
2926 if (fd->chunk_size)
2927 {
2928 t = fold_convert (fd->iter_type, fd->chunk_size);
2929 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2930 if (sched_arg)
2931 {
2932 if (fd->ordered)
2933 t = build_call_expr (builtin_decl_explicit (start_fn),
2934 8, t0, t1, sched_arg, t, t3, t4,
2935 reductions, mem);
2936 else
2937 t = build_call_expr (builtin_decl_explicit (start_fn),
2938 9, t0, t1, t2, sched_arg, t, t3, t4,
2939 reductions, mem);
2940 }
2941 else if (fd->ordered)
2942 t = build_call_expr (builtin_decl_explicit (start_fn),
2943 5, t0, t1, t, t3, t4);
2944 else
2945 t = build_call_expr (builtin_decl_explicit (start_fn),
2946 6, t0, t1, t2, t, t3, t4);
2947 }
2948 else if (fd->ordered)
2949 t = build_call_expr (builtin_decl_explicit (start_fn),
2950 4, t0, t1, t3, t4);
2951 else
2952 t = build_call_expr (builtin_decl_explicit (start_fn),
2953 5, t0, t1, t2, t3, t4);
2954 }
2955 else
2956 {
2957 tree t5;
2958 tree c_bool_type;
2959 tree bfn_decl;
2960
2961 /* The GOMP_loop_ull_*start functions have an additional boolean
2962 argument, true for < loops and false for > loops.
2963 In Fortran, the C bool type can be different from
2964 boolean_type_node. */
2965 bfn_decl = builtin_decl_explicit (start_fn);
2966 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2967 t5 = build_int_cst (c_bool_type,
2968 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2969 if (fd->chunk_size)
2970 {
2971 tree bfn_decl = builtin_decl_explicit (start_fn);
2972 t = fold_convert (fd->iter_type, fd->chunk_size);
2973 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2974 if (sched_arg)
2975 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2976 t, t3, t4, reductions, mem);
2977 else
2978 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2979 }
2980 else
2981 t = build_call_expr (builtin_decl_explicit (start_fn),
2982 6, t5, t0, t1, t2, t3, t4);
2983 }
2984 }
2985 if (TREE_TYPE (t) != boolean_type_node)
2986 t = fold_build2 (NE_EXPR, boolean_type_node,
2987 t, build_int_cst (TREE_TYPE (t), 0));
2988 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2989 true, GSI_SAME_STMT);
2990 if (arr && !TREE_STATIC (arr))
2991 {
2992 tree clobber = build_clobber (TREE_TYPE (arr));
2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2994 GSI_SAME_STMT);
2995 }
2996 if (fd->have_pointer_condtemp)
2997 expand_omp_build_assign (&gsi, condtemp, memv, false);
2998 if (fd->have_reductemp)
2999 {
3000 gimple *g = gsi_stmt (gsi);
3001 gsi_remove (&gsi, true);
3002 release_ssa_name (gimple_assign_lhs (g));
3003
3004 entry_bb = region->entry;
3005 gsi = gsi_last_nondebug_bb (entry_bb);
3006
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3008 }
3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3010
3011 /* Remove the GIMPLE_OMP_FOR statement. */
3012 gsi_remove (&gsi, true);
3013
3014 if (gsi_end_p (gsif))
3015 gsif = gsi_after_labels (gsi_bb (gsif));
3016 gsi_next (&gsif);
3017
3018 /* Iteration setup for sequential loop goes in L0_BB. */
3019 tree startvar = fd->loop.v;
3020 tree endvar = NULL_TREE;
3021
3022 if (gimple_omp_for_combined_p (fd->for_stmt))
3023 {
3024 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3025 && gimple_omp_for_kind (inner_stmt)
3026 == GF_OMP_FOR_KIND_SIMD);
3027 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3028 OMP_CLAUSE__LOOPTEMP_);
3029 gcc_assert (innerc);
3030 startvar = OMP_CLAUSE_DECL (innerc);
3031 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3032 OMP_CLAUSE__LOOPTEMP_);
3033 gcc_assert (innerc);
3034 endvar = OMP_CLAUSE_DECL (innerc);
3035 }
3036
3037 gsi = gsi_start_bb (l0_bb);
3038 t = istart0;
3039 if (fd->ordered && fd->collapse == 1)
3040 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3041 fold_convert (fd->iter_type, fd->loop.step));
3042 else if (bias)
3043 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3044 if (fd->ordered && fd->collapse == 1)
3045 {
3046 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3047 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, fold_convert (sizetype, t));
3049 else
3050 {
3051 t = fold_convert (TREE_TYPE (startvar), t);
3052 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3053 fd->loop.n1, t);
3054 }
3055 }
3056 else
3057 {
3058 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3059 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3060 t = fold_convert (TREE_TYPE (startvar), t);
3061 }
3062 t = force_gimple_operand_gsi (&gsi, t,
3063 DECL_P (startvar)
3064 && TREE_ADDRESSABLE (startvar),
3065 NULL_TREE, false, GSI_CONTINUE_LINKING);
3066 assign_stmt = gimple_build_assign (startvar, t);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 if (cond_var)
3069 {
3070 tree itype = TREE_TYPE (cond_var);
3071 /* For the lastprivate(conditional:) itervar, we need some iteration
3072 counter that starts at a non-zero unsigned value and increases.
3073 Prefer as few IVs as possible, so if we can use startvar
3074 itself, use that, or startvar + constant (those would be
3075 incremented with step), and as a last resort use s0 + 1,
3076 incremented by 1 on each iteration.  */
3077 if ((fd->ordered && fd->collapse == 1)
3078 || bias
3079 || POINTER_TYPE_P (type)
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3081 || fd->loop.cond_code != LT_EXPR)
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3083 build_int_cst (itype, 1));
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3085 t = fold_convert (itype, t);
3086 else
3087 {
3088 tree c = fold_convert (itype, fd->loop.n1);
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3091 }
3092 t = force_gimple_operand_gsi (&gsi, t, false,
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 assign_stmt = gimple_build_assign (cond_var, t);
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3096 }
3097
3098 t = iend0;
3099 if (fd->ordered && fd->collapse == 1)
3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3101 fold_convert (fd->iter_type, fd->loop.step));
3102 else if (bias)
3103 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3104 if (fd->ordered && fd->collapse == 1)
3105 {
3106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3107 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, fold_convert (sizetype, t));
3109 else
3110 {
3111 t = fold_convert (TREE_TYPE (startvar), t);
3112 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3113 fd->loop.n1, t);
3114 }
3115 }
3116 else
3117 {
3118 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3119 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3120 t = fold_convert (TREE_TYPE (startvar), t);
3121 }
3122 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3123 false, GSI_CONTINUE_LINKING);
3124 if (endvar)
3125 {
3126 assign_stmt = gimple_build_assign (endvar, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3129 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3130 else
3131 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3133 }
3134 /* Handle linear clause adjustments. */
3135 tree itercnt = NULL_TREE;
3136 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3137 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3138 c; c = OMP_CLAUSE_CHAIN (c))
3139 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3140 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3141 {
3142 tree d = OMP_CLAUSE_DECL (c);
3143 bool is_ref = omp_is_reference (d);
3144 tree t = d, a, dest;
3145 if (is_ref)
3146 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3147 tree type = TREE_TYPE (t);
3148 if (POINTER_TYPE_P (type))
3149 type = sizetype;
3150 dest = unshare_expr (t);
3151 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3152 expand_omp_build_assign (&gsif, v, t);
3153 if (itercnt == NULL_TREE)
3154 {
3155 itercnt = startvar;
3156 tree n1 = fd->loop.n1;
3157 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3158 {
3159 itercnt
3160 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3161 itercnt);
3162 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3163 }
3164 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3165 itercnt, n1);
3166 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3167 itercnt, fd->loop.step);
3168 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3169 NULL_TREE, false,
3170 GSI_CONTINUE_LINKING);
3171 }
3172 a = fold_build2 (MULT_EXPR, type,
3173 fold_convert (type, itercnt),
3174 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3175 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3176 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3177 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3178 false, GSI_CONTINUE_LINKING);
3179 assign_stmt = gimple_build_assign (dest, t);
3180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3181 }
3182 if (fd->collapse > 1)
3183 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3184
3185 if (fd->ordered)
3186 {
3187 /* Until now, the counts array contained the number of iterations (or
3188 a variable containing it) for the ith loop.  From now on, we need
3189 those counts only for the collapsed loops, and only from the 2nd
3190 to the last collapsed one.  Move them one element earlier;
3191 we'll use counts[fd->collapse - 1] for the first source/sink
3192 iteration counter and so on, and counts[fd->ordered]
3193 as the array holding the current counter values for
3194 depend(source).  */
3195 if (fd->collapse > 1)
3196 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3197 if (broken_loop)
3198 {
3199 int i;
3200 for (i = fd->collapse; i < fd->ordered; i++)
3201 {
3202 tree type = TREE_TYPE (fd->loops[i].v);
3203 tree this_cond
3204 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3205 fold_convert (type, fd->loops[i].n1),
3206 fold_convert (type, fd->loops[i].n2));
3207 if (!integer_onep (this_cond))
3208 break;
3209 }
3210 if (i < fd->ordered)
3211 {
3212 cont_bb
3213 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3214 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3215 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3216 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3217 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3218 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3219 make_edge (cont_bb, l1_bb, 0);
3220 l2_bb = create_empty_bb (cont_bb);
3221 broken_loop = false;
3222 }
3223 }
3224 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3225 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3226 ordered_lastprivate);
3227 if (counts[fd->collapse - 1])
3228 {
3229 gcc_assert (fd->collapse == 1);
3230 gsi = gsi_last_bb (l0_bb);
3231 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3232 istart0, true);
3233 gsi = gsi_last_bb (cont_bb);
3234 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3235 build_int_cst (fd->iter_type, 1));
3236 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_zero_node, NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3240 t = counts[fd->collapse - 1];
3241 }
3242 else if (fd->collapse > 1)
3243 t = fd->loop.v;
3244 else
3245 {
3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3247 fd->loops[0].v, fd->loops[0].n1);
3248 t = fold_convert (fd->iter_type, t);
3249 }
3250 gsi = gsi_last_bb (l0_bb);
3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3252 size_zero_node, NULL_TREE, NULL_TREE);
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 expand_omp_build_assign (&gsi, aref, t, true);
3256 }
3257
3258 if (!broken_loop)
3259 {
3260 /* Code to control the increment and predicate for the sequential
3261 loop goes in the CONT_BB. */
3262 gsi = gsi_last_nondebug_bb (cont_bb);
3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3265 vmain = gimple_omp_continue_control_use (cont_stmt);
3266 vback = gimple_omp_continue_control_def (cont_stmt);
3267
3268 if (cond_var)
3269 {
3270 tree itype = TREE_TYPE (cond_var);
3271 tree t2;
3272 if ((fd->ordered && fd->collapse == 1)
3273 || bias
3274 || POINTER_TYPE_P (type)
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3276 || fd->loop.cond_code != LT_EXPR)
3277 t2 = build_int_cst (itype, 1);
3278 else
3279 t2 = fold_convert (itype, fd->loop.step);
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3282 NULL_TREE, true, GSI_SAME_STMT);
3283 assign_stmt = gimple_build_assign (cond_var, t2);
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 }
3286
3287 if (!gimple_omp_for_combined_p (fd->for_stmt))
3288 {
3289 if (POINTER_TYPE_P (type))
3290 t = fold_build_pointer_plus (vmain, fd->loop.step);
3291 else
3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3293 t = force_gimple_operand_gsi (&gsi, t,
3294 DECL_P (vback)
3295 && TREE_ADDRESSABLE (vback),
3296 NULL_TREE, true, GSI_SAME_STMT);
3297 assign_stmt = gimple_build_assign (vback, t);
3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3299
3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3301 {
3302 tree tem;
3303 if (fd->collapse > 1)
3304 tem = fd->loop.v;
3305 else
3306 {
3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3308 fd->loops[0].v, fd->loops[0].n1);
3309 tem = fold_convert (fd->iter_type, tem);
3310 }
3311 tree aref = build4 (ARRAY_REF, fd->iter_type,
3312 counts[fd->ordered], size_zero_node,
3313 NULL_TREE, NULL_TREE);
3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3315 true, GSI_SAME_STMT);
3316 expand_omp_build_assign (&gsi, aref, tem);
3317 }
3318
3319 t = build2 (fd->loop.cond_code, boolean_type_node,
3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3321 iend);
3322 gcond *cond_stmt = gimple_build_cond_empty (t);
3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3324 }
3325
3326 /* Remove GIMPLE_OMP_CONTINUE. */
3327 gsi_remove (&gsi, true);
3328
3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3331
3332 /* Emit code to get the next parallel iteration in L2_BB. */
3333 gsi = gsi_start_bb (l2_bb);
3334
3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3336 build_fold_addr_expr (istart0),
3337 build_fold_addr_expr (iend0));
3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3339 false, GSI_CONTINUE_LINKING);
3340 if (TREE_TYPE (t) != boolean_type_node)
3341 t = fold_build2 (NE_EXPR, boolean_type_node,
3342 t, build_int_cst (TREE_TYPE (t), 0));
3343 gcond *cond_stmt = gimple_build_cond_empty (t);
3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3345 }
3346
3347 /* Add the loop cleanup function. */
3348 gsi = gsi_last_nondebug_bb (exit_bb);
3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3353 else
3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3355 gcall *call_stmt = gimple_build_call (t, 0);
3356 if (fd->ordered)
3357 {
3358 tree arr = counts[fd->ordered];
3359 tree clobber = build_clobber (TREE_TYPE (arr));
3360 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3361 GSI_SAME_STMT);
3362 }
3363 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3364 {
3365 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3366 if (fd->have_reductemp)
3367 {
3368 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3369 gimple_call_lhs (call_stmt));
3370 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3371 }
3372 }
3373 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3374 gsi_remove (&gsi, true);
3375
3376 /* Connect the new blocks. */
3377 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3378 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3379
3380 if (!broken_loop)
3381 {
3382 gimple_seq phis;
3383
3384 e = find_edge (cont_bb, l3_bb);
3385 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3386
3387 phis = phi_nodes (l3_bb);
3388 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3389 {
3390 gimple *phi = gsi_stmt (gsi);
3391 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3392 PHI_ARG_DEF_FROM_EDGE (phi, e));
3393 }
3394 remove_edge (e);
3395
3396 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3397 e = find_edge (cont_bb, l1_bb);
3398 if (e == NULL)
3399 {
3400 e = BRANCH_EDGE (cont_bb);
3401 gcc_assert (single_succ (e->dest) == l1_bb);
3402 }
3403 if (gimple_omp_for_combined_p (fd->for_stmt))
3404 {
3405 remove_edge (e);
3406 e = NULL;
3407 }
3408 else if (fd->collapse > 1)
3409 {
3410 remove_edge (e);
3411 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3412 }
3413 else
3414 e->flags = EDGE_TRUE_VALUE;
3415 if (e)
3416 {
3417 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3418 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3419 }
3420 else
3421 {
3422 e = find_edge (cont_bb, l2_bb);
3423 e->flags = EDGE_FALLTHRU;
3424 }
3425 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3426
3427 if (gimple_in_ssa_p (cfun))
3428 {
3429 /* Add phis to the outer loop that connect to the phis in the inner,
3430 original loop, and move the loop entry value of the inner phi to
3431 the loop entry value of the outer phi. */
3432 gphi_iterator psi;
3433 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3434 {
3435 location_t locus;
3436 gphi *nphi;
3437 gphi *exit_phi = psi.phi ();
3438
3439 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3440 continue;
3441
3442 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3443 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3444
3445 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3446 edge latch_to_l1 = find_edge (latch, l1_bb);
3447 gphi *inner_phi
3448 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3449
3450 tree t = gimple_phi_result (exit_phi);
3451 tree new_res = copy_ssa_name (t, NULL);
3452 nphi = create_phi_node (new_res, l0_bb);
3453
3454 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3455 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3456 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3457 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3458 add_phi_arg (nphi, t, entry_to_l0, locus);
3459
3460 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3461 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3462
3463 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3464 }
3465 }
3466
3467 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3468 recompute_dominator (CDI_DOMINATORS, l2_bb));
3469 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3470 recompute_dominator (CDI_DOMINATORS, l3_bb));
3471 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3472 recompute_dominator (CDI_DOMINATORS, l0_bb));
3473 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3474 recompute_dominator (CDI_DOMINATORS, l1_bb));
3475
3476 /* We enter expand_omp_for_generic with a loop. This original loop may
3477 have its own loop struct, or it may be part of an outer loop struct
3478 (which may be the fake loop). */
3479 class loop *outer_loop = entry_bb->loop_father;
3480 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3481
3482 add_bb_to_loop (l2_bb, outer_loop);
3483
3484 /* We've added a new loop around the original loop. Allocate the
3485 corresponding loop struct. */
3486 class loop *new_loop = alloc_loop ();
3487 new_loop->header = l0_bb;
3488 new_loop->latch = l2_bb;
3489 add_loop (new_loop, outer_loop);
3490
3491 /* Allocate a loop structure for the original loop unless we already
3492 had one. */
3493 if (!orig_loop_has_loop_struct
3494 && !gimple_omp_for_combined_p (fd->for_stmt))
3495 {
3496 class loop *orig_loop = alloc_loop ();
3497 orig_loop->header = l1_bb;
3498 /* The loop may have multiple latches. */
3499 add_loop (orig_loop, new_loop);
3500 }
3501 }
3502 }
3503
3504 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
3505 compute the needed allocation size: for !ALLOC the size of the team
3506 allocations, for ALLOC the size of the thread allocations. SZ is the
3507 initial size needed for other purposes, ALLOC_ALIGN the guaranteed
3508 alignment of the allocation in bytes, and CNT the number of elements
3509 of each array: for !ALLOC this is omp_get_num_threads (), for ALLOC
3510 the number of iterations handled by the current thread. If PTR is
3511 non-NULL, it is the start of the allocation and this routine shall
3512 assign to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses pointers to the corresponding arrays. */
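/* For instance, given two non-control _scantemp_ clauses whose pointee
   types are 8-byte integers with 8-byte alignment and CNT equal to
   omp_get_num_threads (), the sizing call (PTR == NULL, !ALLOC) returns
   roughly 16 * omp_get_num_threads () + SZ bytes (plus any padding needed
   to keep the individual arrays aligned), and the later call with the
   allocated block in PTR carves that memory up, storing a pointer to each
   array into the corresponding OMP_CLAUSE_DECL.  */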
3513
3514 static tree
3515 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
3516 unsigned HOST_WIDE_INT alloc_align, tree cnt,
3517 gimple_stmt_iterator *gsi, bool alloc)
3518 {
3519 tree eltsz = NULL_TREE;
3520 unsigned HOST_WIDE_INT preval = 0;
3521 if (ptr && sz)
3522 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3523 ptr, size_int (sz));
3524 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3525 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3526 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
3527 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
3528 {
3529 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3530 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
3531 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3532 {
3533 unsigned HOST_WIDE_INT szl
3534 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
3535 szl = least_bit_hwi (szl);
3536 if (szl)
3537 al = MIN (al, szl);
3538 }
3539 if (ptr == NULL_TREE)
3540 {
3541 if (eltsz == NULL_TREE)
3542 eltsz = TYPE_SIZE_UNIT (pointee_type);
3543 else
3544 eltsz = size_binop (PLUS_EXPR, eltsz,
3545 TYPE_SIZE_UNIT (pointee_type));
3546 }
3547 if (preval == 0 && al <= alloc_align)
3548 {
3549 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
3550 sz += diff;
3551 if (diff && ptr)
3552 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
3553 ptr, size_int (diff));
3554 }
3555 else if (al > preval)
3556 {
3557 if (ptr)
3558 {
3559 ptr = fold_convert (pointer_sized_int_node, ptr);
3560 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
3561 build_int_cst (pointer_sized_int_node,
3562 al - 1));
3563 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
3564 build_int_cst (pointer_sized_int_node,
3565 -(HOST_WIDE_INT) al));
3566 ptr = fold_convert (ptr_type_node, ptr);
3567 }
3568 else
3569 sz += al - 1;
3570 }
3571 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
3572 preval = al;
3573 else
3574 preval = 1;
3575 if (ptr)
3576 {
3577 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
3578 ptr = OMP_CLAUSE_DECL (c);
3579 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
3580 size_binop (MULT_EXPR, cnt,
3581 TYPE_SIZE_UNIT (pointee_type)));
3582 }
3583 }
3584
3585 if (ptr == NULL_TREE)
3586 {
3587 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
3588 if (sz)
3589 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
3590 return eltsz;
3591 }
3592 else
3593 return ptr;
3594 }
3595
3596 /* A subroutine of expand_omp_for. Generate code for a parallel
3597 loop with static schedule and no specified chunk size. Given
3598 parameters:
3599
3600 for (V = N1; V cond N2; V += STEP) BODY;
3601
3602 where COND is "<" or ">", we generate pseudocode
3603
3604 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3605 if (cond is <)
3606 adj = STEP - 1;
3607 else
3608 adj = STEP + 1;
3609 if ((__typeof (V)) -1 > 0 && cond is >)
3610 n = -(adj + N2 - N1) / -STEP;
3611 else
3612 n = (adj + N2 - N1) / STEP;
3613 q = n / nthreads;
3614 tt = n % nthreads;
3615 if (threadid < tt) goto L3; else goto L4;
3616 L3:
3617 tt = 0;
3618 q = q + 1;
3619 L4:
3620 s0 = q * threadid + tt;
3621 e0 = s0 + q;
3622 V = s0 * STEP + N1;
3623 if (s0 >= e0) goto L2; else goto L0;
3624 L0:
3625 e = e0 * STEP + N1;
3626 L1:
3627 BODY;
3628 V += STEP;
3629 if (V cond e) goto L1;
3630 L2:
3631 */
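/* For example, a loop such as

     #pragma omp for schedule(static)
     for (i = 0; i < 10; i++) ...

   run by 4 threads gives n = 10, q = 2 and tt = 2; threads 0 and 1 each
   get 3 iterations ([0,3) and [3,6)) while threads 2 and 3 get 2
   iterations each ([6,8) and [8,10)).  */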
3632
3633 static void
3634 expand_omp_for_static_nochunk (struct omp_region *region,
3635 struct omp_for_data *fd,
3636 gimple *inner_stmt)
3637 {
3638 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
3639 tree type, itype, vmain, vback;
3640 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3641 basic_block body_bb, cont_bb, collapse_bb = NULL;
3642 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
3643 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
3644 gimple_stmt_iterator gsi, gsip;
3645 edge ep;
3646 bool broken_loop = region->cont == NULL;
3647 tree *counts = NULL;
3648 tree n1, n2, step;
3649 tree reductions = NULL_TREE;
3650 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3651
3652 itype = type = TREE_TYPE (fd->loop.v);
3653 if (POINTER_TYPE_P (type))
3654 itype = signed_type_for (type);
3655
3656 entry_bb = region->entry;
3657 cont_bb = region->cont;
3658 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3659 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3660 gcc_assert (broken_loop
3661 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3662 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3663 body_bb = single_succ (seq_start_bb);
3664 if (!broken_loop)
3665 {
3666 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3667 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3668 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3669 }
3670 exit_bb = region->exit;
3671
3672 /* Iteration space partitioning goes in ENTRY_BB. */
3673 gsi = gsi_last_nondebug_bb (entry_bb);
3674 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3675 gsip = gsi;
3676 gsi_prev (&gsip);
3677
3678 if (fd->collapse > 1)
3679 {
3680 int first_zero_iter = -1, dummy = -1;
3681 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3682
3683 counts = XALLOCAVEC (tree, fd->collapse);
3684 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3685 fin_bb, first_zero_iter,
3686 dummy_bb, dummy, l2_dom_bb);
3687 t = NULL_TREE;
3688 }
3689 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3690 t = integer_one_node;
3691 else
3692 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3693 fold_convert (type, fd->loop.n1),
3694 fold_convert (type, fd->loop.n2));
3695 if (fd->collapse == 1
3696 && TYPE_UNSIGNED (type)
3697 && (t == NULL_TREE || !integer_onep (t)))
3698 {
3699 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3700 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3701 true, GSI_SAME_STMT);
3702 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3703 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3704 true, GSI_SAME_STMT);
3705 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3706 NULL_TREE, NULL_TREE);
3707 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3708 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3709 expand_omp_regimplify_p, NULL, NULL)
3710 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3711 expand_omp_regimplify_p, NULL, NULL))
3712 {
3713 gsi = gsi_for_stmt (cond_stmt);
3714 gimple_regimplify_operands (cond_stmt, &gsi);
3715 }
3716 ep = split_block (entry_bb, cond_stmt);
3717 ep->flags = EDGE_TRUE_VALUE;
3718 entry_bb = ep->dest;
3719 ep->probability = profile_probability::very_likely ();
3720 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3721 ep->probability = profile_probability::very_unlikely ();
3722 if (gimple_in_ssa_p (cfun))
3723 {
3724 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3725 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3726 !gsi_end_p (gpi); gsi_next (&gpi))
3727 {
3728 gphi *phi = gpi.phi ();
3729 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3730 ep, UNKNOWN_LOCATION);
3731 }
3732 }
3733 gsi = gsi_last_bb (entry_bb);
3734 }
3735
3736 if (fd->lastprivate_conditional)
3737 {
3738 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3739 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3740 if (fd->have_pointer_condtemp)
3741 condtemp = OMP_CLAUSE_DECL (c);
3742 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3743 cond_var = OMP_CLAUSE_DECL (c);
3744 }
3745 if (fd->have_reductemp
3746 /* For scan, we don't want to reinitialize condtemp before the
3747 second loop. */
3748 || (fd->have_pointer_condtemp && !fd->have_scantemp)
3749 || fd->have_nonctrl_scantemp)
3750 {
3751 tree t1 = build_int_cst (long_integer_type_node, 0);
3752 tree t2 = build_int_cst (long_integer_type_node, 1);
3753 tree t3 = build_int_cstu (long_integer_type_node,
3754 (HOST_WIDE_INT_1U << 31) + 1);
3755 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3756 gimple_stmt_iterator gsi2 = gsi_none ();
3757 gimple *g = NULL;
3758 tree mem = null_pointer_node, memv = NULL_TREE;
3759 unsigned HOST_WIDE_INT condtemp_sz = 0;
3760 unsigned HOST_WIDE_INT alloc_align = 0;
3761 if (fd->have_reductemp)
3762 {
3763 gcc_assert (!fd->have_nonctrl_scantemp);
3764 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3765 reductions = OMP_CLAUSE_DECL (c);
3766 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3767 g = SSA_NAME_DEF_STMT (reductions);
3768 reductions = gimple_assign_rhs1 (g);
3769 OMP_CLAUSE_DECL (c) = reductions;
3770 gsi2 = gsi_for_stmt (g);
3771 }
3772 else
3773 {
3774 if (gsi_end_p (gsip))
3775 gsi2 = gsi_after_labels (region->entry);
3776 else
3777 gsi2 = gsip;
3778 reductions = null_pointer_node;
3779 }
3780 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
3781 {
3782 tree type;
3783 if (fd->have_pointer_condtemp)
3784 type = TREE_TYPE (condtemp);
3785 else
3786 type = ptr_type_node;
3787 memv = create_tmp_var (type);
3788 TREE_ADDRESSABLE (memv) = 1;
3789 unsigned HOST_WIDE_INT sz = 0;
3790 tree size = NULL_TREE;
3791 if (fd->have_pointer_condtemp)
3792 {
3793 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3794 sz *= fd->lastprivate_conditional;
3795 condtemp_sz = sz;
3796 }
3797 if (fd->have_nonctrl_scantemp)
3798 {
3799 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3800 gimple *g = gimple_build_call (nthreads, 0);
3801 nthreads = create_tmp_var (integer_type_node);
3802 gimple_call_set_lhs (g, nthreads);
3803 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3804 nthreads = fold_convert (sizetype, nthreads);
3805 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
3806 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
3807 alloc_align, nthreads, NULL,
3808 false);
3809 size = fold_convert (type, size);
3810 }
3811 else
3812 size = build_int_cst (type, sz);
3813 expand_omp_build_assign (&gsi2, memv, size, false);
3814 mem = build_fold_addr_expr (memv);
3815 }
3816 tree t
3817 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3818 9, t1, t2, t2, t3, t1, null_pointer_node,
3819 null_pointer_node, reductions, mem);
3820 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3821 true, GSI_SAME_STMT);
3822 if (fd->have_pointer_condtemp)
3823 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3824 if (fd->have_nonctrl_scantemp)
3825 {
3826 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
3827 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
3828 alloc_align, nthreads, &gsi2, false);
3829 }
3830 if (fd->have_reductemp)
3831 {
3832 gsi_remove (&gsi2, true);
3833 release_ssa_name (gimple_assign_lhs (g));
3834 }
3835 }
3836 switch (gimple_omp_for_kind (fd->for_stmt))
3837 {
3838 case GF_OMP_FOR_KIND_FOR:
3839 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3840 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3841 break;
3842 case GF_OMP_FOR_KIND_DISTRIBUTE:
3843 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3844 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3845 break;
3846 default:
3847 gcc_unreachable ();
3848 }
3849 nthreads = build_call_expr (nthreads, 0);
3850 nthreads = fold_convert (itype, nthreads);
3851 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3852 true, GSI_SAME_STMT);
3853 threadid = build_call_expr (threadid, 0);
3854 threadid = fold_convert (itype, threadid);
3855 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3856 true, GSI_SAME_STMT);
3857
3858 n1 = fd->loop.n1;
3859 n2 = fd->loop.n2;
3860 step = fd->loop.step;
3861 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3862 {
3863 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3864 OMP_CLAUSE__LOOPTEMP_);
3865 gcc_assert (innerc);
3866 n1 = OMP_CLAUSE_DECL (innerc);
3867 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3868 OMP_CLAUSE__LOOPTEMP_);
3869 gcc_assert (innerc);
3870 n2 = OMP_CLAUSE_DECL (innerc);
3871 }
3872 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3873 true, NULL_TREE, true, GSI_SAME_STMT);
3874 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3875 true, NULL_TREE, true, GSI_SAME_STMT);
3876 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3877 true, NULL_TREE, true, GSI_SAME_STMT);
3878
3879 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3880 t = fold_build2 (PLUS_EXPR, itype, step, t);
3881 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3882 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3883 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3884 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3885 fold_build1 (NEGATE_EXPR, itype, t),
3886 fold_build1 (NEGATE_EXPR, itype, step));
3887 else
3888 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3889 t = fold_convert (itype, t);
3890 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3891
3892 q = create_tmp_reg (itype, "q");
3893 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3894 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3895 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3896
3897 tt = create_tmp_reg (itype, "tt");
3898 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3899 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3900 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3901
3902 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3903 gcond *cond_stmt = gimple_build_cond_empty (t);
3904 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3905
3906 second_bb = split_block (entry_bb, cond_stmt)->dest;
3907 gsi = gsi_last_nondebug_bb (second_bb);
3908 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3909
3910 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3911 GSI_SAME_STMT);
3912 gassign *assign_stmt
3913 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3914 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3915
3916 third_bb = split_block (second_bb, assign_stmt)->dest;
3917 gsi = gsi_last_nondebug_bb (third_bb);
3918 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3919
3920 if (fd->have_nonctrl_scantemp)
3921 {
3922 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3923 tree controlp = NULL_TREE, controlb = NULL_TREE;
3924 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3925 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3926 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
3927 {
3928 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
3929 controlb = OMP_CLAUSE_DECL (c);
3930 else
3931 controlp = OMP_CLAUSE_DECL (c);
3932 if (controlb && controlp)
3933 break;
3934 }
3935 gcc_assert (controlp && controlb);
3936 tree cnt = create_tmp_var (sizetype);
3937 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
3938 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3939 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
3940 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
3941 alloc_align, cnt, NULL, true);
3942 tree size = create_tmp_var (sizetype);
3943 expand_omp_build_assign (&gsi, size, sz, false);
3944 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
3945 size, size_int (16384));
3946 expand_omp_build_assign (&gsi, controlb, cmp);
3947 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
3948 NULL_TREE, NULL_TREE);
3949 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3950 fourth_bb = split_block (third_bb, g)->dest;
3951 gsi = gsi_last_nondebug_bb (fourth_bb);
3952 /* FIXME: Once we have allocators, this should use an allocator. */
3953 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
3954 gimple_call_set_lhs (g, controlp);
3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3956 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
3957 &gsi, true);
3958 gsi_prev (&gsi);
3959 g = gsi_stmt (gsi);
3960 fifth_bb = split_block (fourth_bb, g)->dest;
3961 gsi = gsi_last_nondebug_bb (fifth_bb);
3962
3963 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
3964 gimple_call_set_lhs (g, controlp);
3965 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3966 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
3967 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3968 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
3969 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
3970 {
3971 tree tmp = create_tmp_var (sizetype);
3972 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
3973 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
3974 TYPE_SIZE_UNIT (pointee_type));
3975 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3976 g = gimple_build_call (alloca_decl, 2, tmp,
3977 size_int (TYPE_ALIGN (pointee_type)));
3978 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
3979 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3980 }
3981
3982 sixth_bb = split_block (fifth_bb, g)->dest;
3983 gsi = gsi_last_nondebug_bb (sixth_bb);
3984 }
3985
3986 t = build2 (MULT_EXPR, itype, q, threadid);
3987 t = build2 (PLUS_EXPR, itype, t, tt);
3988 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3989
3990 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3991 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3992
3993 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3994 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3995
3996 /* Remove the GIMPLE_OMP_FOR statement. */
3997 gsi_remove (&gsi, true);
3998
3999 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4000 gsi = gsi_start_bb (seq_start_bb);
4001
4002 tree startvar = fd->loop.v;
4003 tree endvar = NULL_TREE;
4004
4005 if (gimple_omp_for_combined_p (fd->for_stmt))
4006 {
4007 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4008 ? gimple_omp_parallel_clauses (inner_stmt)
4009 : gimple_omp_for_clauses (inner_stmt);
4010 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4011 gcc_assert (innerc);
4012 startvar = OMP_CLAUSE_DECL (innerc);
4013 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4014 OMP_CLAUSE__LOOPTEMP_);
4015 gcc_assert (innerc);
4016 endvar = OMP_CLAUSE_DECL (innerc);
4017 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4018 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4019 {
4020 int i;
4021 for (i = 1; i < fd->collapse; i++)
4022 {
4023 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4024 OMP_CLAUSE__LOOPTEMP_);
4025 gcc_assert (innerc);
4026 }
4027 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4028 OMP_CLAUSE__LOOPTEMP_);
4029 if (innerc)
4030 {
4031 /* If needed (distribute parallel for with lastprivate),
4032 propagate down the total number of iterations. */
4033 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4034 fd->loop.n2);
4035 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4036 GSI_CONTINUE_LINKING);
4037 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4038 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4039 }
4040 }
4041 }
4042 t = fold_convert (itype, s0);
4043 t = fold_build2 (MULT_EXPR, itype, t, step);
4044 if (POINTER_TYPE_P (type))
4045 {
4046 t = fold_build_pointer_plus (n1, t);
4047 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4048 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4049 t = fold_convert (signed_type_for (type), t);
4050 }
4051 else
4052 t = fold_build2 (PLUS_EXPR, type, t, n1);
4053 t = fold_convert (TREE_TYPE (startvar), t);
4054 t = force_gimple_operand_gsi (&gsi, t,
4055 DECL_P (startvar)
4056 && TREE_ADDRESSABLE (startvar),
4057 NULL_TREE, false, GSI_CONTINUE_LINKING);
4058 assign_stmt = gimple_build_assign (startvar, t);
4059 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4060 if (cond_var)
4061 {
4062 tree itype = TREE_TYPE (cond_var);
4063 /* For the lastprivate(conditional:) itervar, we need an iteration
4064 counter that starts at an unsigned non-zero value and increases.
4065 Prefer as few IVs as possible, so if we can use startvar
4066 itself, use that, or startvar + constant (those would be
4067 incremented with step), and as a last resort use s0 + 1,
4068 incremented by 1 each iteration. */
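/* For example, if n1 is the integer constant 5 and the condition is
   LT_EXPR, startvar itself starts at 5 > 0 and can be used directly;
   if n1 is 0 or negative, startvar + (1 - n1) is used instead so that
   the counter starts at 1.  */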
4069 if (POINTER_TYPE_P (type)
4070 || TREE_CODE (n1) != INTEGER_CST
4071 || fd->loop.cond_code != LT_EXPR)
4072 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4073 build_int_cst (itype, 1));
4074 else if (tree_int_cst_sgn (n1) == 1)
4075 t = fold_convert (itype, t);
4076 else
4077 {
4078 tree c = fold_convert (itype, n1);
4079 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4080 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4081 }
4082 t = force_gimple_operand_gsi (&gsi, t, false,
4083 NULL_TREE, false, GSI_CONTINUE_LINKING);
4084 assign_stmt = gimple_build_assign (cond_var, t);
4085 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4086 }
4087
4088 t = fold_convert (itype, e0);
4089 t = fold_build2 (MULT_EXPR, itype, t, step);
4090 if (POINTER_TYPE_P (type))
4091 {
4092 t = fold_build_pointer_plus (n1, t);
4093 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4094 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4095 t = fold_convert (signed_type_for (type), t);
4096 }
4097 else
4098 t = fold_build2 (PLUS_EXPR, type, t, n1);
4099 t = fold_convert (TREE_TYPE (startvar), t);
4100 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4101 false, GSI_CONTINUE_LINKING);
4102 if (endvar)
4103 {
4104 assign_stmt = gimple_build_assign (endvar, e);
4105 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4106 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4107 assign_stmt = gimple_build_assign (fd->loop.v, e);
4108 else
4109 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4110 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4111 }
4112 /* Handle linear clause adjustments. */
4113 tree itercnt = NULL_TREE;
4114 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4115 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4116 c; c = OMP_CLAUSE_CHAIN (c))
4117 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4118 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4119 {
4120 tree d = OMP_CLAUSE_DECL (c);
4121 bool is_ref = omp_is_reference (d);
4122 tree t = d, a, dest;
4123 if (is_ref)
4124 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4125 if (itercnt == NULL_TREE)
4126 {
4127 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4128 {
4129 itercnt = fold_build2 (MINUS_EXPR, itype,
4130 fold_convert (itype, n1),
4131 fold_convert (itype, fd->loop.n1));
4132 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
4133 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
4134 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4135 NULL_TREE, false,
4136 GSI_CONTINUE_LINKING);
4137 }
4138 else
4139 itercnt = s0;
4140 }
4141 tree type = TREE_TYPE (t);
4142 if (POINTER_TYPE_P (type))
4143 type = sizetype;
4144 a = fold_build2 (MULT_EXPR, type,
4145 fold_convert (type, itercnt),
4146 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4147 dest = unshare_expr (t);
4148 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4149 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
4150 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4151 false, GSI_CONTINUE_LINKING);
4152 assign_stmt = gimple_build_assign (dest, t);
4153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4154 }
4155 if (fd->collapse > 1)
4156 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4157
4158 if (!broken_loop)
4159 {
4160 /* The code controlling the sequential loop replaces the
4161 GIMPLE_OMP_CONTINUE. */
4162 gsi = gsi_last_nondebug_bb (cont_bb);
4163 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4164 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4165 vmain = gimple_omp_continue_control_use (cont_stmt);
4166 vback = gimple_omp_continue_control_def (cont_stmt);
4167
4168 if (cond_var)
4169 {
4170 tree itype = TREE_TYPE (cond_var);
4171 tree t2;
4172 if (POINTER_TYPE_P (type)
4173 || TREE_CODE (n1) != INTEGER_CST
4174 || fd->loop.cond_code != LT_EXPR)
4175 t2 = build_int_cst (itype, 1);
4176 else
4177 t2 = fold_convert (itype, step);
4178 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4179 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4180 NULL_TREE, true, GSI_SAME_STMT);
4181 assign_stmt = gimple_build_assign (cond_var, t2);
4182 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4183 }
4184
4185 if (!gimple_omp_for_combined_p (fd->for_stmt))
4186 {
4187 if (POINTER_TYPE_P (type))
4188 t = fold_build_pointer_plus (vmain, step);
4189 else
4190 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4191 t = force_gimple_operand_gsi (&gsi, t,
4192 DECL_P (vback)
4193 && TREE_ADDRESSABLE (vback),
4194 NULL_TREE, true, GSI_SAME_STMT);
4195 assign_stmt = gimple_build_assign (vback, t);
4196 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4197
4198 t = build2 (fd->loop.cond_code, boolean_type_node,
4199 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4200 ? t : vback, e);
4201 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4202 }
4203
4204 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4205 gsi_remove (&gsi, true);
4206
4207 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4208 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4209 }
4210
4211 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4212 gsi = gsi_last_nondebug_bb (exit_bb);
4213 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4214 {
4215 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4216 if (fd->have_reductemp
4217 || ((fd->have_pointer_condtemp || fd->have_scantemp)
4218 && !fd->have_nonctrl_scantemp))
4219 {
4220 tree fn;
4221 if (t)
4222 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4223 else
4224 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4225 gcall *g = gimple_build_call (fn, 0);
4226 if (t)
4227 {
4228 gimple_call_set_lhs (g, t);
4229 if (fd->have_reductemp)
4230 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4231 NOP_EXPR, t),
4232 GSI_SAME_STMT);
4233 }
4234 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4235 }
4236 else
4237 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4238 }
4239 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
4240 && !fd->have_nonctrl_scantemp)
4241 {
4242 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4243 gcall *g = gimple_build_call (fn, 0);
4244 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4245 }
4246 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
4247 {
4248 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4249 tree controlp = NULL_TREE, controlb = NULL_TREE;
4250 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4251 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4252 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4253 {
4254 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4255 controlb = OMP_CLAUSE_DECL (c);
4256 else
4257 controlp = OMP_CLAUSE_DECL (c);
4258 if (controlb && controlp)
4259 break;
4260 }
4261 gcc_assert (controlp && controlb);
4262 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4263 NULL_TREE, NULL_TREE);
4264 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4265 exit1_bb = split_block (exit_bb, g)->dest;
4266 gsi = gsi_after_labels (exit1_bb);
4267 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
4268 controlp);
4269 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4270 exit2_bb = split_block (exit1_bb, g)->dest;
4271 gsi = gsi_after_labels (exit2_bb);
4272 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
4273 controlp);
4274 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4275 exit3_bb = split_block (exit2_bb, g)->dest;
4276 gsi = gsi_after_labels (exit3_bb);
4277 }
4278 gsi_remove (&gsi, true);
4279
4280 /* Connect all the blocks. */
4281 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4282 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4283 ep = find_edge (entry_bb, second_bb);
4284 ep->flags = EDGE_TRUE_VALUE;
4285 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4286 if (fourth_bb)
4287 {
4288 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
4289 ep->probability
4290 = profile_probability::guessed_always ().apply_scale (1, 2);
4291 ep = find_edge (third_bb, fourth_bb);
4292 ep->flags = EDGE_TRUE_VALUE;
4293 ep->probability
4294 = profile_probability::guessed_always ().apply_scale (1, 2);
4295 ep = find_edge (fourth_bb, fifth_bb);
4296 redirect_edge_and_branch (ep, sixth_bb);
4297 }
4298 else
4299 sixth_bb = third_bb;
4300 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4301 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4302 if (exit1_bb)
4303 {
4304 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
4305 ep->probability
4306 = profile_probability::guessed_always ().apply_scale (1, 2);
4307 ep = find_edge (exit_bb, exit1_bb);
4308 ep->flags = EDGE_TRUE_VALUE;
4309 ep->probability
4310 = profile_probability::guessed_always ().apply_scale (1, 2);
4311 ep = find_edge (exit1_bb, exit2_bb);
4312 redirect_edge_and_branch (ep, exit3_bb);
4313 }
4314
4315 if (!broken_loop)
4316 {
4317 ep = find_edge (cont_bb, body_bb);
4318 if (ep == NULL)
4319 {
4320 ep = BRANCH_EDGE (cont_bb);
4321 gcc_assert (single_succ (ep->dest) == body_bb);
4322 }
4323 if (gimple_omp_for_combined_p (fd->for_stmt))
4324 {
4325 remove_edge (ep);
4326 ep = NULL;
4327 }
4328 else if (fd->collapse > 1)
4329 {
4330 remove_edge (ep);
4331 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4332 }
4333 else
4334 ep->flags = EDGE_TRUE_VALUE;
4335 find_edge (cont_bb, fin_bb)->flags
4336 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4337 }
4338
4339 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4340 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4341 if (fourth_bb)
4342 {
4343 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
4344 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
4345 }
4346 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
4347
4348 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4349 recompute_dominator (CDI_DOMINATORS, body_bb));
4350 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4351 recompute_dominator (CDI_DOMINATORS, fin_bb));
4352 if (exit1_bb)
4353 {
4354 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
4355 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
4356 }
4357
4358 class loop *loop = body_bb->loop_father;
4359 if (loop != entry_bb->loop_father)
4360 {
4361 gcc_assert (broken_loop || loop->header == body_bb);
4362 gcc_assert (broken_loop
4363 || loop->latch == region->cont
4364 || single_pred (loop->latch) == region->cont);
4365 return;
4366 }
4367
4368 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4369 {
4370 loop = alloc_loop ();
4371 loop->header = body_bb;
4372 if (collapse_bb == NULL)
4373 loop->latch = cont_bb;
4374 add_loop (loop, body_bb->loop_father);
4375 }
4376 }
4377
4378 /* Return phi in E->DEST with ARG on edge E. */
4379
4380 static gphi *
4381 find_phi_with_arg_on_edge (tree arg, edge e)
4382 {
4383 basic_block bb = e->dest;
4384
4385 for (gphi_iterator gpi = gsi_start_phis (bb);
4386 !gsi_end_p (gpi);
4387 gsi_next (&gpi))
4388 {
4389 gphi *phi = gpi.phi ();
4390 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4391 return phi;
4392 }
4393
4394 return NULL;
4395 }
4396
4397 /* A subroutine of expand_omp_for. Generate code for a parallel
4398 loop with static schedule and a specified chunk size. Given
4399 parameters:
4400
4401 for (V = N1; V cond N2; V += STEP) BODY;
4402
4403 where COND is "<" or ">", we generate pseudocode
4404
4405 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4406 if (cond is <)
4407 adj = STEP - 1;
4408 else
4409 adj = STEP + 1;
4410 if ((__typeof (V)) -1 > 0 && cond is >)
4411 n = -(adj + N2 - N1) / -STEP;
4412 else
4413 n = (adj + N2 - N1) / STEP;
4414 trip = 0;
4415 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4416 here so that V is defined
4417 if the loop is not entered
4418 L0:
4419 s0 = (trip * nthreads + threadid) * CHUNK;
4420 e0 = min (s0 + CHUNK, n);
4421 if (s0 < n) goto L1; else goto L4;
4422 L1:
4423 V = s0 * STEP + N1;
4424 e = e0 * STEP + N1;
4425 L2:
4426 BODY;
4427 V += STEP;
4428 if (V cond e) goto L2; else goto L3;
4429 L3:
4430 trip += 1;
4431 goto L0;
4432 L4:
4433 */
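/* For example, a loop such as

     #pragma omp for schedule(static, 3)
     for (i = 0; i < 10; i++) ...

   run by 2 threads gives n = 10; thread 0 executes chunks [0,3) (trip 0)
   and [6,9) (trip 1), thread 1 executes [3,6) (trip 0) and [9,10)
   (trip 1), and each thread leaves the loop once its next s0 >= n.  */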
4434
4435 static void
4436 expand_omp_for_static_chunk (struct omp_region *region,
4437 struct omp_for_data *fd, gimple *inner_stmt)
4438 {
4439 tree n, s0, e0, e, t;
4440 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4441 tree type, itype, vmain, vback, vextra;
4442 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4443 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4444 gimple_stmt_iterator gsi, gsip;
4445 edge se;
4446 bool broken_loop = region->cont == NULL;
4447 tree *counts = NULL;
4448 tree n1, n2, step;
4449 tree reductions = NULL_TREE;
4450 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4451
4452 itype = type = TREE_TYPE (fd->loop.v);
4453 if (POINTER_TYPE_P (type))
4454 itype = signed_type_for (type);
4455
4456 entry_bb = region->entry;
4457 se = split_block (entry_bb, last_stmt (entry_bb));
4458 entry_bb = se->src;
4459 iter_part_bb = se->dest;
4460 cont_bb = region->cont;
4461 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4462 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4463 gcc_assert (broken_loop
4464 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4465 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4466 body_bb = single_succ (seq_start_bb);
4467 if (!broken_loop)
4468 {
4469 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4470 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4471 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4472 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4473 }
4474 exit_bb = region->exit;
4475
4476 /* Trip and adjustment setup goes in ENTRY_BB. */
4477 gsi = gsi_last_nondebug_bb (entry_bb);
4478 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4479 gsip = gsi;
4480 gsi_prev (&gsip);
4481
4482 if (fd->collapse > 1)
4483 {
4484 int first_zero_iter = -1, dummy = -1;
4485 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4486
4487 counts = XALLOCAVEC (tree, fd->collapse);
4488 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4489 fin_bb, first_zero_iter,
4490 dummy_bb, dummy, l2_dom_bb);
4491 t = NULL_TREE;
4492 }
4493 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4494 t = integer_one_node;
4495 else
4496 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4497 fold_convert (type, fd->loop.n1),
4498 fold_convert (type, fd->loop.n2));
4499 if (fd->collapse == 1
4500 && TYPE_UNSIGNED (type)
4501 && (t == NULL_TREE || !integer_onep (t)))
4502 {
4503 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4504 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4505 true, GSI_SAME_STMT);
4506 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4507 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4508 true, GSI_SAME_STMT);
4509 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4510 NULL_TREE, NULL_TREE);
4511 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4512 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4513 expand_omp_regimplify_p, NULL, NULL)
4514 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4515 expand_omp_regimplify_p, NULL, NULL))
4516 {
4517 gsi = gsi_for_stmt (cond_stmt);
4518 gimple_regimplify_operands (cond_stmt, &gsi);
4519 }
4520 se = split_block (entry_bb, cond_stmt);
4521 se->flags = EDGE_TRUE_VALUE;
4522 entry_bb = se->dest;
4523 se->probability = profile_probability::very_likely ();
4524 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4525 se->probability = profile_probability::very_unlikely ();
4526 if (gimple_in_ssa_p (cfun))
4527 {
4528 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4529 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4530 !gsi_end_p (gpi); gsi_next (&gpi))
4531 {
4532 gphi *phi = gpi.phi ();
4533 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4534 se, UNKNOWN_LOCATION);
4535 }
4536 }
4537 gsi = gsi_last_bb (entry_bb);
4538 }
4539
4540 if (fd->lastprivate_conditional)
4541 {
4542 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4543 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4544 if (fd->have_pointer_condtemp)
4545 condtemp = OMP_CLAUSE_DECL (c);
4546 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4547 cond_var = OMP_CLAUSE_DECL (c);
4548 }
4549 if (fd->have_reductemp || fd->have_pointer_condtemp)
4550 {
4551 tree t1 = build_int_cst (long_integer_type_node, 0);
4552 tree t2 = build_int_cst (long_integer_type_node, 1);
4553 tree t3 = build_int_cstu (long_integer_type_node,
4554 (HOST_WIDE_INT_1U << 31) + 1);
4555 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4556 gimple_stmt_iterator gsi2 = gsi_none ();
4557 gimple *g = NULL;
4558 tree mem = null_pointer_node, memv = NULL_TREE;
4559 if (fd->have_reductemp)
4560 {
4561 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4562 reductions = OMP_CLAUSE_DECL (c);
4563 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4564 g = SSA_NAME_DEF_STMT (reductions);
4565 reductions = gimple_assign_rhs1 (g);
4566 OMP_CLAUSE_DECL (c) = reductions;
4567 gsi2 = gsi_for_stmt (g);
4568 }
4569 else
4570 {
4571 if (gsi_end_p (gsip))
4572 gsi2 = gsi_after_labels (region->entry);
4573 else
4574 gsi2 = gsip;
4575 reductions = null_pointer_node;
4576 }
4577 if (fd->have_pointer_condtemp)
4578 {
4579 tree type = TREE_TYPE (condtemp);
4580 memv = create_tmp_var (type);
4581 TREE_ADDRESSABLE (memv) = 1;
4582 unsigned HOST_WIDE_INT sz
4583 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4584 sz *= fd->lastprivate_conditional;
4585 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4586 false);
4587 mem = build_fold_addr_expr (memv);
4588 }
4589 tree t
4590 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4591 9, t1, t2, t2, t3, t1, null_pointer_node,
4592 null_pointer_node, reductions, mem);
4593 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4594 true, GSI_SAME_STMT);
4595 if (fd->have_pointer_condtemp)
4596 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4597 if (fd->have_reductemp)
4598 {
4599 gsi_remove (&gsi2, true);
4600 release_ssa_name (gimple_assign_lhs (g));
4601 }
4602 }
4603 switch (gimple_omp_for_kind (fd->for_stmt))
4604 {
4605 case GF_OMP_FOR_KIND_FOR:
4606 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4607 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4608 break;
4609 case GF_OMP_FOR_KIND_DISTRIBUTE:
4610 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4611 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4612 break;
4613 default:
4614 gcc_unreachable ();
4615 }
4616 nthreads = build_call_expr (nthreads, 0);
4617 nthreads = fold_convert (itype, nthreads);
4618 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4619 true, GSI_SAME_STMT);
4620 threadid = build_call_expr (threadid, 0);
4621 threadid = fold_convert (itype, threadid);
4622 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4623 true, GSI_SAME_STMT);
4624
4625 n1 = fd->loop.n1;
4626 n2 = fd->loop.n2;
4627 step = fd->loop.step;
4628 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4629 {
4630 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4631 OMP_CLAUSE__LOOPTEMP_);
4632 gcc_assert (innerc);
4633 n1 = OMP_CLAUSE_DECL (innerc);
4634 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4635 OMP_CLAUSE__LOOPTEMP_);
4636 gcc_assert (innerc);
4637 n2 = OMP_CLAUSE_DECL (innerc);
4638 }
4639 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4640 true, NULL_TREE, true, GSI_SAME_STMT);
4641 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4642 true, NULL_TREE, true, GSI_SAME_STMT);
4643 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4644 true, NULL_TREE, true, GSI_SAME_STMT);
4645 tree chunk_size = fold_convert (itype, fd->chunk_size);
4646 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4647 chunk_size
4648 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4649 GSI_SAME_STMT);
4650
4651 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4652 t = fold_build2 (PLUS_EXPR, itype, step, t);
4653 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4654 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4655 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4656 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4657 fold_build1 (NEGATE_EXPR, itype, t),
4658 fold_build1 (NEGATE_EXPR, itype, step));
4659 else
4660 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4661 t = fold_convert (itype, t);
4662 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4663 true, GSI_SAME_STMT);
4664
4665 trip_var = create_tmp_reg (itype, ".trip");
4666 if (gimple_in_ssa_p (cfun))
4667 {
4668 trip_init = make_ssa_name (trip_var);
4669 trip_main = make_ssa_name (trip_var);
4670 trip_back = make_ssa_name (trip_var);
4671 }
4672 else
4673 {
4674 trip_init = trip_var;
4675 trip_main = trip_var;
4676 trip_back = trip_var;
4677 }
4678
4679 gassign *assign_stmt
4680 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4681 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4682
4683 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4684 t = fold_build2 (MULT_EXPR, itype, t, step);
4685 if (POINTER_TYPE_P (type))
4686 t = fold_build_pointer_plus (n1, t);
4687 else
4688 t = fold_build2 (PLUS_EXPR, type, t, n1);
4689 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4690 true, GSI_SAME_STMT);
4691
4692 /* Remove the GIMPLE_OMP_FOR. */
4693 gsi_remove (&gsi, true);
4694
4695 gimple_stmt_iterator gsif = gsi;
4696
4697 /* Iteration space partitioning goes in ITER_PART_BB. */
4698 gsi = gsi_last_bb (iter_part_bb);
4699
4700 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4701 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4702 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4703 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4704 false, GSI_CONTINUE_LINKING);
4705
4706 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4707 t = fold_build2 (MIN_EXPR, itype, t, n);
4708 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4709 false, GSI_CONTINUE_LINKING);
4710
4711 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4712 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4713
4714 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4715 gsi = gsi_start_bb (seq_start_bb);
4716
4717 tree startvar = fd->loop.v;
4718 tree endvar = NULL_TREE;
4719
4720 if (gimple_omp_for_combined_p (fd->for_stmt))
4721 {
4722 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4723 ? gimple_omp_parallel_clauses (inner_stmt)
4724 : gimple_omp_for_clauses (inner_stmt);
4725 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4726 gcc_assert (innerc);
4727 startvar = OMP_CLAUSE_DECL (innerc);
4728 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4729 OMP_CLAUSE__LOOPTEMP_);
4730 gcc_assert (innerc);
4731 endvar = OMP_CLAUSE_DECL (innerc);
4732 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4733 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4734 {
4735 int i;
4736 for (i = 1; i < fd->collapse; i++)
4737 {
4738 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4739 OMP_CLAUSE__LOOPTEMP_);
4740 gcc_assert (innerc);
4741 }
4742 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4743 OMP_CLAUSE__LOOPTEMP_);
4744 if (innerc)
4745 {
4746 /* If needed (distribute parallel for with lastprivate),
4747 propagate down the total number of iterations. */
4748 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4749 fd->loop.n2);
4750 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4751 GSI_CONTINUE_LINKING);
4752 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4753 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4754 }
4755 }
4756 }
4757
4758 t = fold_convert (itype, s0);
4759 t = fold_build2 (MULT_EXPR, itype, t, step);
4760 if (POINTER_TYPE_P (type))
4761 {
4762 t = fold_build_pointer_plus (n1, t);
4763 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4764 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4765 t = fold_convert (signed_type_for (type), t);
4766 }
4767 else
4768 t = fold_build2 (PLUS_EXPR, type, t, n1);
4769 t = fold_convert (TREE_TYPE (startvar), t);
4770 t = force_gimple_operand_gsi (&gsi, t,
4771 DECL_P (startvar)
4772 && TREE_ADDRESSABLE (startvar),
4773 NULL_TREE, false, GSI_CONTINUE_LINKING);
4774 assign_stmt = gimple_build_assign (startvar, t);
4775 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4776 if (cond_var)
4777 {
4778 tree itype = TREE_TYPE (cond_var);
4779 /* For the lastprivate(conditional:) itervar, we need an iteration
4780 counter that starts at an unsigned non-zero value and increases.
4781 Prefer as few IVs as possible, so if we can use startvar
4782 itself, use that, or startvar + constant (those would be
4783 incremented with step), and as a last resort use s0 + 1,
4784 incremented by 1 each iteration. */
4785 if (POINTER_TYPE_P (type)
4786 || TREE_CODE (n1) != INTEGER_CST
4787 || fd->loop.cond_code != LT_EXPR)
4788 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4789 build_int_cst (itype, 1));
4790 else if (tree_int_cst_sgn (n1) == 1)
4791 t = fold_convert (itype, t);
4792 else
4793 {
4794 tree c = fold_convert (itype, n1);
4795 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4796 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4797 }
4798 t = force_gimple_operand_gsi (&gsi, t, false,
4799 NULL_TREE, false, GSI_CONTINUE_LINKING);
4800 assign_stmt = gimple_build_assign (cond_var, t);
4801 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4802 }
4803
4804 t = fold_convert (itype, e0);
4805 t = fold_build2 (MULT_EXPR, itype, t, step);
4806 if (POINTER_TYPE_P (type))
4807 {
4808 t = fold_build_pointer_plus (n1, t);
4809 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4810 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4811 t = fold_convert (signed_type_for (type), t);
4812 }
4813 else
4814 t = fold_build2 (PLUS_EXPR, type, t, n1);
4815 t = fold_convert (TREE_TYPE (startvar), t);
4816 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4817 false, GSI_CONTINUE_LINKING);
4818 if (endvar)
4819 {
4820 assign_stmt = gimple_build_assign (endvar, e);
4821 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4822 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4823 assign_stmt = gimple_build_assign (fd->loop.v, e);
4824 else
4825 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4826 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4827 }
4828 /* Handle linear clause adjustments. */
4829 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4830 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4831 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4832 c; c = OMP_CLAUSE_CHAIN (c))
4833 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4834 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4835 {
4836 tree d = OMP_CLAUSE_DECL (c);
4837 bool is_ref = omp_is_reference (d);
4838 tree t = d, a, dest;
4839 if (is_ref)
4840 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4841 tree type = TREE_TYPE (t);
4842 if (POINTER_TYPE_P (type))
4843 type = sizetype;
4844 dest = unshare_expr (t);
4845 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4846 expand_omp_build_assign (&gsif, v, t);
4847 if (itercnt == NULL_TREE)
4848 {
4849 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4850 {
4851 itercntbias
4852 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4853 fold_convert (itype, fd->loop.n1));
4854 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4855 itercntbias, step);
4856 itercntbias
4857 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4858 NULL_TREE, true,
4859 GSI_SAME_STMT);
4860 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4861 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4862 NULL_TREE, false,
4863 GSI_CONTINUE_LINKING);
4864 }
4865 else
4866 itercnt = s0;
4867 }
4868 a = fold_build2 (MULT_EXPR, type,
4869 fold_convert (type, itercnt),
4870 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4871 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4872 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4873 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4874 false, GSI_CONTINUE_LINKING);
4875 assign_stmt = gimple_build_assign (dest, t);
4876 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4877 }
4878 if (fd->collapse > 1)
4879 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4880
4881 if (!broken_loop)
4882 {
4883 /* The code controlling the sequential loop goes in CONT_BB,
4884 replacing the GIMPLE_OMP_CONTINUE. */
4885 gsi = gsi_last_nondebug_bb (cont_bb);
4886 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4887 vmain = gimple_omp_continue_control_use (cont_stmt);
4888 vback = gimple_omp_continue_control_def (cont_stmt);
4889
4890 if (cond_var)
4891 {
4892 tree itype = TREE_TYPE (cond_var);
4893 tree t2;
4894 if (POINTER_TYPE_P (type)
4895 || TREE_CODE (n1) != INTEGER_CST
4896 || fd->loop.cond_code != LT_EXPR)
4897 t2 = build_int_cst (itype, 1);
4898 else
4899 t2 = fold_convert (itype, step);
4900 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4901 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4902 NULL_TREE, true, GSI_SAME_STMT);
4903 assign_stmt = gimple_build_assign (cond_var, t2);
4904 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4905 }
4906
4907 if (!gimple_omp_for_combined_p (fd->for_stmt))
4908 {
4909 if (POINTER_TYPE_P (type))
4910 t = fold_build_pointer_plus (vmain, step);
4911 else
4912 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4913 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4915 true, GSI_SAME_STMT);
4916 assign_stmt = gimple_build_assign (vback, t);
4917 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4918
4919 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4920 t = build2 (EQ_EXPR, boolean_type_node,
4921 build_int_cst (itype, 0),
4922 build_int_cst (itype, 1));
4923 else
4924 t = build2 (fd->loop.cond_code, boolean_type_node,
4925 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4926 ? t : vback, e);
4927 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4928 }
4929
4930 /* Remove GIMPLE_OMP_CONTINUE. */
4931 gsi_remove (&gsi, true);
4932
4933 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4934 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4935
4936 /* Trip update code goes into TRIP_UPDATE_BB. */
4937 gsi = gsi_start_bb (trip_update_bb);
4938
4939 t = build_int_cst (itype, 1);
4940 t = build2 (PLUS_EXPR, itype, trip_main, t);
4941 assign_stmt = gimple_build_assign (trip_back, t);
4942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4943 }
4944
4945 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4946 gsi = gsi_last_nondebug_bb (exit_bb);
4947 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4948 {
4949 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4950 if (fd->have_reductemp || fd->have_pointer_condtemp)
4951 {
4952 tree fn;
4953 if (t)
4954 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4955 else
4956 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4957 gcall *g = gimple_build_call (fn, 0);
4958 if (t)
4959 {
4960 gimple_call_set_lhs (g, t);
4961 if (fd->have_reductemp)
4962 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4963 NOP_EXPR, t),
4964 GSI_SAME_STMT);
4965 }
4966 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4967 }
4968 else
4969 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4970 }
4971 else if (fd->have_pointer_condtemp)
4972 {
4973 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4974 gcall *g = gimple_build_call (fn, 0);
4975 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4976 }
4977 gsi_remove (&gsi, true);
4978
4979 /* Connect the new blocks. */
4980 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4981 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4982
4983 if (!broken_loop)
4984 {
4985 se = find_edge (cont_bb, body_bb);
4986 if (se == NULL)
4987 {
4988 se = BRANCH_EDGE (cont_bb);
4989 gcc_assert (single_succ (se->dest) == body_bb);
4990 }
4991 if (gimple_omp_for_combined_p (fd->for_stmt))
4992 {
4993 remove_edge (se);
4994 se = NULL;
4995 }
4996 else if (fd->collapse > 1)
4997 {
4998 remove_edge (se);
4999 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5000 }
5001 else
5002 se->flags = EDGE_TRUE_VALUE;
5003 find_edge (cont_bb, trip_update_bb)->flags
5004 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5005
5006 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
5007 iter_part_bb);
5008 }
5009
5010 if (gimple_in_ssa_p (cfun))
5011 {
5012 gphi_iterator psi;
5013 gphi *phi;
5014 edge re, ene;
5015 edge_var_map *vm;
5016 size_t i;
5017
5018 gcc_assert (fd->collapse == 1 && !broken_loop);
5019
5020 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
5021 remove arguments of the phi nodes in fin_bb. We need to create
5022 appropriate phi nodes in iter_part_bb instead. */
5023 se = find_edge (iter_part_bb, fin_bb);
5024 re = single_succ_edge (trip_update_bb);
5025 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
5026 ene = single_succ_edge (entry_bb);
5027
5028 psi = gsi_start_phis (fin_bb);
5029 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
5030 gsi_next (&psi), ++i)
5031 {
5032 gphi *nphi;
5033 location_t locus;
5034
5035 phi = psi.phi ();
5036 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
5037 redirect_edge_var_map_def (vm), 0))
5038 continue;
5039
5040 t = gimple_phi_result (phi);
5041 gcc_assert (t == redirect_edge_var_map_result (vm));
5042
5043 if (!single_pred_p (fin_bb))
5044 t = copy_ssa_name (t, phi);
5045
5046 nphi = create_phi_node (t, iter_part_bb);
5047
5048 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
5049 locus = gimple_phi_arg_location_from_edge (phi, se);
5050
5051 /* A special case -- fd->loop.v is not yet computed in
5052 iter_part_bb; we need to use vextra instead. */
5053 if (t == fd->loop.v)
5054 t = vextra;
5055 add_phi_arg (nphi, t, ene, locus);
5056 locus = redirect_edge_var_map_location (vm);
5057 tree back_arg = redirect_edge_var_map_def (vm);
5058 add_phi_arg (nphi, back_arg, re, locus);
5059 edge ce = find_edge (cont_bb, body_bb);
5060 if (ce == NULL)
5061 {
5062 ce = BRANCH_EDGE (cont_bb);
5063 gcc_assert (single_succ (ce->dest) == body_bb);
5064 ce = single_succ_edge (ce->dest);
5065 }
5066 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
5067 gcc_assert (inner_loop_phi != NULL);
5068 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
5069 find_edge (seq_start_bb, body_bb), locus);
5070
5071 if (!single_pred_p (fin_bb))
5072 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
5073 }
5074 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
5075 redirect_edge_var_map_clear (re);
5076 if (single_pred_p (fin_bb))
5077 while (1)
5078 {
5079 psi = gsi_start_phis (fin_bb);
5080 if (gsi_end_p (psi))
5081 break;
5082 remove_phi_node (&psi, false);
5083 }
5084
5085 /* Make phi node for trip. */
5086 phi = create_phi_node (trip_main, iter_part_bb);
5087 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
5088 UNKNOWN_LOCATION);
5089 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
5090 UNKNOWN_LOCATION);
5091 }
5092
5093 if (!broken_loop)
5094 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
5095 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
5096 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
5097 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5098 recompute_dominator (CDI_DOMINATORS, fin_bb));
5099 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
5100 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
5101 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5102 recompute_dominator (CDI_DOMINATORS, body_bb));
5103
5104 if (!broken_loop)
5105 {
5106 class loop *loop = body_bb->loop_father;
5107 class loop *trip_loop = alloc_loop ();
5108 trip_loop->header = iter_part_bb;
5109 trip_loop->latch = trip_update_bb;
5110 add_loop (trip_loop, iter_part_bb->loop_father);
5111
5112 if (loop != entry_bb->loop_father)
5113 {
5114 gcc_assert (loop->header == body_bb);
5115 gcc_assert (loop->latch == region->cont
5116 || single_pred (loop->latch) == region->cont);
5117 trip_loop->inner = loop;
5118 return;
5119 }
5120
5121 if (!gimple_omp_for_combined_p (fd->for_stmt))
5122 {
5123 loop = alloc_loop ();
5124 loop->header = body_bb;
5125 if (collapse_bb == NULL)
5126 loop->latch = cont_bb;
5127 add_loop (loop, trip_loop);
5128 }
5129 }
5130 }
5131
5132 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
5133 loop. Given parameters:
5134
5135 for (V = N1; V cond N2; V += STEP) BODY;
5136
5137 where COND is "<" or ">", we generate pseudocode
5138
5139 V = N1;
5140 goto L1;
5141 L0:
5142 BODY;
5143 V += STEP;
5144 L1:
5145 if (V cond N2) goto L0; else goto L2;
5146 L2:
5147
5148 For collapsed loops, given parameters:
5149 collapse(3)
5150 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
5151 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
5152 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
5153 BODY;
5154
5155 we generate pseudocode
5156
5157 if (cond3 is <)
5158 adj = STEP3 - 1;
5159 else
5160 adj = STEP3 + 1;
5161 count3 = (adj + N32 - N31) / STEP3;
5162 if (cond2 is <)
5163 adj = STEP2 - 1;
5164 else
5165 adj = STEP2 + 1;
5166 count2 = (adj + N22 - N21) / STEP2;
5167 if (cond1 is <)
5168 adj = STEP1 - 1;
5169 else
5170 adj = STEP1 + 1;
5171 count1 = (adj + N12 - N11) / STEP1;
5172 count = count1 * count2 * count3;
5173 V = 0;
5174 V1 = N11;
5175 V2 = N21;
5176 V3 = N31;
5177 goto L1;
5178 L0:
5179 BODY;
5180 V += 1;
5181 V3 += STEP3;
5182 V2 += (V3 cond3 N32) ? 0 : STEP2;
5183 V3 = (V3 cond3 N32) ? V3 : N31;
5184 V1 += (V2 cond2 N22) ? 0 : STEP1;
5185 V2 = (V2 cond2 N22) ? V2 : N21;
5186 L1:
5187 if (V < count) goto L0; else goto L2;
5188 L2:
5189
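   As a purely illustrative example (not part of the original sources; the
   array and bound names are made up), a source-level loop such as

	#pragma omp simd collapse(3)
	for (int i = 0; i < n1; i++)
	  for (int j = 0; j < n2; j++)
	    for (int k = 0; k < n3; k++)
	      a[i][j][k] = b[i][j][k] + c[i][j][k];

   is expanded following the collapsed-loop scheme above: a single linear
   induction variable V counts the combined iterations, and i, j and k are
   advanced in lock step with it, with k innermost and carrying over into
   j and i whenever an inner count wraps around.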
5190 */
5191
5192 static void
5193 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
5194 {
5195 tree type, t;
5196 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
5197 gimple_stmt_iterator gsi;
5198 gimple *stmt;
5199 gcond *cond_stmt;
5200 bool broken_loop = region->cont == NULL;
5201 edge e, ne;
5202 tree *counts = NULL;
5203 int i;
5204 int safelen_int = INT_MAX;
5205 bool dont_vectorize = false;
5206 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5207 OMP_CLAUSE_SAFELEN);
5208 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5209 OMP_CLAUSE__SIMDUID_);
5210 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5211 OMP_CLAUSE_IF);
5212 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5213 OMP_CLAUSE_SIMDLEN);
5214 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5215 OMP_CLAUSE__CONDTEMP_);
5216 tree n1, n2;
5217 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
5218
5219 if (safelen)
5220 {
5221 poly_uint64 val;
5222 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
5223 if (!poly_int_tree_p (safelen, &val))
5224 safelen_int = 0;
5225 else
5226 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
5227 if (safelen_int == 1)
5228 safelen_int = 0;
5229 }
5230 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
5231 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
5232 {
5233 safelen_int = 0;
5234 dont_vectorize = true;
5235 }
5236 type = TREE_TYPE (fd->loop.v);
5237 entry_bb = region->entry;
5238 cont_bb = region->cont;
5239 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5240 gcc_assert (broken_loop
5241 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5242 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
5243 if (!broken_loop)
5244 {
5245 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
5246 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5247 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
5248 l2_bb = BRANCH_EDGE (entry_bb)->dest;
5249 }
5250 else
5251 {
5252 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
5253 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
5254 l2_bb = single_succ (l1_bb);
5255 }
5256 exit_bb = region->exit;
5257 l2_dom_bb = NULL;
5258
5259 gsi = gsi_last_nondebug_bb (entry_bb);
5260
5261 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5262 /* Not needed in SSA form right now. */
5263 gcc_assert (!gimple_in_ssa_p (cfun));
5264 if (fd->collapse > 1)
5265 {
5266 int first_zero_iter = -1, dummy = -1;
5267 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
5268
5269 counts = XALLOCAVEC (tree, fd->collapse);
5270 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5271 zero_iter_bb, first_zero_iter,
5272 dummy_bb, dummy, l2_dom_bb);
5273 }
5274 if (l2_dom_bb == NULL)
5275 l2_dom_bb = l1_bb;
5276
5277 n1 = fd->loop.n1;
5278 n2 = fd->loop.n2;
5279 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5280 {
5281 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5282 OMP_CLAUSE__LOOPTEMP_);
5283 gcc_assert (innerc);
5284 n1 = OMP_CLAUSE_DECL (innerc);
5285 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5286 OMP_CLAUSE__LOOPTEMP_);
5287 gcc_assert (innerc);
5288 n2 = OMP_CLAUSE_DECL (innerc);
5289 }
5290 tree step = fd->loop.step;
5291
5292 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5293 OMP_CLAUSE__SIMT_);
5294 if (is_simt)
5295 {
5296 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5297 is_simt = safelen_int > 1;
5298 }
5299 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
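  /* In the SIMT case handled below, each lane starts at N1 + SIMT_LANE * STEP
     and advances by STEP * SIMT_VF, so the lanes cover the iteration space in
     a round-robin fashion; a compensating 'V -= STEP * (SIMT_VF - 1)' is
     emitted after the loop (see further down).  */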
5300 if (is_simt)
5301 {
5302 simt_lane = create_tmp_var (unsigned_type_node);
5303 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5304 gimple_call_set_lhs (g, simt_lane);
5305 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5306 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5307 fold_convert (TREE_TYPE (step), simt_lane));
5308 n1 = fold_convert (type, n1);
5309 if (POINTER_TYPE_P (type))
5310 n1 = fold_build_pointer_plus (n1, offset);
5311 else
5312 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5313
5314 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5315 if (fd->collapse > 1)
5316 simt_maxlane = build_one_cst (unsigned_type_node);
5317 else if (safelen_int < omp_max_simt_vf ())
5318 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5319 tree vf
5320 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5321 unsigned_type_node, 0);
5322 if (simt_maxlane)
5323 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5324 vf = fold_convert (TREE_TYPE (step), vf);
5325 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5326 }
5327
5328 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5329 if (fd->collapse > 1)
5330 {
5331 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5332 {
5333 gsi_prev (&gsi);
5334 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5335 gsi_next (&gsi);
5336 }
5337 else
5338 for (i = 0; i < fd->collapse; i++)
5339 {
5340 tree itype = TREE_TYPE (fd->loops[i].v);
5341 if (POINTER_TYPE_P (itype))
5342 itype = signed_type_for (itype);
5343 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5344 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5345 }
5346 }
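  /* cond_var, if present, is the _CONDTEMP_ counter (apparently in support of
     the lastprivate (conditional:) handling): when the loop shape allows it
     (integral iv, < comparison, constant positive N1) it simply mirrors the
     iteration variable, otherwise it counts iterations starting at 1.  */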
5347 if (cond_var)
5348 {
5349 if (POINTER_TYPE_P (type)
5350 || TREE_CODE (n1) != INTEGER_CST
5351 || fd->loop.cond_code != LT_EXPR
5352 || tree_int_cst_sgn (n1) != 1)
5353 expand_omp_build_assign (&gsi, cond_var,
5354 build_one_cst (TREE_TYPE (cond_var)));
5355 else
5356 expand_omp_build_assign (&gsi, cond_var,
5357 fold_convert (TREE_TYPE (cond_var), n1));
5358 }
5359
5360 /* Remove the GIMPLE_OMP_FOR statement. */
5361 gsi_remove (&gsi, true);
5362
5363 if (!broken_loop)
5364 {
5365 /* Code to control the increment goes in the CONT_BB. */
5366 gsi = gsi_last_nondebug_bb (cont_bb);
5367 stmt = gsi_stmt (gsi);
5368 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5369
5370 if (POINTER_TYPE_P (type))
5371 t = fold_build_pointer_plus (fd->loop.v, step);
5372 else
5373 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5374 expand_omp_build_assign (&gsi, fd->loop.v, t);
5375
5376 if (fd->collapse > 1)
5377 {
5378 i = fd->collapse - 1;
5379 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5380 {
5381 t = fold_convert (sizetype, fd->loops[i].step);
5382 t = fold_build_pointer_plus (fd->loops[i].v, t);
5383 }
5384 else
5385 {
5386 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5387 fd->loops[i].step);
5388 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5389 fd->loops[i].v, t);
5390 }
5391 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5392
5393 for (i = fd->collapse - 1; i > 0; i--)
5394 {
5395 tree itype = TREE_TYPE (fd->loops[i].v);
5396 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5397 if (POINTER_TYPE_P (itype2))
5398 itype2 = signed_type_for (itype2);
5399 t = fold_convert (itype2, fd->loops[i - 1].step);
5400 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5401 GSI_SAME_STMT);
5402 t = build3 (COND_EXPR, itype2,
5403 build2 (fd->loops[i].cond_code, boolean_type_node,
5404 fd->loops[i].v,
5405 fold_convert (itype, fd->loops[i].n2)),
5406 build_int_cst (itype2, 0), t);
5407 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5408 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5409 else
5410 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5411 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5412
5413 t = fold_convert (itype, fd->loops[i].n1);
5414 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5415 GSI_SAME_STMT);
5416 t = build3 (COND_EXPR, itype,
5417 build2 (fd->loops[i].cond_code, boolean_type_node,
5418 fd->loops[i].v,
5419 fold_convert (itype, fd->loops[i].n2)),
5420 fd->loops[i].v, t);
5421 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5422 }
5423 }
5424 if (cond_var)
5425 {
5426 if (POINTER_TYPE_P (type)
5427 || TREE_CODE (n1) != INTEGER_CST
5428 || fd->loop.cond_code != LT_EXPR
5429 || tree_int_cst_sgn (n1) != 1)
5430 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5431 build_one_cst (TREE_TYPE (cond_var)));
5432 else
5433 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5434 fold_convert (TREE_TYPE (cond_var), step));
5435 expand_omp_build_assign (&gsi, cond_var, t);
5436 }
5437
5438 /* Remove GIMPLE_OMP_CONTINUE. */
5439 gsi_remove (&gsi, true);
5440 }
5441
5442 /* Emit the condition in L1_BB. */
5443 gsi = gsi_start_bb (l1_bb);
5444
5445 t = fold_convert (type, n2);
5446 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5447 false, GSI_CONTINUE_LINKING);
5448 tree v = fd->loop.v;
5449 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5450 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5451 false, GSI_CONTINUE_LINKING);
5452 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5453 cond_stmt = gimple_build_cond_empty (t);
5454 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5455 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5456 NULL, NULL)
5457 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5458 NULL, NULL))
5459 {
5460 gsi = gsi_for_stmt (cond_stmt);
5461 gimple_regimplify_operands (cond_stmt, &gsi);
5462 }
5463
5464 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5465 if (is_simt)
5466 {
5467 gsi = gsi_start_bb (l2_bb);
5468 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5469 if (POINTER_TYPE_P (type))
5470 t = fold_build_pointer_plus (fd->loop.v, step);
5471 else
5472 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5473 expand_omp_build_assign (&gsi, fd->loop.v, t);
5474 }
5475
5476 /* Remove GIMPLE_OMP_RETURN. */
5477 gsi = gsi_last_nondebug_bb (exit_bb);
5478 gsi_remove (&gsi, true);
5479
5480 /* Connect the new blocks. */
5481 remove_edge (FALLTHRU_EDGE (entry_bb));
5482
5483 if (!broken_loop)
5484 {
5485 remove_edge (BRANCH_EDGE (entry_bb));
5486 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5487
5488 e = BRANCH_EDGE (l1_bb);
5489 ne = FALLTHRU_EDGE (l1_bb);
5490 e->flags = EDGE_TRUE_VALUE;
5491 }
5492 else
5493 {
5494 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5495
5496 ne = single_succ_edge (l1_bb);
5497 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5498
5499 }
5500 ne->flags = EDGE_FALSE_VALUE;
5501 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5502 ne->probability = e->probability.invert ();
5503
5504 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5505 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5506
5507 if (simt_maxlane)
5508 {
5509 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5510 NULL_TREE, NULL_TREE);
5511 gsi = gsi_last_bb (entry_bb);
5512 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5513 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5514 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5515 FALLTHRU_EDGE (entry_bb)->probability
5516 = profile_probability::guessed_always ().apply_scale (7, 8);
5517 BRANCH_EDGE (entry_bb)->probability
5518 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5519 l2_dom_bb = entry_bb;
5520 }
5521 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5522
5523 if (!broken_loop)
5524 {
5525 class loop *loop = alloc_loop ();
5526 loop->header = l1_bb;
5527 loop->latch = cont_bb;
5528 add_loop (loop, l1_bb->loop_father);
5529 loop->safelen = safelen_int;
5530 if (simduid)
5531 {
5532 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5533 cfun->has_simduid_loops = true;
5534 }
5535 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5536 the loop. */
5537 if ((flag_tree_loop_vectorize
5538 || !global_options_set.x_flag_tree_loop_vectorize)
5539 && flag_tree_loop_optimize
5540 && loop->safelen > 1)
5541 {
5542 loop->force_vectorize = true;
5543 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5544 {
5545 unsigned HOST_WIDE_INT v
5546 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5547 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5548 loop->simdlen = v;
5549 }
5550 cfun->has_force_vectorize_loops = true;
5551 }
5552 else if (dont_vectorize)
5553 loop->dont_vectorize = true;
5554 }
5555 else if (simduid)
5556 cfun->has_simduid_loops = true;
5557 }
5558
5559 /* Taskloop construct is represented after gimplification with
5560 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5561 in between them. This routine expands the outer GIMPLE_OMP_FOR,
5562 which should just compute all the needed loop temporaries
5563 for GIMPLE_OMP_TASK. */
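/* For illustration only (not from the original sources; n, a and f are
   hypothetical), a user-level construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       a[i] = f (i);

   is thus represented after gimplification roughly as

     GIMPLE_OMP_FOR (outer)      -> expand_omp_taskloop_for_outer
       GIMPLE_OMP_TASK           -> ends up calling GOMP_taskloop{,_ull}
         GIMPLE_OMP_FOR (inner)  -> expand_omp_taskloop_for_inner
           BODY

   with the outer loop merely materializing the _LOOPTEMP_ values (start,
   end and, when needed, the total iteration count) consumed by the task.  */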
5564
5565 static void
5566 expand_omp_taskloop_for_outer (struct omp_region *region,
5567 struct omp_for_data *fd,
5568 gimple *inner_stmt)
5569 {
5570 tree type, bias = NULL_TREE;
5571 basic_block entry_bb, cont_bb, exit_bb;
5572 gimple_stmt_iterator gsi;
5573 gassign *assign_stmt;
5574 tree *counts = NULL;
5575 int i;
5576
5577 gcc_assert (inner_stmt);
5578 gcc_assert (region->cont);
5579 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5580 && gimple_omp_task_taskloop_p (inner_stmt));
5581 type = TREE_TYPE (fd->loop.v);
5582
5583 /* See if we need to bias by LLONG_MIN. */
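  /* (The bias is presumably needed because GOMP_taskloop_ull works on
     unsigned long long bounds: shifting a signed range that may straddle
     zero by TYPE_MIN_VALUE keeps the unsigned comparisons done by the
     runtime correctly ordered.)  */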
5584 if (fd->iter_type == long_long_unsigned_type_node
5585 && TREE_CODE (type) == INTEGER_TYPE
5586 && !TYPE_UNSIGNED (type))
5587 {
5588 tree n1, n2;
5589
5590 if (fd->loop.cond_code == LT_EXPR)
5591 {
5592 n1 = fd->loop.n1;
5593 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5594 }
5595 else
5596 {
5597 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5598 n2 = fd->loop.n1;
5599 }
5600 if (TREE_CODE (n1) != INTEGER_CST
5601 || TREE_CODE (n2) != INTEGER_CST
5602 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5603 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5604 }
5605
5606 entry_bb = region->entry;
5607 cont_bb = region->cont;
5608 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5609 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5610 exit_bb = region->exit;
5611
5612 gsi = gsi_last_nondebug_bb (entry_bb);
5613 gimple *for_stmt = gsi_stmt (gsi);
5614 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5615 if (fd->collapse > 1)
5616 {
5617 int first_zero_iter = -1, dummy = -1;
5618 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5619
5620 counts = XALLOCAVEC (tree, fd->collapse);
5621 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5622 zero_iter_bb, first_zero_iter,
5623 dummy_bb, dummy, l2_dom_bb);
5624
5625 if (zero_iter_bb)
5626 {
5627 /* Some counts[i] vars might be uninitialized if
5628 some loop has zero iterations. But the body shouldn't
5629 be executed in that case, so just avoid uninit warnings. */
5630 for (i = first_zero_iter; i < fd->collapse; i++)
5631 if (SSA_VAR_P (counts[i]))
5632 TREE_NO_WARNING (counts[i]) = 1;
5633 gsi_prev (&gsi);
5634 edge e = split_block (entry_bb, gsi_stmt (gsi));
5635 entry_bb = e->dest;
5636 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5637 gsi = gsi_last_bb (entry_bb);
5638 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5639 get_immediate_dominator (CDI_DOMINATORS,
5640 zero_iter_bb));
5641 }
5642 }
5643
5644 tree t0, t1;
5645 t1 = fd->loop.n2;
5646 t0 = fd->loop.n1;
5647 if (POINTER_TYPE_P (TREE_TYPE (t0))
5648 && TYPE_PRECISION (TREE_TYPE (t0))
5649 != TYPE_PRECISION (fd->iter_type))
5650 {
5651 /* Avoid casting pointers to integer of a different size. */
5652 tree itype = signed_type_for (type);
5653 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5654 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5655 }
5656 else
5657 {
5658 t1 = fold_convert (fd->iter_type, t1);
5659 t0 = fold_convert (fd->iter_type, t0);
5660 }
5661 if (bias)
5662 {
5663 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5664 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5665 }
5666
5667 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5668 OMP_CLAUSE__LOOPTEMP_);
5669 gcc_assert (innerc);
5670 tree startvar = OMP_CLAUSE_DECL (innerc);
5671 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5672 gcc_assert (innerc);
5673 tree endvar = OMP_CLAUSE_DECL (innerc);
5674 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5675 {
5676 gcc_assert (innerc);
5677 for (i = 1; i < fd->collapse; i++)
5678 {
5679 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5680 OMP_CLAUSE__LOOPTEMP_);
5681 gcc_assert (innerc);
5682 }
5683 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5684 OMP_CLAUSE__LOOPTEMP_);
5685 if (innerc)
5686 {
5687 /* If needed (inner taskloop has lastprivate clause), propagate
5688 down the total number of iterations. */
5689 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5690 NULL_TREE, false,
5691 GSI_CONTINUE_LINKING);
5692 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5693 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5694 }
5695 }
5696
5697 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5698 GSI_CONTINUE_LINKING);
5699 assign_stmt = gimple_build_assign (startvar, t0);
5700 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5701
5702 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5703 GSI_CONTINUE_LINKING);
5704 assign_stmt = gimple_build_assign (endvar, t1);
5705 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5706 if (fd->collapse > 1)
5707 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5708
5709 /* Remove the GIMPLE_OMP_FOR statement. */
5710 gsi = gsi_for_stmt (for_stmt);
5711 gsi_remove (&gsi, true);
5712
5713 gsi = gsi_last_nondebug_bb (cont_bb);
5714 gsi_remove (&gsi, true);
5715
5716 gsi = gsi_last_nondebug_bb (exit_bb);
5717 gsi_remove (&gsi, true);
5718
5719 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5720 remove_edge (BRANCH_EDGE (entry_bb));
5721 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5722 remove_edge (BRANCH_EDGE (cont_bb));
5723 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5724 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5725 recompute_dominator (CDI_DOMINATORS, region->entry));
5726 }
5727
5728 /* Taskloop construct is represented after gimplification with
5729 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5730 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5731 GOMP_taskloop{,_ull} function arranges for each task to be given just
5732 a single range of iterations. */
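/* A sketch (illustrative only) of the per-task code this generates, where
   START and END are the _LOOPTEMP_ values the runtime filled in with this
   particular task's range:

     V = START;
   body:
     BODY;
     V += STEP;
     if (V cond END) goto body;
 */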
5733
5734 static void
5735 expand_omp_taskloop_for_inner (struct omp_region *region,
5736 struct omp_for_data *fd,
5737 gimple *inner_stmt)
5738 {
5739 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5740 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5741 basic_block fin_bb;
5742 gimple_stmt_iterator gsi;
5743 edge ep;
5744 bool broken_loop = region->cont == NULL;
5745 tree *counts = NULL;
5746 tree n1, n2, step;
5747
5748 itype = type = TREE_TYPE (fd->loop.v);
5749 if (POINTER_TYPE_P (type))
5750 itype = signed_type_for (type);
5751
5752 /* See if we need to bias by LLONG_MIN. */
5753 if (fd->iter_type == long_long_unsigned_type_node
5754 && TREE_CODE (type) == INTEGER_TYPE
5755 && !TYPE_UNSIGNED (type))
5756 {
5757 tree n1, n2;
5758
5759 if (fd->loop.cond_code == LT_EXPR)
5760 {
5761 n1 = fd->loop.n1;
5762 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5763 }
5764 else
5765 {
5766 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5767 n2 = fd->loop.n1;
5768 }
5769 if (TREE_CODE (n1) != INTEGER_CST
5770 || TREE_CODE (n2) != INTEGER_CST
5771 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5772 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5773 }
5774
5775 entry_bb = region->entry;
5776 cont_bb = region->cont;
5777 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5778 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5779 gcc_assert (broken_loop
5780 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5781 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5782 if (!broken_loop)
5783 {
5784 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5785 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5786 }
5787 exit_bb = region->exit;
5788
5789 /* Iteration space partitioning goes in ENTRY_BB. */
5790 gsi = gsi_last_nondebug_bb (entry_bb);
5791 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5792
5793 if (fd->collapse > 1)
5794 {
5795 int first_zero_iter = -1, dummy = -1;
5796 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5797
5798 counts = XALLOCAVEC (tree, fd->collapse);
5799 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5800 fin_bb, first_zero_iter,
5801 dummy_bb, dummy, l2_dom_bb);
5802 t = NULL_TREE;
5803 }
5804 else
5805 t = integer_one_node;
5806
5807 step = fd->loop.step;
5808 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5809 OMP_CLAUSE__LOOPTEMP_);
5810 gcc_assert (innerc);
5811 n1 = OMP_CLAUSE_DECL (innerc);
5812 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5813 gcc_assert (innerc);
5814 n2 = OMP_CLAUSE_DECL (innerc);
5815 if (bias)
5816 {
5817 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5818 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5819 }
5820 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5821 true, NULL_TREE, true, GSI_SAME_STMT);
5822 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5823 true, NULL_TREE, true, GSI_SAME_STMT);
5824 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5825 true, NULL_TREE, true, GSI_SAME_STMT);
5826
5827 tree startvar = fd->loop.v;
5828 tree endvar = NULL_TREE;
5829
5830 if (gimple_omp_for_combined_p (fd->for_stmt))
5831 {
5832 tree clauses = gimple_omp_for_clauses (inner_stmt);
5833 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5834 gcc_assert (innerc);
5835 startvar = OMP_CLAUSE_DECL (innerc);
5836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5837 OMP_CLAUSE__LOOPTEMP_);
5838 gcc_assert (innerc);
5839 endvar = OMP_CLAUSE_DECL (innerc);
5840 }
5841 t = fold_convert (TREE_TYPE (startvar), n1);
5842 t = force_gimple_operand_gsi (&gsi, t,
5843 DECL_P (startvar)
5844 && TREE_ADDRESSABLE (startvar),
5845 NULL_TREE, false, GSI_CONTINUE_LINKING);
5846 gimple *assign_stmt = gimple_build_assign (startvar, t);
5847 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5848
5849 t = fold_convert (TREE_TYPE (startvar), n2);
5850 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5851 false, GSI_CONTINUE_LINKING);
5852 if (endvar)
5853 {
5854 assign_stmt = gimple_build_assign (endvar, e);
5855 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5856 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5857 assign_stmt = gimple_build_assign (fd->loop.v, e);
5858 else
5859 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5861 }
5862 if (fd->collapse > 1)
5863 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5864
5865 if (!broken_loop)
5866 {
5867 /* The code controlling the sequential loop replaces the
5868 GIMPLE_OMP_CONTINUE. */
5869 gsi = gsi_last_nondebug_bb (cont_bb);
5870 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5871 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5872 vmain = gimple_omp_continue_control_use (cont_stmt);
5873 vback = gimple_omp_continue_control_def (cont_stmt);
5874
5875 if (!gimple_omp_for_combined_p (fd->for_stmt))
5876 {
5877 if (POINTER_TYPE_P (type))
5878 t = fold_build_pointer_plus (vmain, step);
5879 else
5880 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5881 t = force_gimple_operand_gsi (&gsi, t,
5882 DECL_P (vback)
5883 && TREE_ADDRESSABLE (vback),
5884 NULL_TREE, true, GSI_SAME_STMT);
5885 assign_stmt = gimple_build_assign (vback, t);
5886 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5887
5888 t = build2 (fd->loop.cond_code, boolean_type_node,
5889 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5890 ? t : vback, e);
5891 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5892 }
5893
5894 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5895 gsi_remove (&gsi, true);
5896
5897 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5898 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5899 }
5900
5901 /* Remove the GIMPLE_OMP_FOR statement. */
5902 gsi = gsi_for_stmt (fd->for_stmt);
5903 gsi_remove (&gsi, true);
5904
5905 /* Remove the GIMPLE_OMP_RETURN statement. */
5906 gsi = gsi_last_nondebug_bb (exit_bb);
5907 gsi_remove (&gsi, true);
5908
5909 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5910 if (!broken_loop)
5911 remove_edge (BRANCH_EDGE (entry_bb));
5912 else
5913 {
5914 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5915 region->outer->cont = NULL;
5916 }
5917
5918 /* Connect all the blocks. */
5919 if (!broken_loop)
5920 {
5921 ep = find_edge (cont_bb, body_bb);
5922 if (gimple_omp_for_combined_p (fd->for_stmt))
5923 {
5924 remove_edge (ep);
5925 ep = NULL;
5926 }
5927 else if (fd->collapse > 1)
5928 {
5929 remove_edge (ep);
5930 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5931 }
5932 else
5933 ep->flags = EDGE_TRUE_VALUE;
5934 find_edge (cont_bb, fin_bb)->flags
5935 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5936 }
5937
5938 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5939 recompute_dominator (CDI_DOMINATORS, body_bb));
5940 if (!broken_loop)
5941 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5942 recompute_dominator (CDI_DOMINATORS, fin_bb));
5943
5944 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5945 {
5946 class loop *loop = alloc_loop ();
5947 loop->header = body_bb;
5948 if (collapse_bb == NULL)
5949 loop->latch = cont_bb;
5950 add_loop (loop, body_bb->loop_father);
5951 }
5952 }
5953
5954 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5955 partitioned loop. The lowering here is abstracted, in that the
5956 loop parameters are passed through internal functions, which are
5957 further lowered by oacc_device_lower, once we get to the target
5958 compiler. The loop is of the form:
5959
5960 for (V = B; V LTGT E; V += S) {BODY}
5961
5962 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5963 (constant 0 for no chunking) and we will have a GWV partitioning
5964 mask, specifying dimensions over which the loop is to be
5965 partitioned (see note below). We generate code that looks like
5966 (this ignores tiling):
5967
5968 <entry_bb> [incoming FALL->body, BRANCH->exit]
5969 typedef signedintify (typeof (V)) T; // underlying signed integral type
5970 T range = E - B;
5971 T chunk_no = 0;
5972 T DIR = LTGT == '<' ? +1 : -1;
5973 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5974 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5975
5976 <head_bb> [created by splitting end of entry_bb]
5977 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5978 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5979 if (!(offset LTGT bound)) goto bottom_bb;
5980
5981 <body_bb> [incoming]
5982 V = B + offset;
5983 {BODY}
5984
5985 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5986 offset += step;
5987 if (offset LTGT bound) goto body_bb; [*]
5988
5989 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5990 chunk_no++;
5991 if (chunk_no < chunk_max) goto head_bb;
5992
5993 <exit_bb> [incoming]
5994 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5995
5996 [*] Needed if V live at end of loop. */
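/* Illustrative example (not part of the original sources; the names are
   made up): a partitioned loop such as

     #pragma acc parallel loop gang vector
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   reaches this point with GWV describing the requested gang/vector
   partitioning and is rewritten in terms of the IFN_GOACC_LOOP internal
   function calls sketched above, which oacc_device_lower later specializes
   for the actual offload target.  */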
5997
5998 static void
5999 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
6000 {
6001 tree v = fd->loop.v;
6002 enum tree_code cond_code = fd->loop.cond_code;
6003 enum tree_code plus_code = PLUS_EXPR;
6004
6005 tree chunk_size = integer_minus_one_node;
6006 tree gwv = integer_zero_node;
6007 tree iter_type = TREE_TYPE (v);
6008 tree diff_type = iter_type;
6009 tree plus_type = iter_type;
6010 struct oacc_collapse *counts = NULL;
6011
6012 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
6013 == GF_OMP_FOR_KIND_OACC_LOOP);
6014 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
6015 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
6016
6017 if (POINTER_TYPE_P (iter_type))
6018 {
6019 plus_code = POINTER_PLUS_EXPR;
6020 plus_type = sizetype;
6021 }
6022 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
6023 diff_type = signed_type_for (diff_type);
6024 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
6025 diff_type = integer_type_node;
6026
6027 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
6028 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
6029 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
6030 basic_block bottom_bb = NULL;
6031
6032 /* entry_bb has two successors; the branch edge is to the exit
6033 block, fallthrough edge to body. */
6034 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
6035 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
6036
6037 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
6038 body_bb, or to a block whose only successor is the body_bb. Its
6039 fallthrough successor is the final block (same as the branch
6040 successor of the entry_bb). */
6041 if (cont_bb)
6042 {
6043 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
6044 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
6045
6046 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
6047 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
6048 }
6049 else
6050 gcc_assert (!gimple_in_ssa_p (cfun));
6051
6052 /* The exit block only has entry_bb and cont_bb as predecessors. */
6053 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
6054
6055 tree chunk_no;
6056 tree chunk_max = NULL_TREE;
6057 tree bound, offset;
6058 tree step = create_tmp_var (diff_type, ".step");
6059 bool up = cond_code == LT_EXPR;
6060 tree dir = build_int_cst (diff_type, up ? +1 : -1);
6061 bool chunking = !gimple_in_ssa_p (cfun);
6062 bool negating;
6063
6064 /* Tiling vars. */
6065 tree tile_size = NULL_TREE;
6066 tree element_s = NULL_TREE;
6067 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
6068 basic_block elem_body_bb = NULL;
6069 basic_block elem_cont_bb = NULL;
6070
6071 /* SSA instances. */
6072 tree offset_incr = NULL_TREE;
6073 tree offset_init = NULL_TREE;
6074
6075 gimple_stmt_iterator gsi;
6076 gassign *ass;
6077 gcall *call;
6078 gimple *stmt;
6079 tree expr;
6080 location_t loc;
6081 edge split, be, fte;
6082
6083 /* Split the end of entry_bb to create head_bb. */
6084 split = split_block (entry_bb, last_stmt (entry_bb));
6085 basic_block head_bb = split->dest;
6086 entry_bb = split->src;
6087
6088 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
6089 gsi = gsi_last_nondebug_bb (entry_bb);
6090 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
6091 loc = gimple_location (for_stmt);
6092
6093 if (gimple_in_ssa_p (cfun))
6094 {
6095 offset_init = gimple_omp_for_index (for_stmt, 0);
6096 gcc_assert (integer_zerop (fd->loop.n1));
6097 /* The SSA parallelizer does gang parallelism. */
6098 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
6099 }
6100
6101 if (fd->collapse > 1 || fd->tiling)
6102 {
6103 gcc_assert (!gimple_in_ssa_p (cfun) && up);
6104 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
6105 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
6106 TREE_TYPE (fd->loop.n2), loc);
6107
6108 if (SSA_VAR_P (fd->loop.n2))
6109 {
6110 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
6111 true, GSI_SAME_STMT);
6112 ass = gimple_build_assign (fd->loop.n2, total);
6113 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6114 }
6115 }
6116
6117 tree b = fd->loop.n1;
6118 tree e = fd->loop.n2;
6119 tree s = fd->loop.step;
6120
6121 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
6122 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
6123
6124 /* Convert the step, avoiding possible unsigned->signed overflow. */
6125 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
6126 if (negating)
6127 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
6128 s = fold_convert (diff_type, s);
6129 if (negating)
6130 s = fold_build1 (NEGATE_EXPR, diff_type, s);
6131 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
6132
6133 if (!chunking)
6134 chunk_size = integer_zero_node;
6135 expr = fold_convert (diff_type, chunk_size);
6136 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
6137 NULL_TREE, true, GSI_SAME_STMT);
6138
6139 if (fd->tiling)
6140 {
6141 /* Determine the tile size and element step, and modify
6142 the outer loop step size. */
6143 tile_size = create_tmp_var (diff_type, ".tile_size");
6144 expr = build_int_cst (diff_type, 1);
6145 for (int ix = 0; ix < fd->collapse; ix++)
6146 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
6147 expr = force_gimple_operand_gsi (&gsi, expr, true,
6148 NULL_TREE, true, GSI_SAME_STMT);
6149 ass = gimple_build_assign (tile_size, expr);
6150 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6151
6152 element_s = create_tmp_var (diff_type, ".element_s");
6153 ass = gimple_build_assign (element_s, s);
6154 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6155
6156 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
6157 s = force_gimple_operand_gsi (&gsi, expr, true,
6158 NULL_TREE, true, GSI_SAME_STMT);
6159 }
6160
6161 /* Determine the range, avoiding possible unsigned->signed overflow. */
6162 negating = !up && TYPE_UNSIGNED (iter_type);
6163 expr = fold_build2 (MINUS_EXPR, plus_type,
6164 fold_convert (plus_type, negating ? b : e),
6165 fold_convert (plus_type, negating ? e : b));
6166 expr = fold_convert (diff_type, expr);
6167 if (negating)
6168 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
6169 tree range = force_gimple_operand_gsi (&gsi, expr, true,
6170 NULL_TREE, true, GSI_SAME_STMT);
6171
6172 chunk_no = build_int_cst (diff_type, 0);
6173 if (chunking)
6174 {
6175 gcc_assert (!gimple_in_ssa_p (cfun));
6176
6177 expr = chunk_no;
6178 chunk_max = create_tmp_var (diff_type, ".chunk_max");
6179 chunk_no = create_tmp_var (diff_type, ".chunk_no");
6180
6181 ass = gimple_build_assign (chunk_no, expr);
6182 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6183
6184 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6185 build_int_cst (integer_type_node,
6186 IFN_GOACC_LOOP_CHUNKS),
6187 dir, range, s, chunk_size, gwv);
6188 gimple_call_set_lhs (call, chunk_max);
6189 gimple_set_location (call, loc);
6190 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6191 }
6192 else
6193 chunk_size = chunk_no;
6194
6195 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
6196 build_int_cst (integer_type_node,
6197 IFN_GOACC_LOOP_STEP),
6198 dir, range, s, chunk_size, gwv);
6199 gimple_call_set_lhs (call, step);
6200 gimple_set_location (call, loc);
6201 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6202
6203 /* Remove the GIMPLE_OMP_FOR. */
6204 gsi_remove (&gsi, true);
6205
6206 /* Fixup edges from head_bb. */
6207 be = BRANCH_EDGE (head_bb);
6208 fte = FALLTHRU_EDGE (head_bb);
6209 be->flags |= EDGE_FALSE_VALUE;
6210 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6211
6212 basic_block body_bb = fte->dest;
6213
6214 if (gimple_in_ssa_p (cfun))
6215 {
6216 gsi = gsi_last_nondebug_bb (cont_bb);
6217 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6218
6219 offset = gimple_omp_continue_control_use (cont_stmt);
6220 offset_incr = gimple_omp_continue_control_def (cont_stmt);
6221 }
6222 else
6223 {
6224 offset = create_tmp_var (diff_type, ".offset");
6225 offset_init = offset_incr = offset;
6226 }
6227 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
6228
6229 /* Loop offset & bound go into head_bb. */
6230 gsi = gsi_start_bb (head_bb);
6231
6232 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6233 build_int_cst (integer_type_node,
6234 IFN_GOACC_LOOP_OFFSET),
6235 dir, range, s,
6236 chunk_size, gwv, chunk_no);
6237 gimple_call_set_lhs (call, offset_init);
6238 gimple_set_location (call, loc);
6239 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6240
6241 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
6242 build_int_cst (integer_type_node,
6243 IFN_GOACC_LOOP_BOUND),
6244 dir, range, s,
6245 chunk_size, gwv, offset_init);
6246 gimple_call_set_lhs (call, bound);
6247 gimple_set_location (call, loc);
6248 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
6249
6250 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
6251 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6252 GSI_CONTINUE_LINKING);
6253
6254 /* V assignment goes into body_bb. */
6255 if (!gimple_in_ssa_p (cfun))
6256 {
6257 gsi = gsi_start_bb (body_bb);
6258
6259 expr = build2 (plus_code, iter_type, b,
6260 fold_convert (plus_type, offset));
6261 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6262 true, GSI_SAME_STMT);
6263 ass = gimple_build_assign (v, expr);
6264 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6265
6266 if (fd->collapse > 1 || fd->tiling)
6267 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
6268
6269 if (fd->tiling)
6270 {
6271 /* Determine the range of the element loop -- usually simply
6272 the tile_size, but could be smaller if the final
6273 iteration of the outer loop is a partial tile. */
6274 tree e_range = create_tmp_var (diff_type, ".e_range");
6275
6276 expr = build2 (MIN_EXPR, diff_type,
6277 build2 (MINUS_EXPR, diff_type, bound, offset),
6278 build2 (MULT_EXPR, diff_type, tile_size,
6279 element_s));
6280 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6281 true, GSI_SAME_STMT);
6282 ass = gimple_build_assign (e_range, expr);
6283 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6284
6285 /* Determine bound, offset & step of inner loop. */
6286 e_bound = create_tmp_var (diff_type, ".e_bound");
6287 e_offset = create_tmp_var (diff_type, ".e_offset");
6288 e_step = create_tmp_var (diff_type, ".e_step");
6289
6290 /* Mark these as element loops. */
6291 tree t, e_gwv = integer_minus_one_node;
6292 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6293
6294 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6295 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6296 element_s, chunk, e_gwv, chunk);
6297 gimple_call_set_lhs (call, e_offset);
6298 gimple_set_location (call, loc);
6299 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6300
6301 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6302 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6303 element_s, chunk, e_gwv, e_offset);
6304 gimple_call_set_lhs (call, e_bound);
6305 gimple_set_location (call, loc);
6306 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6307
6308 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6309 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6310 element_s, chunk, e_gwv);
6311 gimple_call_set_lhs (call, e_step);
6312 gimple_set_location (call, loc);
6313 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6314
6315 /* Add test and split block. */
6316 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6317 stmt = gimple_build_cond_empty (expr);
6318 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6319 split = split_block (body_bb, stmt);
6320 elem_body_bb = split->dest;
6321 if (cont_bb == body_bb)
6322 cont_bb = elem_body_bb;
6323 body_bb = split->src;
6324
6325 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6326
6327 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6328 if (cont_bb == NULL)
6329 {
6330 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6331 e->probability = profile_probability::even ();
6332 split->probability = profile_probability::even ();
6333 }
6334
6335 /* Initialize the user's loop vars. */
6336 gsi = gsi_start_bb (elem_body_bb);
6337 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6338 }
6339 }
6340
6341 /* Loop increment goes into cont_bb. If this is not a loop, we
6342 will have spawned threads as if it was, and each one will
6343 execute one iteration. The specification is not explicit about
6344 whether such constructs are ill-formed or not, and they can
6345 occur, especially when noreturn routines are involved. */
6346 if (cont_bb)
6347 {
6348 gsi = gsi_last_nondebug_bb (cont_bb);
6349 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6350 loc = gimple_location (cont_stmt);
6351
6352 if (fd->tiling)
6353 {
6354 /* Insert element loop increment and test. */
6355 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6356 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6357 true, GSI_SAME_STMT);
6358 ass = gimple_build_assign (e_offset, expr);
6359 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6360 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6361
6362 stmt = gimple_build_cond_empty (expr);
6363 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6364 split = split_block (cont_bb, stmt);
6365 elem_cont_bb = split->src;
6366 cont_bb = split->dest;
6367
6368 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6369 split->probability = profile_probability::unlikely ().guessed ();
6370 edge latch_edge
6371 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6372 latch_edge->probability = profile_probability::likely ().guessed ();
6373
6374 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6375 skip_edge->probability = profile_probability::unlikely ().guessed ();
6376 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6377 loop_entry_edge->probability
6378 = profile_probability::likely ().guessed ();
6379
6380 gsi = gsi_for_stmt (cont_stmt);
6381 }
6382
6383 /* Increment offset. */
6384 if (gimple_in_ssa_p (cfun))
6385 expr = build2 (plus_code, iter_type, offset,
6386 fold_convert (plus_type, step));
6387 else
6388 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6389 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6390 true, GSI_SAME_STMT);
6391 ass = gimple_build_assign (offset_incr, expr);
6392 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6393 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6394 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6395
6396 /* Remove the GIMPLE_OMP_CONTINUE. */
6397 gsi_remove (&gsi, true);
6398
6399 /* Fixup edges from cont_bb. */
6400 be = BRANCH_EDGE (cont_bb);
6401 fte = FALLTHRU_EDGE (cont_bb);
6402 be->flags |= EDGE_TRUE_VALUE;
6403 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6404
6405 if (chunking)
6406 {
6407 /* Split the beginning of exit_bb to make bottom_bb. We
6408 need to insert a nop at the start, because splitting is
6409 after a stmt, not before. */
6410 gsi = gsi_start_bb (exit_bb);
6411 stmt = gimple_build_nop ();
6412 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6413 split = split_block (exit_bb, stmt);
6414 bottom_bb = split->src;
6415 exit_bb = split->dest;
6416 gsi = gsi_last_bb (bottom_bb);
6417
6418 /* Chunk increment and test goes into bottom_bb. */
6419 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6420 build_int_cst (diff_type, 1));
6421 ass = gimple_build_assign (chunk_no, expr);
6422 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6423
6424 /* Chunk test at end of bottom_bb. */
6425 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6426 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6427 GSI_CONTINUE_LINKING);
6428
6429 /* Fixup edges from bottom_bb. */
6430 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6431 split->probability = profile_probability::unlikely ().guessed ();
6432 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6433 latch_edge->probability = profile_probability::likely ().guessed ();
6434 }
6435 }
6436
6437 gsi = gsi_last_nondebug_bb (exit_bb);
6438 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6439 loc = gimple_location (gsi_stmt (gsi));
6440
6441 if (!gimple_in_ssa_p (cfun))
6442 {
6443 /* Insert the final value of V, in case it is live. This is the
6444 value for the only thread that survives past the join. */
6445 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6446 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6447 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6448 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6449 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6450 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6451 true, GSI_SAME_STMT);
6452 ass = gimple_build_assign (v, expr);
6453 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6454 }
6455
6456 /* Remove the OMP_RETURN. */
6457 gsi_remove (&gsi, true);
6458
6459 if (cont_bb)
6460 {
6461 /* We now have one, two or three nested loops. Update the loop
6462 structures. */
6463 class loop *parent = entry_bb->loop_father;
6464 class loop *body = body_bb->loop_father;
6465
6466 if (chunking)
6467 {
6468 class loop *chunk_loop = alloc_loop ();
6469 chunk_loop->header = head_bb;
6470 chunk_loop->latch = bottom_bb;
6471 add_loop (chunk_loop, parent);
6472 parent = chunk_loop;
6473 }
6474 else if (parent != body)
6475 {
6476 gcc_assert (body->header == body_bb);
6477 gcc_assert (body->latch == cont_bb
6478 || single_pred (body->latch) == cont_bb);
6479 parent = NULL;
6480 }
6481
6482 if (parent)
6483 {
6484 class loop *body_loop = alloc_loop ();
6485 body_loop->header = body_bb;
6486 body_loop->latch = cont_bb;
6487 add_loop (body_loop, parent);
6488
6489 if (fd->tiling)
6490 {
6491 /* Insert tiling's element loop. */
6492 class loop *inner_loop = alloc_loop ();
6493 inner_loop->header = elem_body_bb;
6494 inner_loop->latch = elem_cont_bb;
6495 add_loop (inner_loop, body_loop);
6496 }
6497 }
6498 }
6499 }
6500
6501 /* Expand the OMP loop defined by REGION. */
6502
6503 static void
6504 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6505 {
6506 struct omp_for_data fd;
6507 struct omp_for_data_loop *loops;
6508
6509 loops
6510 = (struct omp_for_data_loop *)
6511 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6512 * sizeof (struct omp_for_data_loop));
6513 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6514 &fd, loops);
6515 region->sched_kind = fd.sched_kind;
6516 region->sched_modifiers = fd.sched_modifiers;
6517 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6518
6519 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6520 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6521 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6522 if (region->cont)
6523 {
6524 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6525 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6526 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6527 }
6528 else
6529 /* If there isn't a continue then this is a degenerate case where
6530 the introduction of abnormal edges during lowering will prevent
6531 original loops from being detected. Fix that up. */
6532 loops_state_set (LOOPS_NEED_FIXUP);
6533
6534 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
6535 expand_omp_simd (region, &fd);
6536 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6537 {
6538 gcc_assert (!inner_stmt);
6539 expand_oacc_for (region, &fd);
6540 }
6541 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6542 {
6543 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6544 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6545 else
6546 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6547 }
6548 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6549 && !fd.have_ordered)
6550 {
6551 if (fd.chunk_size == NULL)
6552 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6553 else
6554 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6555 }
6556 else
6557 {
6558 int fn_index, start_ix, next_ix;
6559 unsigned HOST_WIDE_INT sched = 0;
6560 tree sched_arg = NULL_TREE;
6561
6562 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6563 == GF_OMP_FOR_KIND_FOR);
6564 if (fd.chunk_size == NULL
6565 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6566 fd.chunk_size = integer_zero_node;
6567 switch (fd.sched_kind)
6568 {
6569 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6570 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6571 && fd.lastprivate_conditional == 0)
6572 {
6573 gcc_assert (!fd.have_ordered);
6574 fn_index = 6;
6575 sched = 4;
6576 }
6577 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6578 && !fd.have_ordered
6579 && fd.lastprivate_conditional == 0)
6580 fn_index = 7;
6581 else
6582 {
6583 fn_index = 3;
6584 sched = (HOST_WIDE_INT_1U << 31);
6585 }
6586 break;
6587 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6588 case OMP_CLAUSE_SCHEDULE_GUIDED:
6589 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6590 && !fd.have_ordered
6591 && fd.lastprivate_conditional == 0)
6592 {
6593 fn_index = 3 + fd.sched_kind;
6594 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6595 break;
6596 }
6597 fn_index = fd.sched_kind;
6598 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6599 sched += (HOST_WIDE_INT_1U << 31);
6600 break;
6601 case OMP_CLAUSE_SCHEDULE_STATIC:
6602 gcc_assert (fd.have_ordered);
6603 fn_index = 0;
6604 sched = (HOST_WIDE_INT_1U << 31) + 1;
6605 break;
6606 default:
6607 gcc_unreachable ();
6608 }
6609 if (!fd.ordered)
6610 fn_index += fd.have_ordered * 8;
6611 if (fd.ordered)
6612 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6613 else
6614 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6615 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6616 if (fd.have_reductemp || fd.have_pointer_condtemp)
6617 {
6618 if (fd.ordered)
6619 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6620 else if (fd.have_ordered)
6621 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6622 else
6623 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6624 sched_arg = build_int_cstu (long_integer_type_node, sched);
6625 if (!fd.chunk_size)
6626 fd.chunk_size = integer_zero_node;
6627 }
6628 if (fd.iter_type == long_long_unsigned_type_node)
6629 {
6630 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6631 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6632 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6633 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6634 }
6635 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6636 (enum built_in_function) next_ix, sched_arg,
6637 inner_stmt);
6638 }
6639
6640 if (gimple_in_ssa_p (cfun))
6641 update_ssa (TODO_update_ssa_only_virtuals);
6642 }
6643
6644 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6645
6646 v = GOMP_sections_start (n);
6647 L0:
6648 switch (v)
6649 {
6650 case 0:
6651 goto L2;
6652 case 1:
6653 section 1;
6654 goto L1;
6655 case 2:
6656 ...
6657 case n:
6658 ...
6659 default:
6660 abort ();
6661 }
6662 L1:
6663 v = GOMP_sections_next ();
6664 goto L0;
6665 L2:
6666 reduction;
6667
6668    If this is a combined parallel sections region, replace the call to
6669    GOMP_sections_start with a call to GOMP_sections_next.  */
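
/* An illustrative example (not part of this file's logic; foo and bar are
   hypothetical calls): the expansion above handles user code such as

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   where each section body becomes one case of the generated switch and
   libgomp's GOMP_sections_start/GOMP_sections_next hand out section
   numbers to the threads of the enclosing parallel region.  */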
6670
6671 static void
6672 expand_omp_sections (struct omp_region *region)
6673 {
6674 tree t, u, vin = NULL, vmain, vnext, l2;
6675 unsigned len;
6676 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6677 gimple_stmt_iterator si, switch_si;
6678 gomp_sections *sections_stmt;
6679 gimple *stmt;
6680 gomp_continue *cont;
6681 edge_iterator ei;
6682 edge e;
6683 struct omp_region *inner;
6684 unsigned i, casei;
6685 bool exit_reachable = region->cont != NULL;
6686
6687 gcc_assert (region->exit != NULL);
6688 entry_bb = region->entry;
6689 l0_bb = single_succ (entry_bb);
6690 l1_bb = region->cont;
6691 l2_bb = region->exit;
6692 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6693 l2 = gimple_block_label (l2_bb);
6694 else
6695 {
6696 /* This can happen if there are reductions. */
6697 len = EDGE_COUNT (l0_bb->succs);
6698 gcc_assert (len > 0);
6699 e = EDGE_SUCC (l0_bb, len - 1);
6700 si = gsi_last_nondebug_bb (e->dest);
6701 l2 = NULL_TREE;
6702 if (gsi_end_p (si)
6703 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6704 l2 = gimple_block_label (e->dest);
6705 else
6706 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6707 {
6708 si = gsi_last_nondebug_bb (e->dest);
6709 if (gsi_end_p (si)
6710 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6711 {
6712 l2 = gimple_block_label (e->dest);
6713 break;
6714 }
6715 }
6716 }
6717 if (exit_reachable)
6718 default_bb = create_empty_bb (l1_bb->prev_bb);
6719 else
6720 default_bb = create_empty_bb (l0_bb);
6721
6722 /* We will build a switch() with enough cases for all the
6723 GIMPLE_OMP_SECTION regions, a '0' case for when there is no more work,
6724 and a default case to abort if something goes wrong. */
6725 len = EDGE_COUNT (l0_bb->succs);
6726
6727 /* Use vec::quick_push on label_vec throughout, since we know the size
6728 in advance. */
6729 auto_vec<tree> label_vec (len);
6730
6731 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6732 GIMPLE_OMP_SECTIONS statement. */
6733 si = gsi_last_nondebug_bb (entry_bb);
6734 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6735 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6736 vin = gimple_omp_sections_control (sections_stmt);
6737 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6738 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6739 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6740 tree cond_var = NULL_TREE;
6741 if (reductmp || condtmp)
6742 {
6743 tree reductions = null_pointer_node, mem = null_pointer_node;
6744 tree memv = NULL_TREE, condtemp = NULL_TREE;
6745 gimple_stmt_iterator gsi = gsi_none ();
6746 gimple *g = NULL;
6747 if (reductmp)
6748 {
6749 reductions = OMP_CLAUSE_DECL (reductmp);
6750 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6751 g = SSA_NAME_DEF_STMT (reductions);
6752 reductions = gimple_assign_rhs1 (g);
6753 OMP_CLAUSE_DECL (reductmp) = reductions;
6754 gsi = gsi_for_stmt (g);
6755 }
6756 else
6757 gsi = si;
6758 if (condtmp)
6759 {
6760 condtemp = OMP_CLAUSE_DECL (condtmp);
6761 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6762 OMP_CLAUSE__CONDTEMP_);
6763 cond_var = OMP_CLAUSE_DECL (c);
6764 tree type = TREE_TYPE (condtemp);
6765 memv = create_tmp_var (type);
6766 TREE_ADDRESSABLE (memv) = 1;
6767 unsigned cnt = 0;
6768 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6769 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6770 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6771 ++cnt;
6772 unsigned HOST_WIDE_INT sz
6773 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6774 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6775 false);
6776 mem = build_fold_addr_expr (memv);
6777 }
6778 t = build_int_cst (unsigned_type_node, len - 1);
6779 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6780 stmt = gimple_build_call (u, 3, t, reductions, mem);
6781 gimple_call_set_lhs (stmt, vin);
6782 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6783 if (condtmp)
6784 {
6785 expand_omp_build_assign (&gsi, condtemp, memv, false);
6786 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6787 vin, build_one_cst (TREE_TYPE (cond_var)));
6788 expand_omp_build_assign (&gsi, cond_var, t, false);
6789 }
6790 if (reductmp)
6791 {
6792 gsi_remove (&gsi, true);
6793 release_ssa_name (gimple_assign_lhs (g));
6794 }
6795 }
6796 else if (!is_combined_parallel (region))
6797 {
6798 /* If we are not inside a combined parallel+sections region,
6799 call GOMP_sections_start. */
6800 t = build_int_cst (unsigned_type_node, len - 1);
6801 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6802 stmt = gimple_build_call (u, 1, t);
6803 }
6804 else
6805 {
6806 /* Otherwise, call GOMP_sections_next. */
6807 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6808 stmt = gimple_build_call (u, 0);
6809 }
6810 if (!reductmp && !condtmp)
6811 {
6812 gimple_call_set_lhs (stmt, vin);
6813 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6814 }
6815 gsi_remove (&si, true);
6816
6817 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6818 L0_BB. */
6819 switch_si = gsi_last_nondebug_bb (l0_bb);
6820 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6821 if (exit_reachable)
6822 {
6823 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6824 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6825 vmain = gimple_omp_continue_control_use (cont);
6826 vnext = gimple_omp_continue_control_def (cont);
6827 }
6828 else
6829 {
6830 vmain = vin;
6831 vnext = NULL_TREE;
6832 }
6833
6834 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6835 label_vec.quick_push (t);
6836 i = 1;
6837
6838 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6839 for (inner = region->inner, casei = 1;
6840 inner;
6841 inner = inner->next, i++, casei++)
6842 {
6843 basic_block s_entry_bb, s_exit_bb;
6844
6845 /* Skip optional reduction region. */
6846 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6847 {
6848 --i;
6849 --casei;
6850 continue;
6851 }
6852
6853 s_entry_bb = inner->entry;
6854 s_exit_bb = inner->exit;
6855
6856 t = gimple_block_label (s_entry_bb);
6857 u = build_int_cst (unsigned_type_node, casei);
6858 u = build_case_label (u, NULL, t);
6859 label_vec.quick_push (u);
6860
6861 si = gsi_last_nondebug_bb (s_entry_bb);
6862 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6863 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6864 gsi_remove (&si, true);
6865 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6866
6867 if (s_exit_bb == NULL)
6868 continue;
6869
6870 si = gsi_last_nondebug_bb (s_exit_bb);
6871 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6872 gsi_remove (&si, true);
6873
6874 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6875 }
6876
6877 /* Error handling code goes in DEFAULT_BB. */
6878 t = gimple_block_label (default_bb);
6879 u = build_case_label (NULL, NULL, t);
6880 make_edge (l0_bb, default_bb, 0);
6881 add_bb_to_loop (default_bb, current_loops->tree_root);
6882
6883 stmt = gimple_build_switch (vmain, u, label_vec);
6884 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6885 gsi_remove (&switch_si, true);
6886
6887 si = gsi_start_bb (default_bb);
6888 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6889 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6890
6891 if (exit_reachable)
6892 {
6893 tree bfn_decl;
6894
6895 /* Code to get the next section goes in L1_BB. */
6896 si = gsi_last_nondebug_bb (l1_bb);
6897 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6898
6899 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6900 stmt = gimple_build_call (bfn_decl, 0);
6901 gimple_call_set_lhs (stmt, vnext);
6902 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6903 if (cond_var)
6904 {
6905 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6906 vnext, build_one_cst (TREE_TYPE (cond_var)));
6907 expand_omp_build_assign (&si, cond_var, t, false);
6908 }
6909 gsi_remove (&si, true);
6910
6911 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6912 }
6913
6914 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6915 si = gsi_last_nondebug_bb (l2_bb);
6916 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6917 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6918 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6919 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6920 else
6921 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6922 stmt = gimple_build_call (t, 0);
6923 if (gimple_omp_return_lhs (gsi_stmt (si)))
6924 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6925 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6926 gsi_remove (&si, true);
6927
6928 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6929 }
6930
6931 /* Expand code for an OpenMP single directive. We've already expanded
6932 much of the code, here we simply place the GOMP_barrier call. */
6933
6934 static void
6935 expand_omp_single (struct omp_region *region)
6936 {
6937 basic_block entry_bb, exit_bb;
6938 gimple_stmt_iterator si;
6939
6940 entry_bb = region->entry;
6941 exit_bb = region->exit;
6942
6943 si = gsi_last_nondebug_bb (entry_bb);
6944 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6945 gsi_remove (&si, true);
6946 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6947
6948 si = gsi_last_nondebug_bb (exit_bb);
6949 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6950 {
6951 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6952 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6953 }
6954 gsi_remove (&si, true);
6955 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6956 }
6957
6958 /* Generic expansion for OpenMP synchronization directives: master,
6959 ordered and critical. All we need to do here is remove the entry
6960 and exit markers for REGION. */
6961
6962 static void
6963 expand_omp_synch (struct omp_region *region)
6964 {
6965 basic_block entry_bb, exit_bb;
6966 gimple_stmt_iterator si;
6967
6968 entry_bb = region->entry;
6969 exit_bb = region->exit;
6970
6971 si = gsi_last_nondebug_bb (entry_bb);
6972 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6973 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6974 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6975 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6976 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6977 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6978 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6979 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6980 {
6981 expand_omp_taskreg (region);
6982 return;
6983 }
6984 gsi_remove (&si, true);
6985 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6986
6987 if (exit_bb)
6988 {
6989 si = gsi_last_nondebug_bb (exit_bb);
6990 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6991 gsi_remove (&si, true);
6992 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6993 }
6994 }
6995
6996 /* Translate enum omp_memory_order to enum memmodel. The two enums
6997 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6998 is 0. */
6999
7000 static enum memmodel
7001 omp_memory_order_to_memmodel (enum omp_memory_order mo)
7002 {
7003 switch (mo)
7004 {
7005 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
7006 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
7007 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
7008 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
7009 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
7010 default: gcc_unreachable ();
7011 }
7012 }
7013
7014 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7015 operation as a normal volatile load. */
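
/* Illustrative sketch, using hypothetical user code: for

     #pragma omp atomic read
     v = x;

   with a 4-byte X this expansion emits, roughly,

     v = __atomic_load_4 (&x, mo);

   where MO is the memmodel derived from the directive's memory-order
   clause via omp_memory_order_to_memmodel.  */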
7016
7017 static bool
7018 expand_omp_atomic_load (basic_block load_bb, tree addr,
7019 tree loaded_val, int index)
7020 {
7021 enum built_in_function tmpbase;
7022 gimple_stmt_iterator gsi;
7023 basic_block store_bb;
7024 location_t loc;
7025 gimple *stmt;
7026 tree decl, call, type, itype;
7027
7028 gsi = gsi_last_nondebug_bb (load_bb);
7029 stmt = gsi_stmt (gsi);
7030 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7031 loc = gimple_location (stmt);
7032
7033 /* ??? If the target does not implement atomic_load_optab[mode], and mode
7034 is smaller than word size, then expand_atomic_load assumes that the load
7035 is atomic. We could avoid the builtin entirely in this case. */
7036
7037 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7038 decl = builtin_decl_explicit (tmpbase);
7039 if (decl == NULL_TREE)
7040 return false;
7041
7042 type = TREE_TYPE (loaded_val);
7043 itype = TREE_TYPE (TREE_TYPE (decl));
7044
7045 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7046 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7047 call = build_call_expr_loc (loc, decl, 2, addr, mo);
7048 if (!useless_type_conversion_p (type, itype))
7049 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7050 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7051
7052 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7053 gsi_remove (&gsi, true);
7054
7055 store_bb = single_succ (load_bb);
7056 gsi = gsi_last_nondebug_bb (store_bb);
7057 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7058 gsi_remove (&gsi, true);
7059
7060 if (gimple_in_ssa_p (cfun))
7061 update_ssa (TODO_update_ssa_no_phi);
7062
7063 return true;
7064 }
7065
7066 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7067 operation as a normal volatile store. */
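
/* Illustrative sketch, using hypothetical user code: for

     #pragma omp atomic write
     x = expr;

   this emits roughly __atomic_store_N (&x, expr, mo), while the capture
   form

     #pragma omp atomic capture
     { v = x; x = expr; }

   needs the old value and therefore uses __atomic_exchange_N instead.  */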
7068
7069 static bool
7070 expand_omp_atomic_store (basic_block load_bb, tree addr,
7071 tree loaded_val, tree stored_val, int index)
7072 {
7073 enum built_in_function tmpbase;
7074 gimple_stmt_iterator gsi;
7075 basic_block store_bb = single_succ (load_bb);
7076 location_t loc;
7077 gimple *stmt;
7078 tree decl, call, type, itype;
7079 machine_mode imode;
7080 bool exchange;
7081
7082 gsi = gsi_last_nondebug_bb (load_bb);
7083 stmt = gsi_stmt (gsi);
7084 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
7085
7086 /* If the load value is needed, then this isn't a store but an exchange. */
7087 exchange = gimple_omp_atomic_need_value_p (stmt);
7088
7089 gsi = gsi_last_nondebug_bb (store_bb);
7090 stmt = gsi_stmt (gsi);
7091 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
7092 loc = gimple_location (stmt);
7093
7094 /* ??? If the target does not implement atomic_store_optab[mode], and mode
7095 is smaller than word size, then expand_atomic_store assumes that the store
7096 is atomic. We could avoid the builtin entirely in this case. */
7097
7098 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
7099 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
7100 decl = builtin_decl_explicit (tmpbase);
7101 if (decl == NULL_TREE)
7102 return false;
7103
7104 type = TREE_TYPE (stored_val);
7105
7106 /* Dig out the type of the function's second argument. */
7107 itype = TREE_TYPE (decl);
7108 itype = TYPE_ARG_TYPES (itype);
7109 itype = TREE_CHAIN (itype);
7110 itype = TREE_VALUE (itype);
7111 imode = TYPE_MODE (itype);
7112
7113 if (exchange && !can_atomic_exchange_p (imode, true))
7114 return false;
7115
7116 if (!useless_type_conversion_p (itype, type))
7117 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
7118 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
7119 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
7120 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
7121 if (exchange)
7122 {
7123 if (!useless_type_conversion_p (type, itype))
7124 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
7125 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
7126 }
7127
7128 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7129 gsi_remove (&gsi, true);
7130
7131 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
7132 gsi = gsi_last_nondebug_bb (load_bb);
7133 gsi_remove (&gsi, true);
7134
7135 if (gimple_in_ssa_p (cfun))
7136 update_ssa (TODO_update_ssa_no_phi);
7137
7138 return true;
7139 }
7140
7141 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
7142 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
7143 size of the data type, and thus usable to find the index of the builtin
7144 decl. Returns false if the expression is not of the proper form. */
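
/* Illustrative sketch, using hypothetical user code: an update such as

     #pragma omp atomic
     x += n;

   matches the PLUS_EXPR case below and, for a 4-byte X, becomes roughly

     __atomic_fetch_add_4 (&x, n, mo);

   (or the __atomic_add_fetch_4 form when the new value is captured).  */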
7145
7146 static bool
7147 expand_omp_atomic_fetch_op (basic_block load_bb,
7148 tree addr, tree loaded_val,
7149 tree stored_val, int index)
7150 {
7151 enum built_in_function oldbase, newbase, tmpbase;
7152 tree decl, itype, call;
7153 tree lhs, rhs;
7154 basic_block store_bb = single_succ (load_bb);
7155 gimple_stmt_iterator gsi;
7156 gimple *stmt;
7157 location_t loc;
7158 enum tree_code code;
7159 bool need_old, need_new;
7160 machine_mode imode;
7161
7162 /* We expect to find the following sequences:
7163
7164 load_bb:
7165 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
7166
7167 store_bb:
7168 val = tmp OP something; (or: something OP tmp)
7169 GIMPLE_OMP_STORE (val)
7170
7171 ???FIXME: Allow a more flexible sequence.
7172 Perhaps use data flow to pick the statements.
7173
7174 */
7175
7176 gsi = gsi_after_labels (store_bb);
7177 stmt = gsi_stmt (gsi);
7178 if (is_gimple_debug (stmt))
7179 {
7180 gsi_next_nondebug (&gsi);
7181 if (gsi_end_p (gsi))
7182 return false;
7183 stmt = gsi_stmt (gsi);
7184 }
7185 loc = gimple_location (stmt);
7186 if (!is_gimple_assign (stmt))
7187 return false;
7188 gsi_next_nondebug (&gsi);
7189 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
7190 return false;
7191 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
7192 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
7193 enum omp_memory_order omo
7194 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
7195 enum memmodel mo = omp_memory_order_to_memmodel (omo);
7196 gcc_checking_assert (!need_old || !need_new);
7197
7198 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
7199 return false;
7200
7201 /* Check for one of the supported fetch-op operations. */
7202 code = gimple_assign_rhs_code (stmt);
7203 switch (code)
7204 {
7205 case PLUS_EXPR:
7206 case POINTER_PLUS_EXPR:
7207 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
7208 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
7209 break;
7210 case MINUS_EXPR:
7211 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
7212 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
7213 break;
7214 case BIT_AND_EXPR:
7215 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
7216 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
7217 break;
7218 case BIT_IOR_EXPR:
7219 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
7220 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
7221 break;
7222 case BIT_XOR_EXPR:
7223 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
7224 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
7225 break;
7226 default:
7227 return false;
7228 }
7229
7230 /* Make sure the expression is of the proper form. */
7231 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
7232 rhs = gimple_assign_rhs2 (stmt);
7233 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
7234 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
7235 rhs = gimple_assign_rhs1 (stmt);
7236 else
7237 return false;
7238
7239 tmpbase = ((enum built_in_function)
7240 ((need_new ? newbase : oldbase) + index + 1));
7241 decl = builtin_decl_explicit (tmpbase);
7242 if (decl == NULL_TREE)
7243 return false;
7244 itype = TREE_TYPE (TREE_TYPE (decl));
7245 imode = TYPE_MODE (itype);
7246
7247 /* We could test all of the various optabs involved, but the fact of the
7248 matter is that (with the exception of i486 vs i586 and xadd) all targets
7249 that support any atomic operation optab also implement compare-and-swap.
7250 Let optabs.c take care of expanding any compare-and-swap loop. */
7251 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
7252 return false;
7253
7254 gsi = gsi_last_nondebug_bb (load_bb);
7255 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
7256
7257 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
7258 It only requires that the operation happen atomically. Thus we can
7259 use the RELAXED memory model. */
7260 call = build_call_expr_loc (loc, decl, 3, addr,
7261 fold_convert_loc (loc, itype, rhs),
7262 build_int_cst (NULL, mo));
7263
7264 if (need_old || need_new)
7265 {
7266 lhs = need_old ? loaded_val : stored_val;
7267 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
7268 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
7269 }
7270 else
7271 call = fold_convert_loc (loc, void_type_node, call);
7272 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
7273 gsi_remove (&gsi, true);
7274
7275 gsi = gsi_last_nondebug_bb (store_bb);
7276 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
7277 gsi_remove (&gsi, true);
7278 gsi = gsi_last_nondebug_bb (store_bb);
7279 stmt = gsi_stmt (gsi);
7280 gsi_remove (&gsi, true);
7281
7282 if (gimple_in_ssa_p (cfun))
7283 {
7284 release_defs (stmt);
7285 update_ssa (TODO_update_ssa_no_phi);
7286 }
7287
7288 return true;
7289 }
7290
7291 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7292
7293 oldval = *addr;
7294 repeat:
7295 newval = rhs; // with oldval replacing *addr in rhs
7296 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7297 if (oldval != newval)
7298 goto repeat;
7299
7300 INDEX is log2 of the size of the data type, and thus usable to find the
7301 index of the builtin decl. */
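
/* Illustrative sketch, using hypothetical user code: for a type with no
   fetch-op builtin, e.g.

     #pragma omp atomic
     f *= 2.0f;

   with F a 4-byte float, the loop above operates on the integer image of
   F: the value is VIEW_CONVERTed to a same-sized integer, compared and
   swapped with __sync_val_compare_and_swap_4, and the comparison is done
   on the integer bits so it succeeds even for NaNs and -0.0.  */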
7302
7303 static bool
7304 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7305 tree addr, tree loaded_val, tree stored_val,
7306 int index)
7307 {
7308 tree loadedi, storedi, initial, new_storedi, old_vali;
7309 tree type, itype, cmpxchg, iaddr, atype;
7310 gimple_stmt_iterator si;
7311 basic_block loop_header = single_succ (load_bb);
7312 gimple *phi, *stmt;
7313 edge e;
7314 enum built_in_function fncode;
7315
7316 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7317 order to use the RELAXED memory model effectively. */
7318 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7319 + index + 1);
7320 cmpxchg = builtin_decl_explicit (fncode);
7321 if (cmpxchg == NULL_TREE)
7322 return false;
7323 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7324 atype = type;
7325 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7326
7327 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7328 || !can_atomic_load_p (TYPE_MODE (itype)))
7329 return false;
7330
7331 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
7332 si = gsi_last_nondebug_bb (load_bb);
7333 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7334
7335 /* For floating-point values, we'll need to view-convert them to integers
7336 so that we can perform the atomic compare and swap. Simplify the
7337 following code by always setting up the "i"ntegral variables. */
7338 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7339 {
7340 tree iaddr_val;
7341
7342 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7343 true));
7344 atype = itype;
7345 iaddr_val
7346 = force_gimple_operand_gsi (&si,
7347 fold_convert (TREE_TYPE (iaddr), addr),
7348 false, NULL_TREE, true, GSI_SAME_STMT);
7349 stmt = gimple_build_assign (iaddr, iaddr_val);
7350 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7351 loadedi = create_tmp_var (itype);
7352 if (gimple_in_ssa_p (cfun))
7353 loadedi = make_ssa_name (loadedi);
7354 }
7355 else
7356 {
7357 iaddr = addr;
7358 loadedi = loaded_val;
7359 }
7360
7361 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7362 tree loaddecl = builtin_decl_explicit (fncode);
7363 if (loaddecl)
7364 initial
7365 = fold_convert (atype,
7366 build_call_expr (loaddecl, 2, iaddr,
7367 build_int_cst (NULL_TREE,
7368 MEMMODEL_RELAXED)));
7369 else
7370 {
7371 tree off
7372 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7373 true), 0);
7374 initial = build2 (MEM_REF, atype, iaddr, off);
7375 }
7376
7377 initial
7378 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7379 GSI_SAME_STMT);
7380
7381 /* Move the value to the LOADEDI temporary. */
7382 if (gimple_in_ssa_p (cfun))
7383 {
7384 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7385 phi = create_phi_node (loadedi, loop_header);
7386 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7387 initial);
7388 }
7389 else
7390 gsi_insert_before (&si,
7391 gimple_build_assign (loadedi, initial),
7392 GSI_SAME_STMT);
7393 if (loadedi != loaded_val)
7394 {
7395 gimple_stmt_iterator gsi2;
7396 tree x;
7397
7398 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7399 gsi2 = gsi_start_bb (loop_header);
7400 if (gimple_in_ssa_p (cfun))
7401 {
7402 gassign *stmt;
7403 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7404 true, GSI_SAME_STMT);
7405 stmt = gimple_build_assign (loaded_val, x);
7406 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7407 }
7408 else
7409 {
7410 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7411 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7412 true, GSI_SAME_STMT);
7413 }
7414 }
7415 gsi_remove (&si, true);
7416
7417 si = gsi_last_nondebug_bb (store_bb);
7418 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7419
7420 if (iaddr == addr)
7421 storedi = stored_val;
7422 else
7423 storedi
7424 = force_gimple_operand_gsi (&si,
7425 build1 (VIEW_CONVERT_EXPR, itype,
7426 stored_val), true, NULL_TREE, true,
7427 GSI_SAME_STMT);
7428
7429 /* Build the compare&swap statement. */
7430 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7431 new_storedi = force_gimple_operand_gsi (&si,
7432 fold_convert (TREE_TYPE (loadedi),
7433 new_storedi),
7434 true, NULL_TREE,
7435 true, GSI_SAME_STMT);
7436
7437 if (gimple_in_ssa_p (cfun))
7438 old_vali = loadedi;
7439 else
7440 {
7441 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7442 stmt = gimple_build_assign (old_vali, loadedi);
7443 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7444
7445 stmt = gimple_build_assign (loadedi, new_storedi);
7446 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7447 }
7448
7449 /* Note that we always perform the comparison as an integer, even for
7450 floating point. This allows the atomic operation to properly
7451 succeed even with NaNs and -0.0. */
7452 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7453 stmt = gimple_build_cond_empty (ne);
7454 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7455
7456 /* Update cfg. */
7457 e = single_succ_edge (store_bb);
7458 e->flags &= ~EDGE_FALLTHRU;
7459 e->flags |= EDGE_FALSE_VALUE;
7460 /* Expect no looping. */
7461 e->probability = profile_probability::guessed_always ();
7462
7463 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7464 e->probability = profile_probability::guessed_never ();
7465
7466 /* Copy the new value to loadedi (we already did that before the condition
7467 if we are not in SSA). */
7468 if (gimple_in_ssa_p (cfun))
7469 {
7470 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7471 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7472 }
7473
7474 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7475 gsi_remove (&si, true);
7476
7477 class loop *loop = alloc_loop ();
7478 loop->header = loop_header;
7479 loop->latch = store_bb;
7480 add_loop (loop, loop_header->loop_father);
7481
7482 if (gimple_in_ssa_p (cfun))
7483 update_ssa (TODO_update_ssa_no_phi);
7484
7485 return true;
7486 }
7487
7488 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7489
7490 GOMP_atomic_start ();
7491 *addr = rhs;
7492 GOMP_atomic_end ();
7493
7494 The result is not globally atomic, but works so long as all parallel
7495 references are within #pragma omp atomic directives. According to
7496 responses received from omp@openmp.org, appears to be within spec.
7497 Which makes sense, since that's how several other compilers handle
7498 this situation as well.
7499 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7500 expanding. STORED_VAL is the operand of the matching
7501 GIMPLE_OMP_ATOMIC_STORE.
7502
7503 We replace
7504 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7505 loaded_val = *addr;
7506
7507 and replace
7508 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7509 *addr = stored_val;
7510 */
7511
7512 static bool
7513 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7514 tree addr, tree loaded_val, tree stored_val)
7515 {
7516 gimple_stmt_iterator si;
7517 gassign *stmt;
7518 tree t;
7519
7520 si = gsi_last_nondebug_bb (load_bb);
7521 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7522
7523 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7524 t = build_call_expr (t, 0);
7525 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7526
7527 tree mem = build_simple_mem_ref (addr);
7528 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7529 TREE_OPERAND (mem, 1)
7530 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7531 true),
7532 TREE_OPERAND (mem, 1));
7533 stmt = gimple_build_assign (loaded_val, mem);
7534 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7535 gsi_remove (&si, true);
7536
7537 si = gsi_last_nondebug_bb (store_bb);
7538 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7539
7540 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7541 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7542
7543 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7544 t = build_call_expr (t, 0);
7545 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7546 gsi_remove (&si, true);
7547
7548 if (gimple_in_ssa_p (cfun))
7549 update_ssa (TODO_update_ssa_no_phi);
7550 return true;
7551 }
7552
7553 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
7554 using expand_omp_atomic_fetch_op.  If that fails, we try
7555 expand_omp_atomic_pipeline, and if that fails too, the
7556 ultimate fallback is wrapping the operation in a mutex
7557 (expand_omp_atomic_mutex). REGION is the atomic region built
7558 by build_omp_regions_1(). */
7559
7560 static void
7561 expand_omp_atomic (struct omp_region *region)
7562 {
7563 basic_block load_bb = region->entry, store_bb = region->exit;
7564 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7565 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7566 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7567 tree addr = gimple_omp_atomic_load_rhs (load);
7568 tree stored_val = gimple_omp_atomic_store_val (store);
7569 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7570 HOST_WIDE_INT index;
7571
7572 /* Make sure the type is one of the supported sizes. */
7573 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7574 index = exact_log2 (index);
7575 if (index >= 0 && index <= 4)
7576 {
7577 unsigned int align = TYPE_ALIGN_UNIT (type);
7578
7579 /* __sync builtins require strict data alignment. */
7580 if (exact_log2 (align) >= index)
7581 {
7582 /* Atomic load. */
7583 scalar_mode smode;
7584 if (loaded_val == stored_val
7585 && (is_int_mode (TYPE_MODE (type), &smode)
7586 || is_float_mode (TYPE_MODE (type), &smode))
7587 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7588 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7589 return;
7590
7591 /* Atomic store. */
7592 if ((is_int_mode (TYPE_MODE (type), &smode)
7593 || is_float_mode (TYPE_MODE (type), &smode))
7594 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7595 && store_bb == single_succ (load_bb)
7596 && first_stmt (store_bb) == store
7597 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7598 stored_val, index))
7599 return;
7600
7601 /* When possible, use specialized atomic update functions. */
7602 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7603 && store_bb == single_succ (load_bb)
7604 && expand_omp_atomic_fetch_op (load_bb, addr,
7605 loaded_val, stored_val, index))
7606 return;
7607
7608 /* If we don't have specialized __sync builtins, try to implement it
7609 as a compare-and-swap loop.  */
7610 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7611 loaded_val, stored_val, index))
7612 return;
7613 }
7614 }
7615
7616 /* The ultimate fallback is wrapping the operation in a mutex. */
7617 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7618 }
7619
7620 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7621 at REGION_EXIT. */
7622
7623 static void
7624 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7625 basic_block region_exit)
7626 {
7627 class loop *outer = region_entry->loop_father;
7628 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7629
7630 /* Don't parallelize the kernels region if it contains more than one outer
7631 loop. */
7632 unsigned int nr_outer_loops = 0;
7633 class loop *single_outer = NULL;
7634 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
7635 {
7636 gcc_assert (loop_outer (loop) == outer);
7637
7638 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7639 continue;
7640
7641 if (region_exit != NULL
7642 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7643 continue;
7644
7645 nr_outer_loops++;
7646 single_outer = loop;
7647 }
7648 if (nr_outer_loops != 1)
7649 return;
7650
7651 for (class loop *loop = single_outer->inner;
7652 loop != NULL;
7653 loop = loop->inner)
7654 if (loop->next)
7655 return;
7656
7657 /* Mark the loops in the region. */
7658 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
7659 loop->in_oacc_kernels_region = true;
7660 }
7661
7662 /* Types used to pass grid and workgroup sizes to kernel invocation. */
7663
7664 struct GTY(()) grid_launch_attributes_trees
7665 {
7666 tree kernel_dim_array_type;
7667 tree kernel_lattrs_dimnum_decl;
7668 tree kernel_lattrs_grid_decl;
7669 tree kernel_lattrs_group_decl;
7670 tree kernel_launch_attributes_type;
7671 };
7672
7673 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7674
7675 /* Create types used to pass kernel launch attributes to target. */
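
/* In C terms, the record built below corresponds roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   (a sketch only; the fields are actually built as GENERIC trees).  */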
7676
7677 static void
7678 grid_create_kernel_launch_attr_types (void)
7679 {
7680 if (grid_attr_trees)
7681 return;
7682 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7683
7684 tree dim_arr_index_type
7685 = build_index_type (build_int_cst (integer_type_node, 2));
7686 grid_attr_trees->kernel_dim_array_type
7687 = build_array_type (uint32_type_node, dim_arr_index_type);
7688
7689 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7690 grid_attr_trees->kernel_lattrs_dimnum_decl
7691 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7692 uint32_type_node);
7693 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7694
7695 grid_attr_trees->kernel_lattrs_grid_decl
7696 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7697 grid_attr_trees->kernel_dim_array_type);
7698 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7699 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7700 grid_attr_trees->kernel_lattrs_group_decl
7701 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7702 grid_attr_trees->kernel_dim_array_type);
7703 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7704 = grid_attr_trees->kernel_lattrs_grid_decl;
7705 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7706 "__gomp_kernel_launch_attributes",
7707 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7708 }
7709
7710 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7711 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7712 of type uint32_type_node. */
7713
7714 static void
7715 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7716 tree fld_decl, int index, tree value)
7717 {
7718 tree ref = build4 (ARRAY_REF, uint32_type_node,
7719 build3 (COMPONENT_REF,
7720 grid_attr_trees->kernel_dim_array_type,
7721 range_var, fld_decl, NULL_TREE),
7722 build_int_cst (integer_type_node, index),
7723 NULL_TREE, NULL_TREE);
7724 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7725 }
7726
7727 /* Return a tree representation of a pointer to a structure with grid and
7728 work-group size information. Statements filling that information will be
7729 inserted before GSI; TGT_STMT is the target statement which has the
7730 necessary information in it. */
7731
7732 static tree
7733 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7734 gomp_target *tgt_stmt)
7735 {
7736 grid_create_kernel_launch_attr_types ();
7737 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7738 "__kernel_launch_attrs");
7739
7740 unsigned max_dim = 0;
7741 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7742 clause;
7743 clause = OMP_CLAUSE_CHAIN (clause))
7744 {
7745 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7746 continue;
7747
7748 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7749 max_dim = MAX (dim, max_dim);
7750
7751 grid_insert_store_range_dim (gsi, lattrs,
7752 grid_attr_trees->kernel_lattrs_grid_decl,
7753 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7754 grid_insert_store_range_dim (gsi, lattrs,
7755 grid_attr_trees->kernel_lattrs_group_decl,
7756 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7757 }
7758
7759 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7760 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7761 gcc_checking_assert (max_dim <= 2);
7762 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7763 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7764 GSI_SAME_STMT);
7765 TREE_ADDRESSABLE (lattrs) = 1;
7766 return build_fold_addr_expr (lattrs);
7767 }
7768
7769 /* Build target argument identifier from the DEVICE identifier, value
7770 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7771
7772 static tree
7773 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7774 {
7775 tree t = build_int_cst (integer_type_node, device);
7776 if (subseqent_param)
7777 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7778 build_int_cst (integer_type_node,
7779 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7780 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7781 build_int_cst (integer_type_node, id));
7782 return t;
7783 }
7784
7785 /* Like above, but return it in a type that can be directly stored as an element
7786 of the argument array. */
7787
7788 static tree
7789 get_target_argument_identifier (int device, bool subseqent_param, int id)
7790 {
7791 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7792 return fold_convert (ptr_type_node, t);
7793 }
7794
7795 /* Return a target argument consisting of DEVICE identifier, value identifier
7796 ID, and the actual VALUE. */
7797
7798 static tree
7799 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7800 tree value)
7801 {
7802 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7803 fold_convert (integer_type_node, value),
7804 build_int_cst (unsigned_type_node,
7805 GOMP_TARGET_ARG_VALUE_SHIFT));
7806 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7807 get_target_argument_identifier_1 (device, false, id));
7808 t = fold_convert (ptr_type_node, t);
7809 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7810 }
7811
7812 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7813 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7814 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7815 arguments. */
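
/* A sketch of the two resulting encodings (the constants come from
   gomp-constants.h, included above):

     small VALUE, one element:
       (void *) ((VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID)
     otherwise, two elements:
       (void *) (DEVICE | GOMP_TARGET_ARG_SUBSEQUENT_PARAM | ID),
       (void *) VALUE  */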
7816
7817 static void
7818 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7819 int id, tree value, vec <tree> *args)
7820 {
7821 if (tree_fits_shwi_p (value)
7822 && tree_to_shwi (value) > -(1 << 15)
7823 && tree_to_shwi (value) < (1 << 15))
7824 args->quick_push (get_target_argument_value (gsi, device, id, value));
7825 else
7826 {
7827 args->quick_push (get_target_argument_identifier (device, true, id));
7828 value = fold_convert (ptr_type_node, value);
7829 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7830 GSI_SAME_STMT);
7831 args->quick_push (value);
7832 }
7833 }
7834
7835 /* Create an array of arguments that is then passed to GOMP_target. */
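
/* Illustrative shape of the resulting array (a sketch; the exact contents
   depend on the clauses on the target statement):

     .omp_target_args[0]   num_teams argument (identifier and value,
                           possibly split into two consecutive elements);
     .omp_target_args[1]   thread_limit argument, likewise;
     ...                   optionally, the HSA kernel launch attributes;
     last element          NULL terminator.  */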
7836
7837 static tree
7838 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7839 {
7840 auto_vec <tree, 6> args;
7841 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7842 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7843 if (c)
7844 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7845 else
7846 t = integer_minus_one_node;
7847 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7848 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7849
7850 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7851 if (c)
7852 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7853 else
7854 t = integer_minus_one_node;
7855 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7856 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7857 &args);
7858
7859 /* Add HSA-specific grid sizes, if available. */
7860 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7861 OMP_CLAUSE__GRIDDIM_))
7862 {
7863 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7864 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7865 args.quick_push (t);
7866 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7867 }
7868
7869 /* Produce more, perhaps device specific, arguments here. */
7870
7871 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7872 args.length () + 1),
7873 ".omp_target_args");
7874 for (unsigned i = 0; i < args.length (); i++)
7875 {
7876 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7877 build_int_cst (integer_type_node, i),
7878 NULL_TREE, NULL_TREE);
7879 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7880 GSI_SAME_STMT);
7881 }
7882 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7883 build_int_cst (integer_type_node, args.length ()),
7884 NULL_TREE, NULL_TREE);
7885 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7886 GSI_SAME_STMT);
7887 TREE_ADDRESSABLE (argarray) = 1;
7888 return build_fold_addr_expr (argarray);
7889 }
7890
7891 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7892
7893 static void
7894 expand_omp_target (struct omp_region *region)
7895 {
7896 basic_block entry_bb, exit_bb, new_bb;
7897 struct function *child_cfun;
7898 tree child_fn, block, t;
7899 gimple_stmt_iterator gsi;
7900 gomp_target *entry_stmt;
7901 gimple *stmt;
7902 edge e;
7903 bool offloaded, data_region;
7904 int target_kind;
7905
7906 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7907 target_kind = gimple_omp_target_kind (entry_stmt);
7908 new_bb = region->entry;
7909
7910 offloaded = is_gimple_omp_offloaded (entry_stmt);
7911 switch (target_kind)
7912 {
7913 case GF_OMP_TARGET_KIND_REGION:
7914 case GF_OMP_TARGET_KIND_UPDATE:
7915 case GF_OMP_TARGET_KIND_ENTER_DATA:
7916 case GF_OMP_TARGET_KIND_EXIT_DATA:
7917 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7918 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7919 case GF_OMP_TARGET_KIND_OACC_SERIAL:
7920 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7921 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7922 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7923 data_region = false;
7924 break;
7925 case GF_OMP_TARGET_KIND_DATA:
7926 case GF_OMP_TARGET_KIND_OACC_DATA:
7927 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7928 data_region = true;
7929 break;
7930 default:
7931 gcc_unreachable ();
7932 }
7933
7934 child_fn = NULL_TREE;
7935 child_cfun = NULL;
7936 if (offloaded)
7937 {
7938 child_fn = gimple_omp_target_child_fn (entry_stmt);
7939 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7940 }
7941
7942 /* Supported by expand_omp_taskreg, but not here. */
7943 if (child_cfun != NULL)
7944 gcc_checking_assert (!child_cfun->cfg);
7945 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7946
7947 entry_bb = region->entry;
7948 exit_bb = region->exit;
7949
7950 switch (target_kind)
7951 {
7952 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7953 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7954
7955 /* Further down, all OpenACC compute constructs will be mapped to
7956 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
7957 is an "oacc kernels" attribute set for OpenACC kernels. */
7958 DECL_ATTRIBUTES (child_fn)
7959 = tree_cons (get_identifier ("oacc kernels"),
7960 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7961 break;
7962 case GF_OMP_TARGET_KIND_OACC_SERIAL:
7963 /* Further down, all OpenACC compute constructs will be mapped to
7964 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
7965 is an "oacc serial" attribute set for OpenACC serial. */
7966 DECL_ATTRIBUTES (child_fn)
7967 = tree_cons (get_identifier ("oacc serial"),
7968 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7969 break;
7970 default:
7971 break;
7972 }
7973
7974 if (offloaded)
7975 {
7976 unsigned srcidx, dstidx, num;
7977
7978 /* If the offloading region needs data sent from the parent
7979 function, then the very first statement (except possible
7980 tree profile counter updates) of the offloading body
7981 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7982 &.OMP_DATA_O is passed as an argument to the child function,
7983 we need to replace it with the argument as seen by the child
7984 function.
7985
7986 In most cases, this will end up being the identity assignment
7987 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7988 a function call that has been inlined, the original PARM_DECL
7989 .OMP_DATA_I may have been converted into a different local
7990 variable, in which case we need to keep the assignment.  */
7991 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7992 if (data_arg)
7993 {
7994 basic_block entry_succ_bb = single_succ (entry_bb);
7995 gimple_stmt_iterator gsi;
7996 tree arg;
7997 gimple *tgtcopy_stmt = NULL;
7998 tree sender = TREE_VEC_ELT (data_arg, 0);
7999
8000 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
8001 {
8002 gcc_assert (!gsi_end_p (gsi));
8003 stmt = gsi_stmt (gsi);
8004 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8005 continue;
8006
8007 if (gimple_num_ops (stmt) == 2)
8008 {
8009 tree arg = gimple_assign_rhs1 (stmt);
8010
8011 /* We're ignoring the subcode because we're
8012 effectively doing a STRIP_NOPS. */
8013
8014 if (TREE_CODE (arg) == ADDR_EXPR
8015 && TREE_OPERAND (arg, 0) == sender)
8016 {
8017 tgtcopy_stmt = stmt;
8018 break;
8019 }
8020 }
8021 }
8022
8023 gcc_assert (tgtcopy_stmt != NULL);
8024 arg = DECL_ARGUMENTS (child_fn);
8025
8026 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
8027 gsi_remove (&gsi, true);
8028 }
8029
8030 /* Declare local variables needed in CHILD_CFUN. */
8031 block = DECL_INITIAL (child_fn);
8032 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
8033 /* The gimplifier could record temporaries in the offloading block
8034 rather than in the containing function's local_decls chain,
8035 which would mean cgraph missed finalizing them. Do it now. */
8036 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
8037 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
8038 varpool_node::finalize_decl (t);
8039 DECL_SAVED_TREE (child_fn) = NULL;
8040 /* We'll create a CFG for child_fn, so no gimple body is needed. */
8041 gimple_set_body (child_fn, NULL);
8042 TREE_USED (block) = 1;
8043
8044 /* Reset DECL_CONTEXT on function arguments. */
8045 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
8046 DECL_CONTEXT (t) = child_fn;
8047
8048 /* Split ENTRY_BB at GIMPLE_*,
8049 so that it can be moved to the child function. */
8050 gsi = gsi_last_nondebug_bb (entry_bb);
8051 stmt = gsi_stmt (gsi);
8052 gcc_assert (stmt
8053 && gimple_code (stmt) == gimple_code (entry_stmt));
8054 e = split_block (entry_bb, stmt);
8055 gsi_remove (&gsi, true);
8056 entry_bb = e->dest;
8057 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8058
8059 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
8060 if (exit_bb)
8061 {
8062 gsi = gsi_last_nondebug_bb (exit_bb);
8063 gcc_assert (!gsi_end_p (gsi)
8064 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8065 stmt = gimple_build_return (NULL);
8066 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
8067 gsi_remove (&gsi, true);
8068 }
8069
8070 /* Move the offloading region into CHILD_CFUN. */
8071
8072 block = gimple_block (entry_stmt);
8073
8074 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
8075 if (exit_bb)
8076 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
8077 /* When the OMP expansion process cannot guarantee an up-to-date
8078 loop tree, arrange for the child function to fix up loops.  */
8079 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8080 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
8081
8082 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
8083 num = vec_safe_length (child_cfun->local_decls);
8084 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
8085 {
8086 t = (*child_cfun->local_decls)[srcidx];
8087 if (DECL_CONTEXT (t) == cfun->decl)
8088 continue;
8089 if (srcidx != dstidx)
8090 (*child_cfun->local_decls)[dstidx] = t;
8091 dstidx++;
8092 }
8093 if (dstidx != num)
8094 vec_safe_truncate (child_cfun->local_decls, dstidx);
8095
8096 /* Inform the callgraph about the new function. */
8097 child_cfun->curr_properties = cfun->curr_properties;
8098 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
8099 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
8100 cgraph_node *node = cgraph_node::get_create (child_fn);
8101 node->parallelized_function = 1;
8102 cgraph_node::add_new_function (child_fn, true);
8103
8104 /* Add the new function to the offload table. */
8105 if (ENABLE_OFFLOADING)
8106 {
8107 if (in_lto_p)
8108 DECL_PRESERVE_P (child_fn) = 1;
8109 vec_safe_push (offload_funcs, child_fn);
8110 }
8111
8112 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
8113 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
8114
8115 /* Fix the callgraph edges for child_cfun. Those for cfun will be
8116 fixed in a following pass. */
8117 push_cfun (child_cfun);
8118 if (need_asm)
8119 assign_assembler_name_if_needed (child_fn);
8120 cgraph_edge::rebuild_edges ();
8121
8122 /* Some EH regions might become dead, see PR34608. If
8123 pass_cleanup_cfg isn't the first pass to happen with the
8124 new child, these dead EH edges might cause problems.
8125 Clean them up now. */
8126 if (flag_exceptions)
8127 {
8128 basic_block bb;
8129 bool changed = false;
8130
8131 FOR_EACH_BB_FN (bb, cfun)
8132 changed |= gimple_purge_dead_eh_edges (bb);
8133 if (changed)
8134 cleanup_tree_cfg ();
8135 }
8136 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8137 verify_loop_structure ();
8138 pop_cfun ();
8139
8140 if (dump_file && !gimple_in_ssa_p (cfun))
8141 {
8142 omp_any_child_fn_dumped = true;
8143 dump_function_header (dump_file, child_fn, dump_flags);
8144 dump_function_to_file (child_fn, dump_file, dump_flags);
8145 }
8146
8147 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
8148 }
8149
8150 /* Emit a library call to launch the offloading region, or do data
8151 transfers. */
8152 tree t1, t2, t3, t4, depend, c, clauses;
8153 enum built_in_function start_ix;
8154 unsigned int flags_i = 0;
8155
8156 switch (gimple_omp_target_kind (entry_stmt))
8157 {
8158 case GF_OMP_TARGET_KIND_REGION:
8159 start_ix = BUILT_IN_GOMP_TARGET;
8160 break;
8161 case GF_OMP_TARGET_KIND_DATA:
8162 start_ix = BUILT_IN_GOMP_TARGET_DATA;
8163 break;
8164 case GF_OMP_TARGET_KIND_UPDATE:
8165 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
8166 break;
8167 case GF_OMP_TARGET_KIND_ENTER_DATA:
8168 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8169 break;
8170 case GF_OMP_TARGET_KIND_EXIT_DATA:
8171 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
8172 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
8173 break;
8174 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8175 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8176 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8177 start_ix = BUILT_IN_GOACC_PARALLEL;
8178 break;
8179 case GF_OMP_TARGET_KIND_OACC_DATA:
8180 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8181 start_ix = BUILT_IN_GOACC_DATA_START;
8182 break;
8183 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8184 start_ix = BUILT_IN_GOACC_UPDATE;
8185 break;
8186 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8187 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
8188 break;
8189 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8190 start_ix = BUILT_IN_GOACC_DECLARE;
8191 break;
8192 default:
8193 gcc_unreachable ();
8194 }
8195
8196 clauses = gimple_omp_target_clauses (entry_stmt);
8197
8198 tree device = NULL_TREE;
8199 location_t device_loc = UNKNOWN_LOCATION;
8200 tree goacc_flags = NULL_TREE;
8201 if (is_gimple_omp_oacc (entry_stmt))
8202 {
8203 /* By default, no GOACC_FLAGs are set. */
8204 goacc_flags = integer_zero_node;
8205 }
8206 else
8207 {
8208 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
8209 if (c)
8210 {
8211 device = OMP_CLAUSE_DEVICE_ID (c);
8212 device_loc = OMP_CLAUSE_LOCATION (c);
8213 }
8214 else
8215 {
8216 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
8217 library choose). */
8218 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
8219 device_loc = gimple_location (entry_stmt);
8220 }
8221
8222 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
8223 if (c)
8224 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
8225 }
8226
8227 /* By default, there is no conditional. */
8228 tree cond = NULL_TREE;
8229 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
8230 if (c)
8231 cond = OMP_CLAUSE_IF_EXPR (c);
8232 /* If we found the clause 'if (cond)', build:
8233 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
8234 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
8235 if (cond)
8236 {
8237 tree *tp;
8238 if (is_gimple_omp_oacc (entry_stmt))
8239 tp = &goacc_flags;
8240 else
8241 {
8242 /* Ensure 'device' is of the correct type. */
8243 device = fold_convert_loc (device_loc, integer_type_node, device);
8244
8245 tp = &device;
8246 }
8247
8248 cond = gimple_boolify (cond);
8249
8250 basic_block cond_bb, then_bb, else_bb;
8251 edge e;
8252 tree tmp_var;
8253
8254 tmp_var = create_tmp_var (TREE_TYPE (*tp));
8255 if (offloaded)
8256 e = split_block_after_labels (new_bb);
8257 else
8258 {
8259 gsi = gsi_last_nondebug_bb (new_bb);
8260 gsi_prev (&gsi);
8261 e = split_block (new_bb, gsi_stmt (gsi));
8262 }
8263 cond_bb = e->src;
8264 new_bb = e->dest;
8265 remove_edge (e);
8266
8267 then_bb = create_empty_bb (cond_bb);
8268 else_bb = create_empty_bb (then_bb);
8269 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
8270 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
8271
8272 stmt = gimple_build_cond_empty (cond);
8273 gsi = gsi_last_bb (cond_bb);
8274 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8275
8276 gsi = gsi_start_bb (then_bb);
8277 stmt = gimple_build_assign (tmp_var, *tp);
8278 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8279
8280 gsi = gsi_start_bb (else_bb);
8281 if (is_gimple_omp_oacc (entry_stmt))
8282 stmt = gimple_build_assign (tmp_var,
8283 BIT_IOR_EXPR,
8284 *tp,
8285 build_int_cst (integer_type_node,
8286 GOACC_FLAG_HOST_FALLBACK));
8287 else
8288 stmt = gimple_build_assign (tmp_var,
8289 build_int_cst (integer_type_node,
8290 GOMP_DEVICE_HOST_FALLBACK));
8291 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
8292
8293 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8294 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8295 add_bb_to_loop (then_bb, cond_bb->loop_father);
8296 add_bb_to_loop (else_bb, cond_bb->loop_father);
8297 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8298 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8299
8300 *tp = tmp_var;
8301
8302 gsi = gsi_last_nondebug_bb (new_bb);
8303 }
8304 else
8305 {
8306 gsi = gsi_last_nondebug_bb (new_bb);
8307
8308 if (device != NULL_TREE)
8309 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8310 true, GSI_SAME_STMT);
8311 }
8312
8313 t = gimple_omp_target_data_arg (entry_stmt);
8314 if (t == NULL)
8315 {
8316 t1 = size_zero_node;
8317 t2 = build_zero_cst (ptr_type_node);
8318 t3 = t2;
8319 t4 = t2;
8320 }
8321 else
8322 {
8323 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8324 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8325 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8326 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8327 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8328 }
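/* Under the usual lowering layout (a reasonable reading of the TREE_VEC
   built during lowering, stated here as an assumption): T1 is the number of
   mapped entries, and T2, T3 and T4 are the addresses of the host-address,
   size and map-kind arrays handed to the runtime.  */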
8329
8330 gimple *g;
8331 bool tagging = false;
8332 /* The maximum number of arguments used by any start_ix, not counting varargs. */
8333 auto_vec<tree, 11> args;
8334 if (is_gimple_omp_oacc (entry_stmt))
8335 {
8336 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8337 TREE_TYPE (goacc_flags), goacc_flags);
8338 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8339 NULL_TREE, true,
8340 GSI_SAME_STMT);
8341 args.quick_push (goacc_flags_m);
8342 }
8343 else
8344 args.quick_push (device);
8345 if (offloaded)
8346 args.quick_push (build_fold_addr_expr (child_fn));
8347 args.quick_push (t1);
8348 args.quick_push (t2);
8349 args.quick_push (t3);
8350 args.quick_push (t4);
8351 switch (start_ix)
8352 {
8353 case BUILT_IN_GOACC_DATA_START:
8354 case BUILT_IN_GOACC_DECLARE:
8355 case BUILT_IN_GOMP_TARGET_DATA:
8356 break;
8357 case BUILT_IN_GOMP_TARGET:
8358 case BUILT_IN_GOMP_TARGET_UPDATE:
8359 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8360 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8361 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8362 if (c)
8363 depend = OMP_CLAUSE_DECL (c);
8364 else
8365 depend = build_int_cst (ptr_type_node, 0);
8366 args.quick_push (depend);
8367 if (start_ix == BUILT_IN_GOMP_TARGET)
8368 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8369 break;
8370 case BUILT_IN_GOACC_PARALLEL:
8371 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
8372 {
8373 tree dims = NULL_TREE;
8374 unsigned int ix;
8375
8376 /* For serial constructs we set all dimensions to 1. */
8377 for (ix = GOMP_DIM_MAX; ix--;)
8378 dims = tree_cons (NULL_TREE, integer_one_node, dims);
8379 oacc_replace_fn_attrib (child_fn, dims);
8380 }
8381 else
8382 oacc_set_fn_attrib (child_fn, clauses, &args);
8383 tagging = true;
8384 /* FALLTHRU */
8385 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8386 case BUILT_IN_GOACC_UPDATE:
8387 {
8388 tree t_async = NULL_TREE;
8389
8390 /* If present, use the value specified by the respective
8391 clause, making sure that it is of the correct type. */
8392 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8393 if (c)
8394 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8395 integer_type_node,
8396 OMP_CLAUSE_ASYNC_EXPR (c));
8397 else if (!tagging)
8398 /* Default value for t_async. */
8399 t_async = fold_convert_loc (gimple_location (entry_stmt),
8400 integer_type_node,
8401 build_int_cst (integer_type_node,
8402 GOMP_ASYNC_SYNC));
8403 if (tagging && t_async)
8404 {
8405 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8406
8407 if (TREE_CODE (t_async) == INTEGER_CST)
8408 {
8409 /* See if we can pack the async arg into the tag's
8410 operand. */
8411 i_async = TREE_INT_CST_LOW (t_async);
8412 if (i_async < GOMP_LAUNCH_OP_MAX)
8413 t_async = NULL_TREE;
8414 else
8415 i_async = GOMP_LAUNCH_OP_MAX;
8416 }
8417 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8418 i_async));
8419 }
8420 if (t_async)
8421 args.safe_push (t_async);
8422
8423 /* Save the argument index, and ... */
8424 unsigned t_wait_idx = args.length ();
8425 unsigned num_waits = 0;
8426 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8427 if (!tagging || c)
8428 /* ... push a placeholder. */
8429 args.safe_push (integer_zero_node);
8430
8431 for (; c; c = OMP_CLAUSE_CHAIN (c))
8432 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8433 {
8434 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8435 integer_type_node,
8436 OMP_CLAUSE_WAIT_EXPR (c)));
8437 num_waits++;
8438 }
8439
8440 if (!tagging || num_waits)
8441 {
8442 tree len;
8443
8444 /* Now that we know the number, update the placeholder. */
8445 if (tagging)
8446 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8447 else
8448 len = build_int_cst (integer_type_node, num_waits);
8449 len = fold_convert_loc (gimple_location (entry_stmt),
8450 unsigned_type_node, len);
8451 args[t_wait_idx] = len;
8452 }
8453 }
8454 break;
8455 default:
8456 gcc_unreachable ();
8457 }
8458 if (tagging)
8459 /* Push terminal marker - zero. */
8460 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8461
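/* A sketch of the call typically emitted for an offloaded OpenMP 'target'
   region, assuming the usual libgomp entry point behind BUILT_IN_GOMP_TARGET
   (argument names are illustrative only):

     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);

   OpenACC compute constructs instead go through BUILT_IN_GOACC_PARALLEL with
   the tag-encoded launch arguments pushed above.  */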
8462 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8463 gimple_set_location (g, gimple_location (entry_stmt));
8464 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8465 if (!offloaded)
8466 {
8467 g = gsi_stmt (gsi);
8468 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8469 gsi_remove (&gsi, true);
8470 }
8471 if (data_region && region->exit)
8472 {
8473 gsi = gsi_last_nondebug_bb (region->exit);
8474 g = gsi_stmt (gsi);
8475 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8476 gsi_remove (&gsi, true);
8477 }
8478 }
8479
8480 /* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with the
8481 iteration variable derived from the thread number. INTRA_GROUP means this
8482 is an expansion of a loop iterating over work-items within a separate
8483 iteration over groups. */
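/* A minimal sketch of the per-dimension assignment emitted below (types and
   temporaries simplified; the thread id comes from the HSA builtins such as
   BUILT_IN_HSA_WORKITEMABSID):

     threadid = <work-item/group id builtin> (dim);
     V = N1 + (TYPE) threadid * STEP;

   so each work-item computes its own iteration directly and no loop back
   edge remains.  */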
8484
8485 static void
8486 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8487 {
8488 gimple_stmt_iterator gsi;
8489 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8490 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8491 == GF_OMP_FOR_KIND_GRID_LOOP);
8492 size_t collapse = gimple_omp_for_collapse (for_stmt);
8493 struct omp_for_data_loop *loops
8494 = XALLOCAVEC (struct omp_for_data_loop,
8495 gimple_omp_for_collapse (for_stmt));
8496 struct omp_for_data fd;
8497
8498 remove_edge (BRANCH_EDGE (kfor->entry));
8499 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8500
8501 gcc_assert (kfor->cont);
8502 omp_extract_for_data (for_stmt, &fd, loops);
8503
8504 gsi = gsi_start_bb (body_bb);
8505
8506 for (size_t dim = 0; dim < collapse; dim++)
8507 {
8508 tree type, itype;
8509 itype = type = TREE_TYPE (fd.loops[dim].v);
8510 if (POINTER_TYPE_P (type))
8511 itype = signed_type_for (type);
8512
8513 tree n1 = fd.loops[dim].n1;
8514 tree step = fd.loops[dim].step;
8515 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8516 true, NULL_TREE, true, GSI_SAME_STMT);
8517 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8518 true, NULL_TREE, true, GSI_SAME_STMT);
8519 tree threadid;
8520 if (gimple_omp_for_grid_group_iter (for_stmt))
8521 {
8522 gcc_checking_assert (!intra_group);
8523 threadid = build_call_expr (builtin_decl_explicit
8524 (BUILT_IN_HSA_WORKGROUPID), 1,
8525 build_int_cstu (unsigned_type_node, dim));
8526 }
8527 else if (intra_group)
8528 threadid = build_call_expr (builtin_decl_explicit
8529 (BUILT_IN_HSA_WORKITEMID), 1,
8530 build_int_cstu (unsigned_type_node, dim));
8531 else
8532 threadid = build_call_expr (builtin_decl_explicit
8533 (BUILT_IN_HSA_WORKITEMABSID), 1,
8534 build_int_cstu (unsigned_type_node, dim));
8535 threadid = fold_convert (itype, threadid);
8536 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8537 true, GSI_SAME_STMT);
8538
8539 tree startvar = fd.loops[dim].v;
8540 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8541 if (POINTER_TYPE_P (type))
8542 t = fold_build_pointer_plus (n1, t);
8543 else
8544 t = fold_build2 (PLUS_EXPR, type, t, n1);
8545 t = fold_convert (type, t);
8546 t = force_gimple_operand_gsi (&gsi, t,
8547 DECL_P (startvar)
8548 && TREE_ADDRESSABLE (startvar),
8549 NULL_TREE, true, GSI_SAME_STMT);
8550 gassign *assign_stmt = gimple_build_assign (startvar, t);
8551 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8552 }
8553 /* Remove the omp for statement. */
8554 gsi = gsi_last_nondebug_bb (kfor->entry);
8555 gsi_remove (&gsi, true);
8556
8557 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8558 gsi = gsi_last_nondebug_bb (kfor->cont);
8559 gcc_assert (!gsi_end_p (gsi)
8560 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8561 gsi_remove (&gsi, true);
8562
8563 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8564 gsi = gsi_last_nondebug_bb (kfor->exit);
8565 gcc_assert (!gsi_end_p (gsi)
8566 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8567 if (intra_group)
8568 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8569 gsi_remove (&gsi, true);
8570
8571 /* Fixup the much simpler CFG. */
8572 remove_edge (find_edge (kfor->cont, body_bb));
8573
8574 if (kfor->cont != body_bb)
8575 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8576 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8577 }
8578
8579 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8580 argument_decls. */
8581
8582 struct grid_arg_decl_map
8583 {
8584 tree old_arg;
8585 tree new_arg;
8586 };
8587
8588 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8589 pertaining to the kernel function. */
8590
8591 static tree
8592 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8593 {
8594 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8595 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8596 tree t = *tp;
8597
8598 if (t == adm->old_arg)
8599 *tp = adm->new_arg;
8600 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8601 return NULL_TREE;
8602 }
8603
8604 /* If the TARGET region contains a kernel body 'for' loop, remove its region
8605 from the TARGET and expand it in HSA gridified kernel fashion. */
8606
8607 static void
8608 grid_expand_target_grid_body (struct omp_region *target)
8609 {
8610 if (!hsa_gen_requested_p ())
8611 return;
8612
8613 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8614 struct omp_region **pp;
8615
8616 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8617 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8618 break;
8619
8620 struct omp_region *gpukernel = *pp;
8621
8622 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8623 if (!gpukernel)
8624 {
8625 /* HSA cannot handle OACC stuff. */
8626 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8627 return;
8628 gcc_checking_assert (orig_child_fndecl);
8629 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8630 OMP_CLAUSE__GRIDDIM_));
8631 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8632
8633 hsa_register_kernel (n);
8634 return;
8635 }
8636
8637 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8638 OMP_CLAUSE__GRIDDIM_));
8639 tree inside_block
8640 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8641 *pp = gpukernel->next;
8642 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8643 if ((*pp)->type == GIMPLE_OMP_FOR)
8644 break;
8645
8646 struct omp_region *kfor = *pp;
8647 gcc_assert (kfor);
8648 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8649 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8650 *pp = kfor->next;
8651 if (kfor->inner)
8652 {
8653 if (gimple_omp_for_grid_group_iter (for_stmt))
8654 {
8655 struct omp_region **next_pp;
8656 for (pp = &kfor->inner; *pp; pp = next_pp)
8657 {
8658 next_pp = &(*pp)->next;
8659 if ((*pp)->type != GIMPLE_OMP_FOR)
8660 continue;
8661 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8662 gcc_assert (gimple_omp_for_kind (inner)
8663 == GF_OMP_FOR_KIND_GRID_LOOP);
8664 grid_expand_omp_for_loop (*pp, true);
8665 *pp = (*pp)->next;
8666 next_pp = pp;
8667 }
8668 }
8669 expand_omp (kfor->inner);
8670 }
8671 if (gpukernel->inner)
8672 expand_omp (gpukernel->inner);
8673
8674 tree kern_fndecl = copy_node (orig_child_fndecl);
8675 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8676 "kernel");
8677 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8678 tree tgtblock = gimple_block (tgt_stmt);
8679 tree fniniblock = make_node (BLOCK);
8680 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8681 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8682 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8683 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8684 DECL_INITIAL (kern_fndecl) = fniniblock;
8685 push_struct_function (kern_fndecl);
8686 cfun->function_end_locus = gimple_location (tgt_stmt);
8687 init_tree_ssa (cfun);
8688 pop_cfun ();
8689
8690 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8691 gcc_assert (!DECL_CHAIN (old_parm_decl));
8692 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8693 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8694 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8695 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8696 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8697 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8698 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8699 kern_cfun->curr_properties = cfun->curr_properties;
8700
8701 grid_expand_omp_for_loop (kfor, false);
8702
8703 /* Remove the GIMPLE_OMP_GRID_BODY statement. */
8704 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8705 gsi_remove (&gsi, true);
8706 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8707 return. */
8708 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8709 gcc_assert (!gsi_end_p (gsi)
8710 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8711 gimple *ret_stmt = gimple_build_return (NULL);
8712 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8713 gsi_remove (&gsi, true);
8714
8715 /* Statements in the first BB in the target construct have been produced by
8716 target lowering and must be copied inside the GPUKERNEL, with the two
8717 exceptions of the first OMP statement and the OMP_DATA assignment
8718 statement. */
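/* Concretely, the copy loop below stops at the first OMP statement and skips
   the assignment whose right-hand side takes the address of the sender
   variable (TREE_VEC_ELT (data_arg, 0)), since the kernel receives that
   argument directly.  */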
8719 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8720 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8721 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8722 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8723 !gsi_end_p (tsi); gsi_next (&tsi))
8724 {
8725 gimple *stmt = gsi_stmt (tsi);
8726 if (is_gimple_omp (stmt))
8727 break;
8728 if (sender
8729 && is_gimple_assign (stmt)
8730 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8731 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8732 continue;
8733 gimple *copy = gimple_copy (stmt);
8734 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8735 gimple_set_block (copy, fniniblock);
8736 }
8737
8738 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8739 gpukernel->exit, inside_block);
8740
8741 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8742 kcn->mark_force_output ();
8743 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8744
8745 hsa_register_kernel (kcn, orig_child);
8746
8747 cgraph_node::add_new_function (kern_fndecl, true);
8748 push_cfun (kern_cfun);
8749 cgraph_edge::rebuild_edges ();
8750
8751 /* Re-map any mention of the PARM_DECL of the original function to the
8752 PARM_DECL of the new one.
8753
8754 TODO: It would be great if lowering produced references into the GPU
8755 kernel decl straight away and we did not have to do this. */
8756 struct grid_arg_decl_map adm;
8757 adm.old_arg = old_parm_decl;
8758 adm.new_arg = new_parm_decl;
8759 basic_block bb;
8760 FOR_EACH_BB_FN (bb, kern_cfun)
8761 {
8762 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8763 {
8764 gimple *stmt = gsi_stmt (gsi);
8765 struct walk_stmt_info wi;
8766 memset (&wi, 0, sizeof (wi));
8767 wi.info = &adm;
8768 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8769 }
8770 }
8771 pop_cfun ();
8772
8773 return;
8774 }
8775
8776 /* Expand the parallel region tree rooted at REGION. Expansion
8777 proceeds in depth-first order. Innermost regions are expanded
8778 first. This way, parallel regions that require a new function to
8779 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8780 internal dependencies in their body. */
8781
8782 static void
8783 expand_omp (struct omp_region *region)
8784 {
8785 omp_any_child_fn_dumped = false;
8786 while (region)
8787 {
8788 location_t saved_location;
8789 gimple *inner_stmt = NULL;
8790
8791 /* First, determine whether this is a combined parallel+workshare
8792 region. */
8793 if (region->type == GIMPLE_OMP_PARALLEL)
8794 determine_parallel_type (region);
8795 else if (region->type == GIMPLE_OMP_TARGET)
8796 grid_expand_target_grid_body (region);
8797
8798 if (region->type == GIMPLE_OMP_FOR
8799 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8800 inner_stmt = last_stmt (region->inner->entry);
8801
8802 if (region->inner)
8803 expand_omp (region->inner);
8804
8805 saved_location = input_location;
8806 if (gimple_has_location (last_stmt (region->entry)))
8807 input_location = gimple_location (last_stmt (region->entry));
8808
8809 switch (region->type)
8810 {
8811 case GIMPLE_OMP_PARALLEL:
8812 case GIMPLE_OMP_TASK:
8813 expand_omp_taskreg (region);
8814 break;
8815
8816 case GIMPLE_OMP_FOR:
8817 expand_omp_for (region, inner_stmt);
8818 break;
8819
8820 case GIMPLE_OMP_SECTIONS:
8821 expand_omp_sections (region);
8822 break;
8823
8824 case GIMPLE_OMP_SECTION:
8825 /* Individual omp sections are handled together with their
8826 parent GIMPLE_OMP_SECTIONS region. */
8827 break;
8828
8829 case GIMPLE_OMP_SINGLE:
8830 expand_omp_single (region);
8831 break;
8832
8833 case GIMPLE_OMP_ORDERED:
8834 {
8835 gomp_ordered *ord_stmt
8836 = as_a <gomp_ordered *> (last_stmt (region->entry));
8837 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8838 OMP_CLAUSE_DEPEND))
8839 {
8840 /* We'll expand these when expanding the corresponding
8841 worksharing region with an ordered(n) clause. */
8842 gcc_assert (region->outer
8843 && region->outer->type == GIMPLE_OMP_FOR);
8844 region->ord_stmt = ord_stmt;
8845 break;
8846 }
8847 }
8848 /* FALLTHRU */
8849 case GIMPLE_OMP_MASTER:
8850 case GIMPLE_OMP_TASKGROUP:
8851 case GIMPLE_OMP_CRITICAL:
8852 case GIMPLE_OMP_TEAMS:
8853 expand_omp_synch (region);
8854 break;
8855
8856 case GIMPLE_OMP_ATOMIC_LOAD:
8857 expand_omp_atomic (region);
8858 break;
8859
8860 case GIMPLE_OMP_TARGET:
8861 expand_omp_target (region);
8862 break;
8863
8864 default:
8865 gcc_unreachable ();
8866 }
8867
8868 input_location = saved_location;
8869 region = region->next;
8870 }
8871 if (omp_any_child_fn_dumped)
8872 {
8873 if (dump_file)
8874 dump_function_header (dump_file, current_function_decl, dump_flags);
8875 omp_any_child_fn_dumped = false;
8876 }
8877 }
8878
8879 /* Helper for build_omp_regions. Scan the dominator tree starting at
8880 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8881 true, the function ends once a single tree is built (otherwise, the whole
8882 forest of OMP constructs may be built). */
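/* For instance, a hypothetical nest such as

     #pragma omp parallel
       #pragma omp for
         ...

   produces a region for the parallel whose 'inner' child is the region for
   the loop, each one closed by the block holding its GIMPLE_OMP_RETURN.  */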
8883
8884 static void
8885 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8886 bool single_tree)
8887 {
8888 gimple_stmt_iterator gsi;
8889 gimple *stmt;
8890 basic_block son;
8891
8892 gsi = gsi_last_nondebug_bb (bb);
8893 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8894 {
8895 struct omp_region *region;
8896 enum gimple_code code;
8897
8898 stmt = gsi_stmt (gsi);
8899 code = gimple_code (stmt);
8900 if (code == GIMPLE_OMP_RETURN)
8901 {
8902 /* STMT is the return point out of region PARENT. Mark it
8903 as the exit point and make PARENT the immediately
8904 enclosing region. */
8905 gcc_assert (parent);
8906 region = parent;
8907 region->exit = bb;
8908 parent = parent->outer;
8909 }
8910 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8911 {
8912 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8913 GIMPLE_OMP_RETURN, but matches with
8914 GIMPLE_OMP_ATOMIC_LOAD. */
8915 gcc_assert (parent);
8916 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8917 region = parent;
8918 region->exit = bb;
8919 parent = parent->outer;
8920 }
8921 else if (code == GIMPLE_OMP_CONTINUE)
8922 {
8923 gcc_assert (parent);
8924 parent->cont = bb;
8925 }
8926 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8927 {
8928 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8929 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8930 }
8931 else
8932 {
8933 region = new_omp_region (bb, code, parent);
8934 /* Otherwise... */
8935 if (code == GIMPLE_OMP_TARGET)
8936 {
8937 switch (gimple_omp_target_kind (stmt))
8938 {
8939 case GF_OMP_TARGET_KIND_REGION:
8940 case GF_OMP_TARGET_KIND_DATA:
8941 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8942 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8943 case GF_OMP_TARGET_KIND_OACC_SERIAL:
8944 case GF_OMP_TARGET_KIND_OACC_DATA:
8945 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8946 break;
8947 case GF_OMP_TARGET_KIND_UPDATE:
8948 case GF_OMP_TARGET_KIND_ENTER_DATA:
8949 case GF_OMP_TARGET_KIND_EXIT_DATA:
8950 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8951 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8952 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8953 /* ..., other than for those stand-alone directives... */
8954 region = NULL;
8955 break;
8956 default:
8957 gcc_unreachable ();
8958 }
8959 }
8960 else if (code == GIMPLE_OMP_ORDERED
8961 && omp_find_clause (gimple_omp_ordered_clauses
8962 (as_a <gomp_ordered *> (stmt)),
8963 OMP_CLAUSE_DEPEND))
8964 /* #pragma omp ordered depend is also just a stand-alone
8965 directive. */
8966 region = NULL;
8967 else if (code == GIMPLE_OMP_TASK
8968 && gimple_omp_task_taskwait_p (stmt))
8969 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8970 region = NULL;
8971 /* ..., this directive becomes the parent for a new region. */
8972 if (region)
8973 parent = region;
8974 }
8975 }
8976
8977 if (single_tree && !parent)
8978 return;
8979
8980 for (son = first_dom_son (CDI_DOMINATORS, bb);
8981 son;
8982 son = next_dom_son (CDI_DOMINATORS, son))
8983 build_omp_regions_1 (son, parent, single_tree);
8984 }
8985
8986 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8987 root_omp_region. */
8988
8989 static void
8990 build_omp_regions_root (basic_block root)
8991 {
8992 gcc_assert (root_omp_region == NULL);
8993 build_omp_regions_1 (root, NULL, true);
8994 gcc_assert (root_omp_region != NULL);
8995 }
8996
8997 /* Expands omp construct (and its subconstructs) starting in HEAD. */
8998
8999 void
9000 omp_expand_local (basic_block head)
9001 {
9002 build_omp_regions_root (head);
9003 if (dump_file && (dump_flags & TDF_DETAILS))
9004 {
9005 fprintf (dump_file, "\nOMP region tree\n\n");
9006 dump_omp_region (dump_file, root_omp_region, 0);
9007 fprintf (dump_file, "\n");
9008 }
9009
9010 remove_exit_barriers (root_omp_region);
9011 expand_omp (root_omp_region);
9012
9013 omp_free_regions ();
9014 }
9015
9016 /* Scan the CFG and build a tree of OMP regions, storing the root of
9017 the OMP region tree in root_omp_region. */
9018
9019 static void
9020 build_omp_regions (void)
9021 {
9022 gcc_assert (root_omp_region == NULL);
9023 calculate_dominance_info (CDI_DOMINATORS);
9024 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
9025 }
9026
9027 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
9028
9029 static unsigned int
9030 execute_expand_omp (void)
9031 {
9032 build_omp_regions ();
9033
9034 if (!root_omp_region)
9035 return 0;
9036
9037 if (dump_file)
9038 {
9039 fprintf (dump_file, "\nOMP region tree\n\n");
9040 dump_omp_region (dump_file, root_omp_region, 0);
9041 fprintf (dump_file, "\n");
9042 }
9043
9044 remove_exit_barriers (root_omp_region);
9045
9046 expand_omp (root_omp_region);
9047
9048 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9049 verify_loop_structure ();
9050 cleanup_tree_cfg ();
9051
9052 omp_free_regions ();
9053
9054 return 0;
9055 }
9056
9057 /* OMP expansion -- the default pass, run before creation of SSA form. */
9058
9059 namespace {
9060
9061 const pass_data pass_data_expand_omp =
9062 {
9063 GIMPLE_PASS, /* type */
9064 "ompexp", /* name */
9065 OPTGROUP_OMP, /* optinfo_flags */
9066 TV_NONE, /* tv_id */
9067 PROP_gimple_any, /* properties_required */
9068 PROP_gimple_eomp, /* properties_provided */
9069 0, /* properties_destroyed */
9070 0, /* todo_flags_start */
9071 0, /* todo_flags_finish */
9072 };
9073
9074 class pass_expand_omp : public gimple_opt_pass
9075 {
9076 public:
9077 pass_expand_omp (gcc::context *ctxt)
9078 : gimple_opt_pass (pass_data_expand_omp, ctxt)
9079 {}
9080
9081 /* opt_pass methods: */
9082 virtual unsigned int execute (function *)
9083 {
9084 bool gate = ((flag_openacc != 0 || flag_openmp != 0
9085 || flag_openmp_simd != 0)
9086 && !seen_error ());
9087
9088 /* This pass always runs, to provide PROP_gimple_eomp.
9089 But often, there is nothing to do. */
9090 if (!gate)
9091 return 0;
9092
9093 return execute_expand_omp ();
9094 }
9095
9096 }; // class pass_expand_omp
9097
9098 } // anon namespace
9099
9100 gimple_opt_pass *
9101 make_pass_expand_omp (gcc::context *ctxt)
9102 {
9103 return new pass_expand_omp (ctxt);
9104 }
9105
9106 namespace {
9107
9108 const pass_data pass_data_expand_omp_ssa =
9109 {
9110 GIMPLE_PASS, /* type */
9111 "ompexpssa", /* name */
9112 OPTGROUP_OMP, /* optinfo_flags */
9113 TV_NONE, /* tv_id */
9114 PROP_cfg | PROP_ssa, /* properties_required */
9115 PROP_gimple_eomp, /* properties_provided */
9116 0, /* properties_destroyed */
9117 0, /* todo_flags_start */
9118 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
9119 };
9120
9121 class pass_expand_omp_ssa : public gimple_opt_pass
9122 {
9123 public:
9124 pass_expand_omp_ssa (gcc::context *ctxt)
9125 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
9126 {}
9127
9128 /* opt_pass methods: */
9129 virtual bool gate (function *fun)
9130 {
9131 return !(fun->curr_properties & PROP_gimple_eomp);
9132 }
9133 virtual unsigned int execute (function *) { return execute_expand_omp (); }
9134 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
9135
9136 }; // class pass_expand_omp_ssa
9137
9138 } // anon namespace
9139
9140 gimple_opt_pass *
9141 make_pass_expand_omp_ssa (gcc::context *ctxt)
9142 {
9143 return new pass_expand_omp_ssa (ctxt);
9144 }
9145
9146 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
9147 GIMPLE_* codes. */
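/* The boolean return value tells the caller whether a fallthru edge to the
   next basic block should still be created (see how FALLTHRU is computed
   below).  */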
9148
9149 bool
9150 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
9151 int *region_idx)
9152 {
9153 gimple *last = last_stmt (bb);
9154 enum gimple_code code = gimple_code (last);
9155 struct omp_region *cur_region = *region;
9156 bool fallthru = false;
9157
9158 switch (code)
9159 {
9160 case GIMPLE_OMP_PARALLEL:
9161 case GIMPLE_OMP_FOR:
9162 case GIMPLE_OMP_SINGLE:
9163 case GIMPLE_OMP_TEAMS:
9164 case GIMPLE_OMP_MASTER:
9165 case GIMPLE_OMP_TASKGROUP:
9166 case GIMPLE_OMP_CRITICAL:
9167 case GIMPLE_OMP_SECTION:
9168 case GIMPLE_OMP_GRID_BODY:
9169 cur_region = new_omp_region (bb, code, cur_region);
9170 fallthru = true;
9171 break;
9172
9173 case GIMPLE_OMP_TASK:
9174 cur_region = new_omp_region (bb, code, cur_region);
9175 fallthru = true;
9176 if (gimple_omp_task_taskwait_p (last))
9177 cur_region = cur_region->outer;
9178 break;
9179
9180 case GIMPLE_OMP_ORDERED:
9181 cur_region = new_omp_region (bb, code, cur_region);
9182 fallthru = true;
9183 if (omp_find_clause (gimple_omp_ordered_clauses
9184 (as_a <gomp_ordered *> (last)),
9185 OMP_CLAUSE_DEPEND))
9186 cur_region = cur_region->outer;
9187 break;
9188
9189 case GIMPLE_OMP_TARGET:
9190 cur_region = new_omp_region (bb, code, cur_region);
9191 fallthru = true;
9192 switch (gimple_omp_target_kind (last))
9193 {
9194 case GF_OMP_TARGET_KIND_REGION:
9195 case GF_OMP_TARGET_KIND_DATA:
9196 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9197 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9198 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9199 case GF_OMP_TARGET_KIND_OACC_DATA:
9200 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9201 break;
9202 case GF_OMP_TARGET_KIND_UPDATE:
9203 case GF_OMP_TARGET_KIND_ENTER_DATA:
9204 case GF_OMP_TARGET_KIND_EXIT_DATA:
9205 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9206 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9207 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9208 cur_region = cur_region->outer;
9209 break;
9210 default:
9211 gcc_unreachable ();
9212 }
9213 break;
9214
9215 case GIMPLE_OMP_SECTIONS:
9216 cur_region = new_omp_region (bb, code, cur_region);
9217 fallthru = true;
9218 break;
9219
9220 case GIMPLE_OMP_SECTIONS_SWITCH:
9221 fallthru = false;
9222 break;
9223
9224 case GIMPLE_OMP_ATOMIC_LOAD:
9225 case GIMPLE_OMP_ATOMIC_STORE:
9226 fallthru = true;
9227 break;
9228
9229 case GIMPLE_OMP_RETURN:
9230 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
9231 somewhere other than the next block. This will be
9232 created later. */
9233 cur_region->exit = bb;
9234 if (cur_region->type == GIMPLE_OMP_TASK)
9235 /* Add an edge corresponding to not scheduling the task
9236 immediately. */
9237 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
9238 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
9239 cur_region = cur_region->outer;
9240 break;
9241
9242 case GIMPLE_OMP_CONTINUE:
9243 cur_region->cont = bb;
9244 switch (cur_region->type)
9245 {
9246 case GIMPLE_OMP_FOR:
9247 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
9248 successor edges as abnormal to prevent splitting
9249 them. */
9250 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
9251 /* Make the loopback edge. */
9252 make_edge (bb, single_succ (cur_region->entry),
9253 EDGE_ABNORMAL);
9254
9255 /* Create an edge from GIMPLE_OMP_FOR to exit, which
9256 corresponds to the case that the body of the loop
9257 is not executed at all. */
9258 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
9259 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
9260 fallthru = false;
9261 break;
9262
9263 case GIMPLE_OMP_SECTIONS:
9264 /* Wire up the edges into and out of the nested sections. */
9265 {
9266 basic_block switch_bb = single_succ (cur_region->entry);
9267
9268 struct omp_region *i;
9269 for (i = cur_region->inner; i ; i = i->next)
9270 {
9271 gcc_assert (i->type == GIMPLE_OMP_SECTION);
9272 make_edge (switch_bb, i->entry, 0);
9273 make_edge (i->exit, bb, EDGE_FALLTHRU);
9274 }
9275
9276 /* Make the loopback edge to the block with
9277 GIMPLE_OMP_SECTIONS_SWITCH. */
9278 make_edge (bb, switch_bb, 0);
9279
9280 /* Make the edge from the switch to exit. */
9281 make_edge (switch_bb, bb->next_bb, 0);
9282 fallthru = false;
9283 }
9284 break;
9285
9286 case GIMPLE_OMP_TASK:
9287 fallthru = true;
9288 break;
9289
9290 default:
9291 gcc_unreachable ();
9292 }
9293 break;
9294
9295 default:
9296 gcc_unreachable ();
9297 }
9298
9299 if (*region != cur_region)
9300 {
9301 *region = cur_region;
9302 if (cur_region)
9303 *region_idx = cur_region->entry->index;
9304 else
9305 *region_idx = 0;
9306 }
9307
9308 return fallthru;
9309 }
9310
9311 #include "gt-omp-expand.h"