1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2019 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "gomp-constants.h"
57 #include "gimple-pretty-print.h"
58 #include "hsa-common.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66 struct omp_region
67 {
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* Copy of fd.lastprivate_conditional != 0. */
104 bool has_lastprivate_conditional;
105
106 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
107 a depend clause. */
108 gomp_ordered *ord_stmt;
109 };
110
111 static struct omp_region *root_omp_region;
112 static bool omp_any_child_fn_dumped;
113
114 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
115 bool = false);
116 static gphi *find_phi_with_arg_on_edge (tree, edge);
117 static void expand_omp (struct omp_region *region);
118
119 /* Return true if REGION is a combined parallel+workshare region. */
120
121 static inline bool
122 is_combined_parallel (struct omp_region *region)
123 {
124 return region->is_combined_parallel;
125 }
126
127 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
128 is the immediate dominator of PAR_ENTRY_BB, return true if there
129 are no data dependencies that would prevent expanding the parallel
130 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
131
132 When expanding a combined parallel+workshare region, the call to
133 the child function may need additional arguments in the case of
134 GIMPLE_OMP_FOR regions. In some cases, these arguments are
135 computed out of variables passed in from the parent to the child
136 via 'struct .omp_data_s'. For instance:
137
138 #pragma omp parallel for schedule (guided, i * 4)
139 for (j ...)
140
141 Is lowered into:
142
143 # BLOCK 2 (PAR_ENTRY_BB)
144 .omp_data_o.i = i;
145 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
146
147 # BLOCK 3 (WS_ENTRY_BB)
148 .omp_data_i = &.omp_data_o;
149 D.1667 = .omp_data_i->i;
150 D.1598 = D.1667 * 4;
151 #pragma omp for schedule (guided, D.1598)
152
153 When we outline the parallel region, the call to the child function
154 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
155 that value is computed *after* the call site. So, in principle we
156 cannot do the transformation.
157
158 To see whether the code in WS_ENTRY_BB blocks the combined
159 parallel+workshare call, we collect all the variables used in the
160 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
161 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
162 call.
163
164 FIXME. If we had the SSA form built at this point, we could merely
165 hoist the code in block 3 into block 2 and be done with it. But at
166 this point we don't have dataflow information and though we could
167 hack something up here, it is really not worth the aggravation. */
168
169 static bool
170 workshare_safe_to_combine_p (basic_block ws_entry_bb)
171 {
172 struct omp_for_data fd;
173 gimple *ws_stmt = last_stmt (ws_entry_bb);
174
175 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
176 return true;
177
178 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
179 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
180 return false;
181
182 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
183
184 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
185 return false;
186 if (fd.iter_type != long_integer_type_node)
187 return false;
188
189 /* FIXME. We give up too easily here. If any of these arguments
190 are not constants, they will likely involve variables that have
191 been mapped into fields of .omp_data_s for sharing with the child
192 function. With appropriate data flow, it would be possible to
193 see through this. */
194 if (!is_gimple_min_invariant (fd.loop.n1)
195 || !is_gimple_min_invariant (fd.loop.n2)
196 || !is_gimple_min_invariant (fd.loop.step)
197 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
198 return false;
199
200 return true;
201 }
202
203 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
204 presence (SIMD_SCHEDULE). */
205
206 static tree
207 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
208 {
209 if (!simd_schedule || integer_zerop (chunk_size))
210 return chunk_size;
211
212 poly_uint64 vf = omp_max_vf ();
213 if (known_eq (vf, 1U))
214 return chunk_size;
215
216 tree type = TREE_TYPE (chunk_size);
217 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
218 build_int_cst (type, vf - 1));
219 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
220 build_int_cst (type, -vf));
221 }
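/* As an illustration (hypothetical numbers, not from this file): with a
   maximum vectorization factor VF = 8 and a requested chunk size of 10,
   the expression built above rounds the chunk up to a whole number of
   vectors:

     (10 + (8 - 1)) & -8  ==  17 & ~7  ==  16

   so every chunk handed out by the runtime covers complete SIMD lanes.  */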
222
223 /* Collect additional arguments needed to emit a combined
224 parallel+workshare call. WS_STMT is the workshare directive being
225 expanded. */
226
227 static vec<tree, va_gc> *
228 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
229 {
230 tree t;
231 location_t loc = gimple_location (ws_stmt);
232 vec<tree, va_gc> *ws_args;
233
234 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
235 {
236 struct omp_for_data fd;
237 tree n1, n2;
238
239 omp_extract_for_data (for_stmt, &fd, NULL);
240 n1 = fd.loop.n1;
241 n2 = fd.loop.n2;
242
243 if (gimple_omp_for_combined_into_p (for_stmt))
244 {
245 tree innerc
246 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n1 = OMP_CLAUSE_DECL (innerc);
250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
251 OMP_CLAUSE__LOOPTEMP_);
252 gcc_assert (innerc);
253 n2 = OMP_CLAUSE_DECL (innerc);
254 }
255
256 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
257
258 t = fold_convert_loc (loc, long_integer_type_node, n1);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, n2);
262 ws_args->quick_push (t);
263
264 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
265 ws_args->quick_push (t);
266
267 if (fd.chunk_size)
268 {
269 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
270 t = omp_adjust_chunk_size (t, fd.simd_schedule);
271 ws_args->quick_push (t);
272 }
273
274 return ws_args;
275 }
276 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
277 {
278 /* Number of sections is equal to the number of edges from the
279 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
280 the exit of the sections region. */
281 basic_block bb = single_succ (gimple_bb (ws_stmt));
282 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
283 vec_alloc (ws_args, 1);
284 ws_args->quick_push (t);
285 return ws_args;
286 }
287
288 gcc_unreachable ();
289 }
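/* For the GIMPLE_OMP_FOR case the vector built above holds, in order, the
   loop bounds and step converted to long plus the (possibly SIMD-adjusted)
   chunk size when one was given; expand_parallel_call later splices these
   into the combined library call, which roughly has the shape (a sketch
   only, see libgomp for the exact prototypes):

     GOMP_parallel_loop_dynamic (fn, data, num_threads,
                                 n1, n2, step, chunk_size, flags);

   For GIMPLE_OMP_SECTIONS the single extra argument is the section count.  */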
290
291 /* Discover whether REGION is a combined parallel+workshare region. */
292
293 static void
294 determine_parallel_type (struct omp_region *region)
295 {
296 basic_block par_entry_bb, par_exit_bb;
297 basic_block ws_entry_bb, ws_exit_bb;
298
299 if (region == NULL || region->inner == NULL
300 || region->exit == NULL || region->inner->exit == NULL
301 || region->inner->cont == NULL)
302 return;
303
304 /* We only support parallel+for and parallel+sections. */
305 if (region->type != GIMPLE_OMP_PARALLEL
306 || (region->inner->type != GIMPLE_OMP_FOR
307 && region->inner->type != GIMPLE_OMP_SECTIONS))
308 return;
309
310 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
311 WS_EXIT_BB -> PAR_EXIT_BB. */
312 par_entry_bb = region->entry;
313 par_exit_bb = region->exit;
314 ws_entry_bb = region->inner->entry;
315 ws_exit_bb = region->inner->exit;
316
317 /* Give up if there are task reductions on the parallel; while this is
318 implementable, adding another big set of APIs or slowing down the
319 normal paths is not acceptable. */
320 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
321 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
322 return;
323
324 if (single_succ (par_entry_bb) == ws_entry_bb
325 && single_succ (ws_exit_bb) == par_exit_bb
326 && workshare_safe_to_combine_p (ws_entry_bb)
327 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
328 || (last_and_only_stmt (ws_entry_bb)
329 && last_and_only_stmt (par_exit_bb))))
330 {
331 gimple *par_stmt = last_stmt (par_entry_bb);
332 gimple *ws_stmt = last_stmt (ws_entry_bb);
333
334 if (region->inner->type == GIMPLE_OMP_FOR)
335 {
336 /* If this is a combined parallel loop, we need to determine
337 whether or not to use the combined library calls. There
338 are two cases where we do not apply the transformation:
339 static loops and any kind of ordered loop. In the first
340 case, we already open code the loop so there is no need
341 to do anything else. In the latter case, the combined
342 parallel loop call would still need extra synchronization
343 to implement ordered semantics, so there would not be any
344 gain in using the combined call. */
345 tree clauses = gimple_omp_for_clauses (ws_stmt);
346 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
347 if (c == NULL
348 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
349 == OMP_CLAUSE_SCHEDULE_STATIC)
350 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
351 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
352 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
353 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
354 return;
355 }
356 else if (region->inner->type == GIMPLE_OMP_SECTIONS
357 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
358 OMP_CLAUSE__REDUCTEMP_)
359 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360 OMP_CLAUSE__CONDTEMP_)))
361 return;
362
363 region->is_combined_parallel = true;
364 region->inner->is_combined_parallel = true;
365 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
366 }
367 }
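/* Concretely, a directive such as

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   can be emitted as one combined GOMP_parallel_loop_* call (the exact
   entry point is picked in expand_parallel_call from the schedule kind
   and modifiers), whereas with schedule (static) or an ordered clause the
   parallel and the loop stay separate, since static loops are open coded
   and ordered loops would need extra synchronization anyway.  */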
368
369 /* Debugging dumps for parallel regions. */
370 void dump_omp_region (FILE *, struct omp_region *, int);
371 void debug_omp_region (struct omp_region *);
372 void debug_all_omp_regions (void);
373
374 /* Dump the parallel region tree rooted at REGION. */
375
376 void
377 dump_omp_region (FILE *file, struct omp_region *region, int indent)
378 {
379 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
380 gimple_code_name[region->type]);
381
382 if (region->inner)
383 dump_omp_region (file, region->inner, indent + 4);
384
385 if (region->cont)
386 {
387 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
388 region->cont->index);
389 }
390
391 if (region->exit)
392 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
393 region->exit->index);
394 else
395 fprintf (file, "%*s[no exit marker]\n", indent, "");
396
397 if (region->next)
398 dump_omp_region (file, region->next, indent);
399 }
400
401 DEBUG_FUNCTION void
402 debug_omp_region (struct omp_region *region)
403 {
404 dump_omp_region (stderr, region, 0);
405 }
406
407 DEBUG_FUNCTION void
408 debug_all_omp_regions (void)
409 {
410 dump_omp_region (stderr, root_omp_region, 0);
411 }
412
413 /* Create a new parallel region starting at STMT inside region PARENT. */
414
415 static struct omp_region *
416 new_omp_region (basic_block bb, enum gimple_code type,
417 struct omp_region *parent)
418 {
419 struct omp_region *region = XCNEW (struct omp_region);
420
421 region->outer = parent;
422 region->entry = bb;
423 region->type = type;
424
425 if (parent)
426 {
427 /* This is a nested region. Add it to the list of inner
428 regions in PARENT. */
429 region->next = parent->inner;
430 parent->inner = region;
431 }
432 else
433 {
434 /* This is a toplevel region. Add it to the list of toplevel
435 regions in ROOT_OMP_REGION. */
436 region->next = root_omp_region;
437 root_omp_region = region;
438 }
439
440 return region;
441 }
442
443 /* Release the memory associated with the region tree rooted at REGION. */
444
445 static void
446 free_omp_region_1 (struct omp_region *region)
447 {
448 struct omp_region *i, *n;
449
450 for (i = region->inner; i ; i = n)
451 {
452 n = i->next;
453 free_omp_region_1 (i);
454 }
455
456 free (region);
457 }
458
459 /* Release the memory for the entire omp region tree. */
460
461 void
462 omp_free_regions (void)
463 {
464 struct omp_region *r, *n;
465 for (r = root_omp_region; r ; r = n)
466 {
467 n = r->next;
468 free_omp_region_1 (r);
469 }
470 root_omp_region = NULL;
471 }
472
473 /* A convenience function to build an empty GIMPLE_COND with just the
474 condition. */
475
476 static gcond *
477 gimple_build_cond_empty (tree cond)
478 {
479 enum tree_code pred_code;
480 tree lhs, rhs;
481
482 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
483 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
484 }
485
486 /* Return true if a parallel REGION is within a declare target function or
487 within a target region and is not a part of a gridified target. */
488
489 static bool
490 parallel_needs_hsa_kernel_p (struct omp_region *region)
491 {
492 bool indirect = false;
493 for (region = region->outer; region; region = region->outer)
494 {
495 if (region->type == GIMPLE_OMP_PARALLEL)
496 indirect = true;
497 else if (region->type == GIMPLE_OMP_TARGET)
498 {
499 gomp_target *tgt_stmt
500 = as_a <gomp_target *> (last_stmt (region->entry));
501
502 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
503 OMP_CLAUSE__GRIDDIM_))
504 return indirect;
505 else
506 return true;
507 }
508 }
509
510 if (lookup_attribute ("omp declare target",
511 DECL_ATTRIBUTES (current_function_decl)))
512 return true;
513
514 return false;
515 }
516
517 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
518 Add CHILD_FNDECL to decl chain of the supercontext of the block
519 ENTRY_BLOCK - this is the block which originally contained the
520 code from which CHILD_FNDECL was created.
521
522 Together, these actions ensure that the debug info for the outlined
523 function will be emitted with the correct lexical scope. */
524
525 static void
526 adjust_context_and_scope (struct omp_region *region, tree entry_block,
527 tree child_fndecl)
528 {
529 tree parent_fndecl = NULL_TREE;
530 gimple *entry_stmt;
531 /* OMP expansion expands inner regions before outer ones, so if
532 we e.g. have an explicit task region nested in a parallel region, when
533 expanding the task region current_function_decl will be the original
534 source function, but we actually want to use as context the child
535 function of the parallel. */
536 for (region = region->outer;
537 region && parent_fndecl == NULL_TREE; region = region->outer)
538 switch (region->type)
539 {
540 case GIMPLE_OMP_PARALLEL:
541 case GIMPLE_OMP_TASK:
542 case GIMPLE_OMP_TEAMS:
543 entry_stmt = last_stmt (region->entry);
544 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
545 break;
546 case GIMPLE_OMP_TARGET:
547 entry_stmt = last_stmt (region->entry);
548 parent_fndecl
549 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
550 break;
551 default:
552 break;
553 }
554
555 if (parent_fndecl == NULL_TREE)
556 parent_fndecl = current_function_decl;
557 DECL_CONTEXT (child_fndecl) = parent_fndecl;
558
559 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
560 {
561 tree b = BLOCK_SUPERCONTEXT (entry_block);
562 if (TREE_CODE (b) == BLOCK)
563 {
564 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
565 BLOCK_VARS (b) = child_fndecl;
566 }
567 }
568 }
569
570 /* Build the function calls to GOMP_parallel etc to actually
571 generate the parallel operation. REGION is the parallel region
572 being expanded. BB is the block where to insert the code. WS_ARGS
573 will be set if this is a call to a combined parallel+workshare
574 construct, it contains the list of additional arguments needed by
575 the workshare construct. */
576
577 static void
578 expand_parallel_call (struct omp_region *region, basic_block bb,
579 gomp_parallel *entry_stmt,
580 vec<tree, va_gc> *ws_args)
581 {
582 tree t, t1, t2, val, cond, c, clauses, flags;
583 gimple_stmt_iterator gsi;
584 gimple *stmt;
585 enum built_in_function start_ix;
586 int start_ix2;
587 location_t clause_loc;
588 vec<tree, va_gc> *args;
589
590 clauses = gimple_omp_parallel_clauses (entry_stmt);
591
592 /* Determine what flavor of GOMP_parallel we will be
593 emitting. */
594 start_ix = BUILT_IN_GOMP_PARALLEL;
595 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
596 if (rtmp)
597 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
598 else if (is_combined_parallel (region))
599 {
600 switch (region->inner->type)
601 {
602 case GIMPLE_OMP_FOR:
603 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
604 switch (region->inner->sched_kind)
605 {
606 case OMP_CLAUSE_SCHEDULE_RUNTIME:
607 /* For lastprivate(conditional:), our implementation
608 requires monotonic behavior. */
609 if (region->inner->has_lastprivate_conditional != 0)
610 start_ix2 = 3;
611 else if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
613 start_ix2 = 6;
614 else if ((region->inner->sched_modifiers
615 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
616 start_ix2 = 7;
617 else
618 start_ix2 = 3;
619 break;
620 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
621 case OMP_CLAUSE_SCHEDULE_GUIDED:
622 if ((region->inner->sched_modifiers
623 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
624 && !region->inner->has_lastprivate_conditional)
625 {
626 start_ix2 = 3 + region->inner->sched_kind;
627 break;
628 }
629 /* FALLTHRU */
630 default:
631 start_ix2 = region->inner->sched_kind;
632 break;
633 }
634 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
635 start_ix = (enum built_in_function) start_ix2;
636 break;
637 case GIMPLE_OMP_SECTIONS:
638 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
639 break;
640 default:
641 gcc_unreachable ();
642 }
643 }
644
645 /* By default, the value of NUM_THREADS is zero (selected at run time)
646 and there is no conditional. */
647 cond = NULL_TREE;
648 val = build_int_cst (unsigned_type_node, 0);
649 flags = build_int_cst (unsigned_type_node, 0);
650
651 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
652 if (c)
653 cond = OMP_CLAUSE_IF_EXPR (c);
654
655 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
656 if (c)
657 {
658 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
659 clause_loc = OMP_CLAUSE_LOCATION (c);
660 }
661 else
662 clause_loc = gimple_location (entry_stmt);
663
664 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
665 if (c)
666 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
667
668 /* Ensure 'val' is of the correct type. */
669 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
670
671 /* If we found the clause 'if (cond)', build either
672 (cond == 0) or (cond ? val : 1u). */
673 if (cond)
674 {
675 cond = gimple_boolify (cond);
676
677 if (integer_zerop (val))
678 val = fold_build2_loc (clause_loc,
679 EQ_EXPR, unsigned_type_node, cond,
680 build_int_cst (TREE_TYPE (cond), 0));
681 else
682 {
683 basic_block cond_bb, then_bb, else_bb;
684 edge e, e_then, e_else;
685 tree tmp_then, tmp_else, tmp_join, tmp_var;
686
687 tmp_var = create_tmp_var (TREE_TYPE (val));
688 if (gimple_in_ssa_p (cfun))
689 {
690 tmp_then = make_ssa_name (tmp_var);
691 tmp_else = make_ssa_name (tmp_var);
692 tmp_join = make_ssa_name (tmp_var);
693 }
694 else
695 {
696 tmp_then = tmp_var;
697 tmp_else = tmp_var;
698 tmp_join = tmp_var;
699 }
700
701 e = split_block_after_labels (bb);
702 cond_bb = e->src;
703 bb = e->dest;
704 remove_edge (e);
705
706 then_bb = create_empty_bb (cond_bb);
707 else_bb = create_empty_bb (then_bb);
708 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
709 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
710
711 stmt = gimple_build_cond_empty (cond);
712 gsi = gsi_start_bb (cond_bb);
713 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
714
715 gsi = gsi_start_bb (then_bb);
716 expand_omp_build_assign (&gsi, tmp_then, val, true);
717
718 gsi = gsi_start_bb (else_bb);
719 expand_omp_build_assign (&gsi, tmp_else,
720 build_int_cst (unsigned_type_node, 1),
721 true);
722
723 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
724 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
725 add_bb_to_loop (then_bb, cond_bb->loop_father);
726 add_bb_to_loop (else_bb, cond_bb->loop_father);
727 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
728 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
729
730 if (gimple_in_ssa_p (cfun))
731 {
732 gphi *phi = create_phi_node (tmp_join, bb);
733 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
734 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
735 }
736
737 val = tmp_join;
738 }
739
740 gsi = gsi_start_bb (bb);
741 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
742 false, GSI_CONTINUE_LINKING);
743 }
744
745 gsi = gsi_last_nondebug_bb (bb);
746 t = gimple_omp_parallel_data_arg (entry_stmt);
747 if (t == NULL)
748 t1 = null_pointer_node;
749 else
750 t1 = build_fold_addr_expr (t);
751 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
752 t2 = build_fold_addr_expr (child_fndecl);
753
754 vec_alloc (args, 4 + vec_safe_length (ws_args));
755 args->quick_push (t2);
756 args->quick_push (t1);
757 args->quick_push (val);
758 if (ws_args)
759 args->splice (*ws_args);
760 args->quick_push (flags);
761
762 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
763 builtin_decl_explicit (start_ix), args);
764
765 if (rtmp)
766 {
767 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
768 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
769 fold_convert (type,
770 fold_convert (pointer_sized_int_node, t)));
771 }
772 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
773 false, GSI_CONTINUE_LINKING);
774
775 if (hsa_gen_requested_p ()
776 && parallel_needs_hsa_kernel_p (region))
777 {
778 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
779 hsa_register_kernel (child_cnode);
780 }
781 }
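/* The call emitted above for a plain, non-combined region is roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, num_threads, flags);

   (foo._omp_fn.0 and .omp_data_o being the usual outlined child function
   and shared-data record), where num_threads is 0 to let the runtime
   decide or the value folded from the if/num_threads clauses as computed
   above.  Combined regions use the GOMP_parallel_loop_* or
   GOMP_parallel_sections variants with WS_ARGS spliced in, and a
   _reductemp_ clause switches to GOMP_parallel_reductions, whose return
   value is stored back as shown.  */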
782
783 /* Build the function call to GOMP_task to actually
784 generate the task operation. BB is the block where to insert the code. */
785
786 static void
787 expand_task_call (struct omp_region *region, basic_block bb,
788 gomp_task *entry_stmt)
789 {
790 tree t1, t2, t3;
791 gimple_stmt_iterator gsi;
792 location_t loc = gimple_location (entry_stmt);
793
794 tree clauses = gimple_omp_task_clauses (entry_stmt);
795
796 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
797 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
798 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
799 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
800 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
801 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
802
803 unsigned int iflags
804 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
805 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
806 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
807
808 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
809 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
810 tree num_tasks = NULL_TREE;
811 bool ull = false;
812 if (taskloop_p)
813 {
814 gimple *g = last_stmt (region->outer->entry);
815 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
816 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
817 struct omp_for_data fd;
818 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
819 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
820 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
821 OMP_CLAUSE__LOOPTEMP_);
822 startvar = OMP_CLAUSE_DECL (startvar);
823 endvar = OMP_CLAUSE_DECL (endvar);
824 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
825 if (fd.loop.cond_code == LT_EXPR)
826 iflags |= GOMP_TASK_FLAG_UP;
827 tree tclauses = gimple_omp_for_clauses (g);
828 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
829 if (num_tasks)
830 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
831 else
832 {
833 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
834 if (num_tasks)
835 {
836 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
837 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
838 }
839 else
840 num_tasks = integer_zero_node;
841 }
842 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
843 if (ifc == NULL_TREE)
844 iflags |= GOMP_TASK_FLAG_IF;
845 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
846 iflags |= GOMP_TASK_FLAG_NOGROUP;
847 ull = fd.iter_type == long_long_unsigned_type_node;
848 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
849 iflags |= GOMP_TASK_FLAG_REDUCTION;
850 }
851 else if (priority)
852 iflags |= GOMP_TASK_FLAG_PRIORITY;
853
854 tree flags = build_int_cst (unsigned_type_node, iflags);
855
856 tree cond = boolean_true_node;
857 if (ifc)
858 {
859 if (taskloop_p)
860 {
861 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
862 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
863 build_int_cst (unsigned_type_node,
864 GOMP_TASK_FLAG_IF),
865 build_int_cst (unsigned_type_node, 0));
866 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
867 flags, t);
868 }
869 else
870 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
871 }
872
873 if (finalc)
874 {
875 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
876 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
877 build_int_cst (unsigned_type_node,
878 GOMP_TASK_FLAG_FINAL),
879 build_int_cst (unsigned_type_node, 0));
880 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
881 }
882 if (depend)
883 depend = OMP_CLAUSE_DECL (depend);
884 else
885 depend = build_int_cst (ptr_type_node, 0);
886 if (priority)
887 priority = fold_convert (integer_type_node,
888 OMP_CLAUSE_PRIORITY_EXPR (priority));
889 else
890 priority = integer_zero_node;
891
892 gsi = gsi_last_nondebug_bb (bb);
893 tree t = gimple_omp_task_data_arg (entry_stmt);
894 if (t == NULL)
895 t2 = null_pointer_node;
896 else
897 t2 = build_fold_addr_expr_loc (loc, t);
898 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
899 t = gimple_omp_task_copy_fn (entry_stmt);
900 if (t == NULL)
901 t3 = null_pointer_node;
902 else
903 t3 = build_fold_addr_expr_loc (loc, t);
904
905 if (taskloop_p)
906 t = build_call_expr (ull
907 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
908 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
909 11, t1, t2, t3,
910 gimple_omp_task_arg_size (entry_stmt),
911 gimple_omp_task_arg_align (entry_stmt), flags,
912 num_tasks, priority, startvar, endvar, step);
913 else
914 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
915 9, t1, t2, t3,
916 gimple_omp_task_arg_size (entry_stmt),
917 gimple_omp_task_arg_align (entry_stmt), cond, flags,
918 depend, priority);
919
920 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
921 false, GSI_CONTINUE_LINKING);
922 }
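/* The plain task call built above therefore takes roughly the form

     GOMP_task (fn, data, copy_fn, arg_size, arg_align,
                if_cond, flags, depend, priority);

   matching the nine arguments pushed, while taskloops instead call the
   eleven-argument GOMP_taskloop or GOMP_taskloop_ull entry point, which
   drops the if condition and the depend vector (the if clause having been
   folded into FLAGS above) and adds num_tasks, start, end and step.  */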
923
924 /* Build the function call to GOMP_taskwait_depend to actually
925 generate the taskwait operation. BB is the block where to insert the
926 code. */
927
928 static void
929 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
930 {
931 tree clauses = gimple_omp_task_clauses (entry_stmt);
932 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
933 if (depend == NULL_TREE)
934 return;
935
936 depend = OMP_CLAUSE_DECL (depend);
937
938 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
939 tree t
940 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
941 1, depend);
942
943 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
944 false, GSI_CONTINUE_LINKING);
945 }
946
947 /* Build the function call to GOMP_teams_reg to actually
948 generate the host teams operation. REGION is the teams region
949 being expanded. BB is the block where to insert the code. */
950
951 static void
952 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
953 {
954 tree clauses = gimple_omp_teams_clauses (entry_stmt);
955 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
956 if (num_teams == NULL_TREE)
957 num_teams = build_int_cst (unsigned_type_node, 0);
958 else
959 {
960 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
961 num_teams = fold_convert (unsigned_type_node, num_teams);
962 }
963 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
964 if (thread_limit == NULL_TREE)
965 thread_limit = build_int_cst (unsigned_type_node, 0);
966 else
967 {
968 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
969 thread_limit = fold_convert (unsigned_type_node, thread_limit);
970 }
971
972 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
973 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
974 if (t == NULL)
975 t1 = null_pointer_node;
976 else
977 t1 = build_fold_addr_expr (t);
978 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
979 tree t2 = build_fold_addr_expr (child_fndecl);
980
981 vec<tree, va_gc> *args;
982 vec_alloc (args, 5);
983 args->quick_push (t2);
984 args->quick_push (t1);
985 args->quick_push (num_teams);
986 args->quick_push (thread_limit);
987 /* For future extensibility. */
988 args->quick_push (build_zero_cst (unsigned_type_node));
989
990 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
991 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
992 args);
993
994 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
995 false, GSI_CONTINUE_LINKING);
996 }
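/* In sketch form the host teams call generated here is

     GOMP_teams_reg (fn, data, num_teams, thread_limit, 0);

   with num_teams and thread_limit both defaulting to 0 (runtime choice)
   when the corresponding clause is absent, and the trailing zero being
   the flags word reserved above for future extensibility.  */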
997
998 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
999
1000 static tree
1001 vec2chain (vec<tree, va_gc> *v)
1002 {
1003 tree chain = NULL_TREE, t;
1004 unsigned ix;
1005
1006 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1007 {
1008 DECL_CHAIN (t) = chain;
1009 chain = t;
1010 }
1011
1012 return chain;
1013 }
1014
1015 /* Remove barriers in REGION->EXIT's block. Note that this is only
1016 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1017 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1018 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1019 removed. */
1020
1021 static void
1022 remove_exit_barrier (struct omp_region *region)
1023 {
1024 gimple_stmt_iterator gsi;
1025 basic_block exit_bb;
1026 edge_iterator ei;
1027 edge e;
1028 gimple *stmt;
1029 int any_addressable_vars = -1;
1030
1031 exit_bb = region->exit;
1032
1033 /* If the parallel region doesn't return, we don't have REGION->EXIT
1034 block at all. */
1035 if (! exit_bb)
1036 return;
1037
1038 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1039 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1040 statements that can appear in between are extremely limited -- no
1041 memory operations at all. Here, we allow nothing at all, so the
1042 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1043 gsi = gsi_last_nondebug_bb (exit_bb);
1044 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1045 gsi_prev_nondebug (&gsi);
1046 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1047 return;
1048
1049 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1050 {
1051 gsi = gsi_last_nondebug_bb (e->src);
1052 if (gsi_end_p (gsi))
1053 continue;
1054 stmt = gsi_stmt (gsi);
1055 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1056 && !gimple_omp_return_nowait_p (stmt))
1057 {
1058 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1059 in many cases. If there could be tasks queued, the barrier
1060 might be needed to let the tasks run before some local
1061 variable of the parallel that the task uses as shared
1062 runs out of scope. The task can be spawned either
1063 from within current function (this would be easy to check)
1064 or from some function it calls and gets passed an address
1065 of such a variable. */
1066 if (any_addressable_vars < 0)
1067 {
1068 gomp_parallel *parallel_stmt
1069 = as_a <gomp_parallel *> (last_stmt (region->entry));
1070 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1071 tree local_decls, block, decl;
1072 unsigned ix;
1073
1074 any_addressable_vars = 0;
1075 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1076 if (TREE_ADDRESSABLE (decl))
1077 {
1078 any_addressable_vars = 1;
1079 break;
1080 }
1081 for (block = gimple_block (stmt);
1082 !any_addressable_vars
1083 && block
1084 && TREE_CODE (block) == BLOCK;
1085 block = BLOCK_SUPERCONTEXT (block))
1086 {
1087 for (local_decls = BLOCK_VARS (block);
1088 local_decls;
1089 local_decls = DECL_CHAIN (local_decls))
1090 if (TREE_ADDRESSABLE (local_decls))
1091 {
1092 any_addressable_vars = 1;
1093 break;
1094 }
1095 if (block == gimple_block (parallel_stmt))
1096 break;
1097 }
1098 }
1099 if (!any_addressable_vars)
1100 gimple_omp_return_set_nowait (stmt);
1101 }
1102 }
1103 }
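/* The typical payoff is a body like

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         ...                     (implicit barrier at end of the loop)
     }                           (implicit barrier at end of the parallel)

   where the workshare's trailing barrier is redundant because the join of
   the parallel follows immediately; unless addressable locals might still
   be needed by queued tasks, the inner GIMPLE_OMP_RETURN is marked nowait
   above.  */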
1104
1105 static void
1106 remove_exit_barriers (struct omp_region *region)
1107 {
1108 if (region->type == GIMPLE_OMP_PARALLEL)
1109 remove_exit_barrier (region);
1110
1111 if (region->inner)
1112 {
1113 region = region->inner;
1114 remove_exit_barriers (region);
1115 while (region->next)
1116 {
1117 region = region->next;
1118 remove_exit_barriers (region);
1119 }
1120 }
1121 }
1122
1123 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1124 calls. These can't be declared as const functions, but
1125 within one parallel body they are constant, so they can be
1126 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1127 which are declared const. Similarly for task body, except
1128 that in untied task omp_get_thread_num () can change at any task
1129 scheduling point. */
1130
1131 static void
1132 optimize_omp_library_calls (gimple *entry_stmt)
1133 {
1134 basic_block bb;
1135 gimple_stmt_iterator gsi;
1136 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1137 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1138 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1139 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1140 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1141 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1142 OMP_CLAUSE_UNTIED) != NULL);
1143
1144 FOR_EACH_BB_FN (bb, cfun)
1145 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1146 {
1147 gimple *call = gsi_stmt (gsi);
1148 tree decl;
1149
1150 if (is_gimple_call (call)
1151 && (decl = gimple_call_fndecl (call))
1152 && DECL_EXTERNAL (decl)
1153 && TREE_PUBLIC (decl)
1154 && DECL_INITIAL (decl) == NULL)
1155 {
1156 tree built_in;
1157
1158 if (DECL_NAME (decl) == thr_num_id)
1159 {
1160 /* In #pragma omp task untied omp_get_thread_num () can change
1161 during the execution of the task region. */
1162 if (untied_task)
1163 continue;
1164 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1165 }
1166 else if (DECL_NAME (decl) == num_thr_id)
1167 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1168 else
1169 continue;
1170
1171 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1172 || gimple_call_num_args (call) != 0)
1173 continue;
1174
1175 if (flag_exceptions && !TREE_NOTHROW (decl))
1176 continue;
1177
1178 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1179 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1180 TREE_TYPE (TREE_TYPE (built_in))))
1181 continue;
1182
1183 gimple_call_set_fndecl (call, built_in);
1184 }
1185 }
1186 }
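/* In effect, inside an outlined parallel body a call such as

     n = omp_get_num_threads ();

   is redirected to the const __builtin_omp_get_num_threads (), so later
   passes can CSE repeated queries; omp_get_thread_num () is treated the
   same way except in untied tasks, where its value may change across task
   scheduling points as noted above.  */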
1187
1188 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1189 regimplified. */
1190
1191 static tree
1192 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1193 {
1194 tree t = *tp;
1195
1196 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1197 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1198 return t;
1199
1200 if (TREE_CODE (t) == ADDR_EXPR)
1201 recompute_tree_invariant_for_addr_expr (t);
1202
1203 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1204 return NULL_TREE;
1205 }
1206
1207 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1208
1209 static void
1210 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1211 bool after)
1212 {
1213 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1214 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1215 !after, after ? GSI_CONTINUE_LINKING
1216 : GSI_SAME_STMT);
1217 gimple *stmt = gimple_build_assign (to, from);
1218 if (after)
1219 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1220 else
1221 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1222 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1223 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1224 {
1225 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1226 gimple_regimplify_operands (stmt, &gsi);
1227 }
1228 }
1229
1230 /* Expand the OpenMP parallel or task directive starting at REGION. */
1231
1232 static void
1233 expand_omp_taskreg (struct omp_region *region)
1234 {
1235 basic_block entry_bb, exit_bb, new_bb;
1236 struct function *child_cfun;
1237 tree child_fn, block, t;
1238 gimple_stmt_iterator gsi;
1239 gimple *entry_stmt, *stmt;
1240 edge e;
1241 vec<tree, va_gc> *ws_args;
1242
1243 entry_stmt = last_stmt (region->entry);
1244 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1245 && gimple_omp_task_taskwait_p (entry_stmt))
1246 {
1247 new_bb = region->entry;
1248 gsi = gsi_last_nondebug_bb (region->entry);
1249 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1250 gsi_remove (&gsi, true);
1251 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1252 return;
1253 }
1254
1255 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1256 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1257
1258 entry_bb = region->entry;
1259 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1260 exit_bb = region->cont;
1261 else
1262 exit_bb = region->exit;
1263
1264 if (is_combined_parallel (region))
1265 ws_args = region->ws_args;
1266 else
1267 ws_args = NULL;
1268
1269 if (child_cfun->cfg)
1270 {
1271 /* Due to inlining, it may happen that we have already outlined
1272 the region, in which case all we need to do is make the
1273 sub-graph unreachable and emit the parallel call. */
1274 edge entry_succ_e, exit_succ_e;
1275
1276 entry_succ_e = single_succ_edge (entry_bb);
1277
1278 gsi = gsi_last_nondebug_bb (entry_bb);
1279 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1282 gsi_remove (&gsi, true);
1283
1284 new_bb = entry_bb;
1285 if (exit_bb)
1286 {
1287 exit_succ_e = single_succ_edge (exit_bb);
1288 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1289 }
1290 remove_edge_and_dominated_blocks (entry_succ_e);
1291 }
1292 else
1293 {
1294 unsigned srcidx, dstidx, num;
1295
1296 /* If the parallel region needs data sent from the parent
1297 function, then the very first statement (except possible
1298 tree profile counter updates) of the parallel body
1299 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1300 &.OMP_DATA_O is passed as an argument to the child function,
1301 we need to replace it with the argument as seen by the child
1302 function.
1303
1304 In most cases, this will end up being the identity assignment
1305 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1306 a function call that has been inlined, the original PARM_DECL
1307 .OMP_DATA_I may have been converted into a different local
1308 variable, in which case we need to keep the assignment. */
1309 if (gimple_omp_taskreg_data_arg (entry_stmt))
1310 {
1311 basic_block entry_succ_bb
1312 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1313 : FALLTHRU_EDGE (entry_bb)->dest;
1314 tree arg;
1315 gimple *parcopy_stmt = NULL;
1316
1317 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1318 {
1319 gimple *stmt;
1320
1321 gcc_assert (!gsi_end_p (gsi));
1322 stmt = gsi_stmt (gsi);
1323 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1324 continue;
1325
1326 if (gimple_num_ops (stmt) == 2)
1327 {
1328 tree arg = gimple_assign_rhs1 (stmt);
1329
1330 /* We're ignoring the subcode because we're
1331 effectively doing a STRIP_NOPS. */
1332
1333 if (TREE_CODE (arg) == ADDR_EXPR
1334 && (TREE_OPERAND (arg, 0)
1335 == gimple_omp_taskreg_data_arg (entry_stmt)))
1336 {
1337 parcopy_stmt = stmt;
1338 break;
1339 }
1340 }
1341 }
1342
1343 gcc_assert (parcopy_stmt != NULL);
1344 arg = DECL_ARGUMENTS (child_fn);
1345
1346 if (!gimple_in_ssa_p (cfun))
1347 {
1348 if (gimple_assign_lhs (parcopy_stmt) == arg)
1349 gsi_remove (&gsi, true);
1350 else
1351 {
1352 /* ?? Is setting the subcode really necessary ?? */
1353 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1354 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1355 }
1356 }
1357 else
1358 {
1359 tree lhs = gimple_assign_lhs (parcopy_stmt);
1360 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1361 /* We'd like to set the rhs to the default def in the child_fn,
1362 but it's too early to create ssa names in the child_fn.
1363 Instead, we set the rhs to the parm. In
1364 move_sese_region_to_fn, we introduce a default def for the
1365 parm, map the parm to its default def, and once we encounter
1366 this stmt, replace the parm with the default def. */
1367 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1368 update_stmt (parcopy_stmt);
1369 }
1370 }
1371
1372 /* Declare local variables needed in CHILD_CFUN. */
1373 block = DECL_INITIAL (child_fn);
1374 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1375 /* The gimplifier could record temporaries in parallel/task block
1376 rather than in containing function's local_decls chain,
1377 which would mean cgraph missed finalizing them. Do it now. */
1378 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1379 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1380 varpool_node::finalize_decl (t);
1381 DECL_SAVED_TREE (child_fn) = NULL;
1382 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1383 gimple_set_body (child_fn, NULL);
1384 TREE_USED (block) = 1;
1385
1386 /* Reset DECL_CONTEXT on function arguments. */
1387 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1388 DECL_CONTEXT (t) = child_fn;
1389
1390 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1391 so that it can be moved to the child function. */
1392 gsi = gsi_last_nondebug_bb (entry_bb);
1393 stmt = gsi_stmt (gsi);
1394 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1395 || gimple_code (stmt) == GIMPLE_OMP_TASK
1396 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1397 e = split_block (entry_bb, stmt);
1398 gsi_remove (&gsi, true);
1399 entry_bb = e->dest;
1400 edge e2 = NULL;
1401 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1402 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1403 else
1404 {
1405 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1406 gcc_assert (e2->dest == region->exit);
1407 remove_edge (BRANCH_EDGE (entry_bb));
1408 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1409 gsi = gsi_last_nondebug_bb (region->exit);
1410 gcc_assert (!gsi_end_p (gsi)
1411 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1412 gsi_remove (&gsi, true);
1413 }
1414
1415 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1416 if (exit_bb)
1417 {
1418 gsi = gsi_last_nondebug_bb (exit_bb);
1419 gcc_assert (!gsi_end_p (gsi)
1420 && (gimple_code (gsi_stmt (gsi))
1421 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1422 stmt = gimple_build_return (NULL);
1423 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1424 gsi_remove (&gsi, true);
1425 }
1426
1427 /* Move the parallel region into CHILD_CFUN. */
1428
1429 if (gimple_in_ssa_p (cfun))
1430 {
1431 init_tree_ssa (child_cfun);
1432 init_ssa_operands (child_cfun);
1433 child_cfun->gimple_df->in_ssa_p = true;
1434 block = NULL_TREE;
1435 }
1436 else
1437 block = gimple_block (entry_stmt);
1438
1439 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1440 if (exit_bb)
1441 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1442 if (e2)
1443 {
1444 basic_block dest_bb = e2->dest;
1445 if (!exit_bb)
1446 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1447 remove_edge (e2);
1448 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1449 }
1450 /* When the OMP expansion process cannot guarantee an up-to-date
1451 loop tree, arrange for the child function to fix up loops. */
1452 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1453 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1454
1455 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1456 num = vec_safe_length (child_cfun->local_decls);
1457 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1458 {
1459 t = (*child_cfun->local_decls)[srcidx];
1460 if (DECL_CONTEXT (t) == cfun->decl)
1461 continue;
1462 if (srcidx != dstidx)
1463 (*child_cfun->local_decls)[dstidx] = t;
1464 dstidx++;
1465 }
1466 if (dstidx != num)
1467 vec_safe_truncate (child_cfun->local_decls, dstidx);
1468
1469 /* Inform the callgraph about the new function. */
1470 child_cfun->curr_properties = cfun->curr_properties;
1471 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1472 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1473 cgraph_node *node = cgraph_node::get_create (child_fn);
1474 node->parallelized_function = 1;
1475 cgraph_node::add_new_function (child_fn, true);
1476
1477 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1478 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1479
1480 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1481 fixed in a following pass. */
1482 push_cfun (child_cfun);
1483 if (need_asm)
1484 assign_assembler_name_if_needed (child_fn);
1485
1486 if (optimize)
1487 optimize_omp_library_calls (entry_stmt);
1488 update_max_bb_count ();
1489 cgraph_edge::rebuild_edges ();
1490
1491 /* Some EH regions might become dead, see PR34608. If
1492 pass_cleanup_cfg isn't the first pass to happen with the
1493 new child, these dead EH edges might cause problems.
1494 Clean them up now. */
1495 if (flag_exceptions)
1496 {
1497 basic_block bb;
1498 bool changed = false;
1499
1500 FOR_EACH_BB_FN (bb, cfun)
1501 changed |= gimple_purge_dead_eh_edges (bb);
1502 if (changed)
1503 cleanup_tree_cfg ();
1504 }
1505 if (gimple_in_ssa_p (cfun))
1506 update_ssa (TODO_update_ssa);
1507 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1508 verify_loop_structure ();
1509 pop_cfun ();
1510
1511 if (dump_file && !gimple_in_ssa_p (cfun))
1512 {
1513 omp_any_child_fn_dumped = true;
1514 dump_function_header (dump_file, child_fn, dump_flags);
1515 dump_function_to_file (child_fn, dump_file, dump_flags);
1516 }
1517 }
1518
1519 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1520
1521 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1522 expand_parallel_call (region, new_bb,
1523 as_a <gomp_parallel *> (entry_stmt), ws_args);
1524 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1525 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1526 else
1527 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1528 if (gimple_in_ssa_p (cfun))
1529 update_ssa (TODO_update_ssa_only_virtuals);
1530 }
1531
1532 /* Information about members of an OpenACC collapsed loop nest. */
1533
1534 struct oacc_collapse
1535 {
1536 tree base; /* Base value. */
1537 tree iters; /* Number of steps. */
1538 tree step; /* Step size. */
1539 tree tile; /* Tile increment (if tiled). */
1540 tree outer; /* Tile iterator var. */
1541 };
1542
1543 /* Helper for expand_oacc_for. Determine collapsed loop information.
1544 Fill in COUNTS array. Emit any initialization code before GSI.
1545 Return the calculated outer loop bound of BOUND_TYPE. */
1546
1547 static tree
1548 expand_oacc_collapse_init (const struct omp_for_data *fd,
1549 gimple_stmt_iterator *gsi,
1550 oacc_collapse *counts, tree bound_type,
1551 location_t loc)
1552 {
1553 tree tiling = fd->tiling;
1554 tree total = build_int_cst (bound_type, 1);
1555 int ix;
1556
1557 gcc_assert (integer_onep (fd->loop.step));
1558 gcc_assert (integer_zerop (fd->loop.n1));
1559
1560 /* When tiling, the first operand of the tile clause applies to the
1561 innermost loop, and we work outwards from there. Seems
1562 backwards, but whatever. */
1563 for (ix = fd->collapse; ix--;)
1564 {
1565 const omp_for_data_loop *loop = &fd->loops[ix];
1566
1567 tree iter_type = TREE_TYPE (loop->v);
1568 tree diff_type = iter_type;
1569 tree plus_type = iter_type;
1570
1571 gcc_assert (loop->cond_code == fd->loop.cond_code);
1572
1573 if (POINTER_TYPE_P (iter_type))
1574 plus_type = sizetype;
1575 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1576 diff_type = signed_type_for (diff_type);
1577 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1578 diff_type = integer_type_node;
1579
1580 if (tiling)
1581 {
1582 tree num = build_int_cst (integer_type_node, fd->collapse);
1583 tree loop_no = build_int_cst (integer_type_node, ix);
1584 tree tile = TREE_VALUE (tiling);
1585 gcall *call
1586 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1587 /* gwv-outer=*/integer_zero_node,
1588 /* gwv-inner=*/integer_zero_node);
1589
1590 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1591 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1592 gimple_call_set_lhs (call, counts[ix].tile);
1593 gimple_set_location (call, loc);
1594 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1595
1596 tiling = TREE_CHAIN (tiling);
1597 }
1598 else
1599 {
1600 counts[ix].tile = NULL;
1601 counts[ix].outer = loop->v;
1602 }
1603
1604 tree b = loop->n1;
1605 tree e = loop->n2;
1606 tree s = loop->step;
1607 bool up = loop->cond_code == LT_EXPR;
1608 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1609 bool negating;
1610 tree expr;
1611
1612 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1615 true, GSI_SAME_STMT);
1616
1617 /* Convert the step, avoiding possible unsigned->signed overflow. */
1618 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1619 if (negating)
1620 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1621 s = fold_convert (diff_type, s);
1622 if (negating)
1623 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1624 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1625 true, GSI_SAME_STMT);
1626
1627 /* Determine the range, avoiding possible unsigned->signed overflow. */
1628 negating = !up && TYPE_UNSIGNED (iter_type);
1629 expr = fold_build2 (MINUS_EXPR, plus_type,
1630 fold_convert (plus_type, negating ? b : e),
1631 fold_convert (plus_type, negating ? e : b));
1632 expr = fold_convert (diff_type, expr);
1633 if (negating)
1634 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1635 tree range = force_gimple_operand_gsi
1636 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1637
1638 /* Determine number of iterations. */
1639 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1640 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1641 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1642
1643 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1644 true, GSI_SAME_STMT);
1645
1646 counts[ix].base = b;
1647 counts[ix].iters = iters;
1648 counts[ix].step = s;
1649
1650 total = fold_build2 (MULT_EXPR, bound_type, total,
1651 fold_convert (bound_type, iters));
1652 }
1653
1654 return total;
1655 }
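/* As a worked example (hypothetical bounds): a member loop with n1 = 0,
   n2 = 10, step = 3 and a < condition gives

     range = 10 - 0 = 10
     iters = (range - 1 + 3) / 3 = 4

   i.e. the iterations 0, 3, 6, 9; the bound returned to the caller is the
   product of the per-loop iteration counts.  */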
1656
1657 /* Emit initializers for collapsed loop members. INNER is true if
1658 this is for the element loop of a TILE. IVAR is the outer
1659 loop iteration variable, from which collapsed loop iteration values
1660 are calculated. COUNTS array has been initialized by
1661 expand_oacc_collapse_init. */
1662
1663 static void
1664 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1665 gimple_stmt_iterator *gsi,
1666 const oacc_collapse *counts, tree ivar)
1667 {
1668 tree ivar_type = TREE_TYPE (ivar);
1669
1670 /* The most rapidly changing iteration variable is the innermost
1671 one. */
1672 for (int ix = fd->collapse; ix--;)
1673 {
1674 const omp_for_data_loop *loop = &fd->loops[ix];
1675 const oacc_collapse *collapse = &counts[ix];
1676 tree v = inner ? loop->v : collapse->outer;
1677 tree iter_type = TREE_TYPE (v);
1678 tree diff_type = TREE_TYPE (collapse->step);
1679 tree plus_type = iter_type;
1680 enum tree_code plus_code = PLUS_EXPR;
1681 tree expr;
1682
1683 if (POINTER_TYPE_P (iter_type))
1684 {
1685 plus_code = POINTER_PLUS_EXPR;
1686 plus_type = sizetype;
1687 }
1688
1689 expr = ivar;
1690 if (ix)
1691 {
1692 tree mod = fold_convert (ivar_type, collapse->iters);
1693 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1694 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1695 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1696 true, GSI_SAME_STMT);
1697 }
1698
1699 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1700 collapse->step);
1701 expr = fold_build2 (plus_code, iter_type,
1702 inner ? collapse->outer : collapse->base,
1703 fold_convert (plus_type, expr));
1704 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1705 true, GSI_SAME_STMT);
1706 gassign *ass = gimple_build_assign (v, expr);
1707 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1708 }
1709 }
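/* For instance (hypothetical counts): with two collapsed loops whose
   iteration counts are 4 (outer) and 5 (inner), the combined index
   IVAR = 13 is unpacked innermost first as

     inner = 13 % 5 = 3,  ivar = 13 / 5 = 2,  outer = 2

   and each member variable is then set to base + index * step (or to the
   tile iterator plus the scaled index when expanding the element loop of
   a TILE).  */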
1710
1711 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1712 of the combined collapse > 1 loop constructs, generate code like:
1713 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1714 if (cond3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 count3 = (adj + N32 - N31) / STEP3;
1719 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1720 if (cond2 is <)
1721 adj = STEP2 - 1;
1722 else
1723 adj = STEP2 + 1;
1724 count2 = (adj + N22 - N21) / STEP2;
1725 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1726 if (cond1 is <)
1727 adj = STEP1 - 1;
1728 else
1729 adj = STEP1 + 1;
1730 count1 = (adj + N12 - N11) / STEP1;
1731 count = count1 * count2 * count3;
1732 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1733 count = 0;
1734 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1735 of the combined loop constructs, just initialize COUNTS array
1736 from the _looptemp_ clauses. */
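
/* As a concrete illustration (example loop invented here), for
     #pragma omp for collapse(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j += 2)
   the generated tests check that 0 < N and 0 < M hold (branching to
   ZERO_ITER_BB otherwise), count1 = (1 - 1 + N - 0) / 1 = N,
   count2 = (2 - 1 + M - 0) / 2 = (M + 1) / 2, and count = count1 * count2.  */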
1737
1738 /* NOTE: It *could* be better to merge all of the BBs together,
1739 creating one larger BB with all the computation and the unexpected
1740 jump at the end. I.e.
1741
1742 bool zero3, zero2, zero1, zero;
1743
1744 zero3 = N32 c3 N31;
1745 count3 = (N32 - N31) /[cl] STEP3;
1746 zero2 = N22 c2 N21;
1747 count2 = (N22 - N21) /[cl] STEP2;
1748 zero1 = N12 c1 N11;
1749 count1 = (N12 - N11) /[cl] STEP1;
1750 zero = zero3 || zero2 || zero1;
1751 count = count1 * count2 * count3;
1752 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1753
1754 After all, we expect zero to be false, and thus we expect to have to
1755 evaluate all of the comparison expressions, so short-circuiting
1756 oughtn't be a win. Since the condition isn't protecting a
1757 denominator, we're not concerned about divide-by-zero, so we can
1758 fully evaluate count even if a numerator turned out to be wrong.
1759
1760 It seems like putting this all together would create much better
1761 scheduling opportunities, and less pressure on the chip's branch
1762 predictor. */
1763
1764 static void
1765 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1766 basic_block &entry_bb, tree *counts,
1767 basic_block &zero_iter1_bb, int &first_zero_iter1,
1768 basic_block &zero_iter2_bb, int &first_zero_iter2,
1769 basic_block &l2_dom_bb)
1770 {
1771 tree t, type = TREE_TYPE (fd->loop.v);
1772 edge e, ne;
1773 int i;
1774
1775 /* Collapsed loops need work for expansion into SSA form. */
1776 gcc_assert (!gimple_in_ssa_p (cfun));
1777
1778 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1779 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1780 {
1781 gcc_assert (fd->ordered == 0);
1782 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1783 isn't supposed to be handled, as the inner loop doesn't
1784 use it. */
1785 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 for (i = 0; i < fd->collapse; i++)
1789 {
1790 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1791 OMP_CLAUSE__LOOPTEMP_);
1792 gcc_assert (innerc);
1793 if (i)
1794 counts[i] = OMP_CLAUSE_DECL (innerc);
1795 else
1796 counts[0] = NULL_TREE;
1797 }
1798 return;
1799 }
1800
1801 for (i = fd->collapse; i < fd->ordered; i++)
1802 {
1803 tree itype = TREE_TYPE (fd->loops[i].v);
1804 counts[i] = NULL_TREE;
1805 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1806 fold_convert (itype, fd->loops[i].n1),
1807 fold_convert (itype, fd->loops[i].n2));
1808 if (t && integer_zerop (t))
1809 {
1810 for (i = fd->collapse; i < fd->ordered; i++)
1811 counts[i] = build_int_cst (type, 0);
1812 break;
1813 }
1814 }
1815 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1816 {
1817 tree itype = TREE_TYPE (fd->loops[i].v);
1818
1819 if (i >= fd->collapse && counts[i])
1820 continue;
1821 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1822 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1823 fold_convert (itype, fd->loops[i].n1),
1824 fold_convert (itype, fd->loops[i].n2)))
1825 == NULL_TREE || !integer_onep (t)))
1826 {
1827 gcond *cond_stmt;
1828 tree n1, n2;
1829 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1830 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1831 true, GSI_SAME_STMT);
1832 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1833 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1834 true, GSI_SAME_STMT);
1835 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1836 NULL_TREE, NULL_TREE);
1837 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1838 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1839 expand_omp_regimplify_p, NULL, NULL)
1840 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1841 expand_omp_regimplify_p, NULL, NULL))
1842 {
1843 *gsi = gsi_for_stmt (cond_stmt);
1844 gimple_regimplify_operands (cond_stmt, gsi);
1845 }
1846 e = split_block (entry_bb, cond_stmt);
1847 basic_block &zero_iter_bb
1848 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1849 int &first_zero_iter
1850 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1851 if (zero_iter_bb == NULL)
1852 {
1853 gassign *assign_stmt;
1854 first_zero_iter = i;
1855 zero_iter_bb = create_empty_bb (entry_bb);
1856 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1857 *gsi = gsi_after_labels (zero_iter_bb);
1858 if (i < fd->collapse)
1859 assign_stmt = gimple_build_assign (fd->loop.n2,
1860 build_zero_cst (type));
1861 else
1862 {
1863 counts[i] = create_tmp_reg (type, ".count");
1864 assign_stmt
1865 = gimple_build_assign (counts[i], build_zero_cst (type));
1866 }
1867 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1868 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1869 entry_bb);
1870 }
1871 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1872 ne->probability = profile_probability::very_unlikely ();
1873 e->flags = EDGE_TRUE_VALUE;
1874 e->probability = ne->probability.invert ();
1875 if (l2_dom_bb == NULL)
1876 l2_dom_bb = entry_bb;
1877 entry_bb = e->dest;
1878 *gsi = gsi_last_nondebug_bb (entry_bb);
1879 }
1880
1881 if (POINTER_TYPE_P (itype))
1882 itype = signed_type_for (itype);
1883 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1884 ? -1 : 1));
1885 t = fold_build2 (PLUS_EXPR, itype,
1886 fold_convert (itype, fd->loops[i].step), t);
1887 t = fold_build2 (PLUS_EXPR, itype, t,
1888 fold_convert (itype, fd->loops[i].n2));
1889 t = fold_build2 (MINUS_EXPR, itype, t,
1890 fold_convert (itype, fd->loops[i].n1));
1891 /* ?? We could probably use CEIL_DIV_EXPR instead of
1892 TRUNC_DIV_EXPR and adjust by hand, unless we can't
1893 generate the same code in the end because generically we
1894 don't know that the values involved must be negative for
1895 GT?? */
1896 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1897 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1898 fold_build1 (NEGATE_EXPR, itype, t),
1899 fold_build1 (NEGATE_EXPR, itype,
1900 fold_convert (itype,
1901 fd->loops[i].step)));
1902 else
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1904 fold_convert (itype, fd->loops[i].step));
1905 t = fold_convert (type, t);
1906 if (TREE_CODE (t) == INTEGER_CST)
1907 counts[i] = t;
1908 else
1909 {
1910 if (i < fd->collapse || i != first_zero_iter2)
1911 counts[i] = create_tmp_reg (type, ".count");
1912 expand_omp_build_assign (gsi, counts[i], t);
1913 }
1914 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1915 {
1916 if (i == 0)
1917 t = counts[0];
1918 else
1919 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1920 expand_omp_build_assign (gsi, fd->loop.n2, t);
1921 }
1922 }
1923 }
1924
1925 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1926 T = V;
1927 V3 = N31 + (T % count3) * STEP3;
1928 T = T / count3;
1929 V2 = N21 + (T % count2) * STEP2;
1930 T = T / count2;
1931 V1 = N11 + T * STEP1;
1932 if this loop doesn't have an inner loop construct combined with it.
1933 If it does have an inner loop construct combined with it and the
1934 iteration count isn't known constant, store values from counts array
1935 into its _looptemp_ temporaries instead. */
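
/* A worked example (values invented for illustration): with count2 = 3
   and count3 = 4, the linear iteration number T = V = 17 decomposes as
     17 % 4 = 1, T = 17 / 4 = 4   -> V3 = N31 + 1 * STEP3
      4 % 3 = 1, T =  4 / 3 = 1   -> V2 = N21 + 1 * STEP2
                                   -> V1 = N11 + 1 * STEP1
   which matches 17 = ((1 * 3) + 1) * 4 + 1.  */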
1936
1937 static void
1938 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1939 tree *counts, gimple *inner_stmt, tree startvar)
1940 {
1941 int i;
1942 if (gimple_omp_for_combined_p (fd->for_stmt))
1943 {
1944 /* If fd->loop.n2 is constant, then no propagation of the counts
1945 is needed, they are constant. */
1946 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1947 return;
1948
1949 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1950 ? gimple_omp_taskreg_clauses (inner_stmt)
1951 : gimple_omp_for_clauses (inner_stmt);
1952 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1953 isn't supposed to be handled, as the inner loop doesn't
1954 use it. */
1955 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1956 gcc_assert (innerc);
1957 for (i = 0; i < fd->collapse; i++)
1958 {
1959 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1960 OMP_CLAUSE__LOOPTEMP_);
1961 gcc_assert (innerc);
1962 if (i)
1963 {
1964 tree tem = OMP_CLAUSE_DECL (innerc);
1965 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1966 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1967 false, GSI_CONTINUE_LINKING);
1968 gassign *stmt = gimple_build_assign (tem, t);
1969 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1970 }
1971 }
1972 return;
1973 }
1974
1975 tree type = TREE_TYPE (fd->loop.v);
1976 tree tem = create_tmp_reg (type, ".tem");
1977 gassign *stmt = gimple_build_assign (tem, startvar);
1978 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1979
1980 for (i = fd->collapse - 1; i >= 0; i--)
1981 {
1982 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1983 itype = vtype;
1984 if (POINTER_TYPE_P (vtype))
1985 itype = signed_type_for (vtype);
1986 if (i != 0)
1987 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1988 else
1989 t = tem;
1990 t = fold_convert (itype, t);
1991 t = fold_build2 (MULT_EXPR, itype, t,
1992 fold_convert (itype, fd->loops[i].step));
1993 if (POINTER_TYPE_P (vtype))
1994 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1995 else
1996 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1997 t = force_gimple_operand_gsi (gsi, t,
1998 DECL_P (fd->loops[i].v)
1999 && TREE_ADDRESSABLE (fd->loops[i].v),
2000 NULL_TREE, false,
2001 GSI_CONTINUE_LINKING);
2002 stmt = gimple_build_assign (fd->loops[i].v, t);
2003 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2004 if (i != 0)
2005 {
2006 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2007 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2008 false, GSI_CONTINUE_LINKING);
2009 stmt = gimple_build_assign (tem, t);
2010 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2011 }
2012 }
2013 }
2014
2015 /* Helper function for expand_omp_for_*. Generate code like:
2016 L10:
2017 V3 += STEP3;
2018 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2019 L11:
2020 V3 = N31;
2021 V2 += STEP2;
2022 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2023 L12:
2024 V2 = N21;
2025 V1 += STEP1;
2026 goto BODY_BB; */
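
/* For collapse(2) this reduces to roughly
     L10:
     V2 += STEP2;
     if (V2 cond2 N22) goto BODY_BB; else goto L11;
     L11:
     V2 = N21;
     V1 += STEP1;
     goto BODY_BB;
   i.e. the outer variable only advances when the inner one wraps, and
   no bound is tested for the outermost loop here because the overall
   iteration count already limits it.  */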
2027
2028 static basic_block
2029 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2030 basic_block body_bb)
2031 {
2032 basic_block last_bb, bb, collapse_bb = NULL;
2033 int i;
2034 gimple_stmt_iterator gsi;
2035 edge e;
2036 tree t;
2037 gimple *stmt;
2038
2039 last_bb = cont_bb;
2040 for (i = fd->collapse - 1; i >= 0; i--)
2041 {
2042 tree vtype = TREE_TYPE (fd->loops[i].v);
2043
2044 bb = create_empty_bb (last_bb);
2045 add_bb_to_loop (bb, last_bb->loop_father);
2046 gsi = gsi_start_bb (bb);
2047
2048 if (i < fd->collapse - 1)
2049 {
2050 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
2051 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2052
2053 t = fd->loops[i + 1].n1;
2054 t = force_gimple_operand_gsi (&gsi, t,
2055 DECL_P (fd->loops[i + 1].v)
2056 && TREE_ADDRESSABLE (fd->loops[i
2057 + 1].v),
2058 NULL_TREE, false,
2059 GSI_CONTINUE_LINKING);
2060 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2062 }
2063 else
2064 collapse_bb = bb;
2065
2066 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2067
2068 if (POINTER_TYPE_P (vtype))
2069 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2070 else
2071 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2072 t = force_gimple_operand_gsi (&gsi, t,
2073 DECL_P (fd->loops[i].v)
2074 && TREE_ADDRESSABLE (fd->loops[i].v),
2075 NULL_TREE, false, GSI_CONTINUE_LINKING);
2076 stmt = gimple_build_assign (fd->loops[i].v, t);
2077 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2078
2079 if (i > 0)
2080 {
2081 t = fd->loops[i].n2;
2082 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 tree v = fd->loops[i].v;
2085 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2086 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2087 false, GSI_CONTINUE_LINKING);
2088 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2089 stmt = gimple_build_cond_empty (t);
2090 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2091 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2092 expand_omp_regimplify_p, NULL, NULL)
2093 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2094 expand_omp_regimplify_p, NULL, NULL))
2095 gimple_regimplify_operands (stmt, &gsi);
2096 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
2097 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2098 }
2099 else
2100 make_edge (bb, body_bb, EDGE_FALLTHRU);
2101 last_bb = bb;
2102 }
2103
2104 return collapse_bb;
2105 }
2106
2107 /* Expand #pragma omp ordered depend(source). */
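
/* E.g. a
     #pragma omp ordered depend(source)
   statement inside a doacross loop becomes a call to GOMP_doacross_post
   (or GOMP_doacross_ull_post), passing the address of the
   counts[fd->ordered] array that holds the current iteration vector;
   see expand_omp_ordered_source_sink below for how that array is
   maintained.  */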
2108
2109 static void
2110 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2111 tree *counts, location_t loc)
2112 {
2113 enum built_in_function source_ix
2114 = fd->iter_type == long_integer_type_node
2115 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2116 gimple *g
2117 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2118 build_fold_addr_expr (counts[fd->ordered]));
2119 gimple_set_location (g, loc);
2120 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2121 }
2122
2123 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
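
/* E.g. in
     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 0; j < m; j++)
         {
           #pragma omp ordered depend(sink: i - 1, j + 1)
           ...
         }
   the offsets -1 and +1 are folded into the current iteration numbers
   and passed to GOMP_doacross_wait (or GOMP_doacross_ull_wait), guarded
   by a runtime check that the referenced iteration exists at all in the
   iteration space.  */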
2124
2125 static void
2126 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2127 tree *counts, tree c, location_t loc)
2128 {
2129 auto_vec<tree, 10> args;
2130 enum built_in_function sink_ix
2131 = fd->iter_type == long_integer_type_node
2132 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2133 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2134 int i;
2135 gimple_stmt_iterator gsi2 = *gsi;
2136 bool warned_step = false;
2137
2138 for (i = 0; i < fd->ordered; i++)
2139 {
2140 tree step = NULL_TREE;
2141 off = TREE_PURPOSE (deps);
2142 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2143 {
2144 step = TREE_OPERAND (off, 1);
2145 off = TREE_OPERAND (off, 0);
2146 }
2147 if (!integer_zerop (off))
2148 {
2149 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2150 || fd->loops[i].cond_code == GT_EXPR);
2151 bool forward = fd->loops[i].cond_code == LT_EXPR;
2152 if (step)
2153 {
2154 /* Non-simple Fortran DO loops. If step is variable,
2155 we don't even know the direction at compile time, so we
2156 can't warn. */
2157 if (TREE_CODE (step) != INTEGER_CST)
2158 break;
2159 forward = tree_int_cst_sgn (step) != -1;
2160 }
2161 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2162 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2163 "waiting for lexically later iteration");
2164 break;
2165 }
2166 deps = TREE_CHAIN (deps);
2167 }
2168 /* If all offsets corresponding to the collapsed loops are zero,
2169 this depend clause can be ignored. FIXME: but a flush is still
2170 needed; we would have to emit one __sync_synchronize () for it
2171 (perhaps conditionally). Solve this together with the
2172 conservative dependence folding optimization.
2173 if (i >= fd->collapse)
2174 return; */
2175
2176 deps = OMP_CLAUSE_DECL (c);
2177 gsi_prev (&gsi2);
2178 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2179 edge e2 = split_block_after_labels (e1->dest);
2180
2181 gsi2 = gsi_after_labels (e1->dest);
2182 *gsi = gsi_last_bb (e1->src);
2183 for (i = 0; i < fd->ordered; i++)
2184 {
2185 tree itype = TREE_TYPE (fd->loops[i].v);
2186 tree step = NULL_TREE;
2187 tree orig_off = NULL_TREE;
2188 if (POINTER_TYPE_P (itype))
2189 itype = sizetype;
2190 if (i)
2191 deps = TREE_CHAIN (deps);
2192 off = TREE_PURPOSE (deps);
2193 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2194 {
2195 step = TREE_OPERAND (off, 1);
2196 off = TREE_OPERAND (off, 0);
2197 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2198 && integer_onep (fd->loops[i].step)
2199 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2200 }
2201 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2202 if (step)
2203 {
2204 off = fold_convert_loc (loc, itype, off);
2205 orig_off = off;
2206 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2207 }
2208
2209 if (integer_zerop (off))
2210 t = boolean_true_node;
2211 else
2212 {
2213 tree a;
2214 tree co = fold_convert_loc (loc, itype, off);
2215 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2216 {
2217 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2218 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2219 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2220 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2221 co);
2222 }
2223 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2224 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2225 fd->loops[i].v, co);
2226 else
2227 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2228 fd->loops[i].v, co);
2229 if (step)
2230 {
2231 tree t1, t2;
2232 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2233 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2234 fd->loops[i].n1);
2235 else
2236 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2237 fd->loops[i].n2);
2238 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2239 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2240 fd->loops[i].n2);
2241 else
2242 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2243 fd->loops[i].n1);
2244 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2245 step, build_int_cst (TREE_TYPE (step), 0));
2246 if (TREE_CODE (step) != INTEGER_CST)
2247 {
2248 t1 = unshare_expr (t1);
2249 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2250 false, GSI_CONTINUE_LINKING);
2251 t2 = unshare_expr (t2);
2252 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2253 false, GSI_CONTINUE_LINKING);
2254 }
2255 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2256 t, t2, t1);
2257 }
2258 else if (fd->loops[i].cond_code == LT_EXPR)
2259 {
2260 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2261 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2262 fd->loops[i].n1);
2263 else
2264 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2265 fd->loops[i].n2);
2266 }
2267 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2268 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2269 fd->loops[i].n2);
2270 else
2271 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2272 fd->loops[i].n1);
2273 }
2274 if (cond)
2275 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2276 else
2277 cond = t;
2278
2279 off = fold_convert_loc (loc, itype, off);
2280
2281 if (step
2282 || (fd->loops[i].cond_code == LT_EXPR
2283 ? !integer_onep (fd->loops[i].step)
2284 : !integer_minus_onep (fd->loops[i].step)))
2285 {
2286 if (step == NULL_TREE
2287 && TYPE_UNSIGNED (itype)
2288 && fd->loops[i].cond_code == GT_EXPR)
2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2290 fold_build1_loc (loc, NEGATE_EXPR, itype,
2291 s));
2292 else
2293 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2294 orig_off ? orig_off : off, s);
2295 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2296 build_int_cst (itype, 0));
2297 if (integer_zerop (t) && !warned_step)
2298 {
2299 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
2300 "refers to iteration never in the iteration "
2301 "space");
2302 warned_step = true;
2303 }
2304 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2305 cond, t);
2306 }
2307
2308 if (i <= fd->collapse - 1 && fd->collapse > 1)
2309 t = fd->loop.v;
2310 else if (counts[i])
2311 t = counts[i];
2312 else
2313 {
2314 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2315 fd->loops[i].v, fd->loops[i].n1);
2316 t = fold_convert_loc (loc, fd->iter_type, t);
2317 }
2318 if (step)
2319 /* We have already divided off by step above. */;
2320 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2322 fold_build1_loc (loc, NEGATE_EXPR, itype,
2323 s));
2324 else
2325 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2326 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2327 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2328 off = fold_convert_loc (loc, fd->iter_type, off);
2329 if (i <= fd->collapse - 1 && fd->collapse > 1)
2330 {
2331 if (i)
2332 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2333 off);
2334 if (i < fd->collapse - 1)
2335 {
2336 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2337 counts[i]);
2338 continue;
2339 }
2340 }
2341 off = unshare_expr (off);
2342 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2343 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2344 true, GSI_SAME_STMT);
2345 args.safe_push (t);
2346 }
2347 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2348 gimple_set_location (g, loc);
2349 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2350
2351 cond = unshare_expr (cond);
2352 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2353 GSI_CONTINUE_LINKING);
2354 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2355 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2356 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2357 e1->probability = e3->probability.invert ();
2358 e1->flags = EDGE_TRUE_VALUE;
2359 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2360
2361 *gsi = gsi_after_labels (e2->dest);
2362 }
2363
2364 /* Expand all #pragma omp ordered depend(source) and
2365 #pragma omp ordered depend(sink:...) constructs in the current
2366 #pragma omp for ordered(n) region. */
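
/* Bookkeeping sketch: for ordered(N), counts[fd->collapse - 1]
   .. counts[N - 1] end up tracking per-loop iteration numbers (or stay
   NULL_TREE when the loop variable itself can serve, e.g. non-pointer
   iterators with step 1), and counts[N] is the .orditera array that
   mirrors the current iteration vector posted by depend(source).  */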
2367
2368 static void
2369 expand_omp_ordered_source_sink (struct omp_region *region,
2370 struct omp_for_data *fd, tree *counts,
2371 basic_block cont_bb)
2372 {
2373 struct omp_region *inner;
2374 int i;
2375 for (i = fd->collapse - 1; i < fd->ordered; i++)
2376 if (i == fd->collapse - 1 && fd->collapse > 1)
2377 counts[i] = NULL_TREE;
2378 else if (i >= fd->collapse && !cont_bb)
2379 counts[i] = build_zero_cst (fd->iter_type);
2380 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2381 && integer_onep (fd->loops[i].step))
2382 counts[i] = NULL_TREE;
2383 else
2384 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2385 tree atype
2386 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2387 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2388 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2389
2390 for (inner = region->inner; inner; inner = inner->next)
2391 if (inner->type == GIMPLE_OMP_ORDERED)
2392 {
2393 gomp_ordered *ord_stmt = inner->ord_stmt;
2394 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2395 location_t loc = gimple_location (ord_stmt);
2396 tree c;
2397 for (c = gimple_omp_ordered_clauses (ord_stmt);
2398 c; c = OMP_CLAUSE_CHAIN (c))
2399 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2400 break;
2401 if (c)
2402 expand_omp_ordered_source (&gsi, fd, counts, loc);
2403 for (c = gimple_omp_ordered_clauses (ord_stmt);
2404 c; c = OMP_CLAUSE_CHAIN (c))
2405 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2406 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2407 gsi_remove (&gsi, true);
2408 }
2409 }
2410
2411 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2412 collapsed. */
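
/* E.g. for
     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
   without a collapse clause, only the i loop is divided among the
   threads; the j loop is rebuilt here around the body, bumping its
   .orditer counter and the corresponding .orditera element on every
   iteration.  */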
2413
2414 static basic_block
2415 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2416 basic_block cont_bb, basic_block body_bb,
2417 bool ordered_lastprivate)
2418 {
2419 if (fd->ordered == fd->collapse)
2420 return cont_bb;
2421
2422 if (!cont_bb)
2423 {
2424 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2425 for (int i = fd->collapse; i < fd->ordered; i++)
2426 {
2427 tree type = TREE_TYPE (fd->loops[i].v);
2428 tree n1 = fold_convert (type, fd->loops[i].n1);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2430 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2431 size_int (i - fd->collapse + 1),
2432 NULL_TREE, NULL_TREE);
2433 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2434 }
2435 return NULL;
2436 }
2437
2438 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2439 {
2440 tree t, type = TREE_TYPE (fd->loops[i].v);
2441 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2442 expand_omp_build_assign (&gsi, fd->loops[i].v,
2443 fold_convert (type, fd->loops[i].n1));
2444 if (counts[i])
2445 expand_omp_build_assign (&gsi, counts[i],
2446 build_zero_cst (fd->iter_type));
2447 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2448 size_int (i - fd->collapse + 1),
2449 NULL_TREE, NULL_TREE);
2450 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2451 if (!gsi_end_p (gsi))
2452 gsi_prev (&gsi);
2453 else
2454 gsi = gsi_last_bb (body_bb);
2455 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2456 basic_block new_body = e1->dest;
2457 if (body_bb == cont_bb)
2458 cont_bb = new_body;
2459 edge e2 = NULL;
2460 basic_block new_header;
2461 if (EDGE_COUNT (cont_bb->preds) > 0)
2462 {
2463 gsi = gsi_last_bb (cont_bb);
2464 if (POINTER_TYPE_P (type))
2465 t = fold_build_pointer_plus (fd->loops[i].v,
2466 fold_convert (sizetype,
2467 fd->loops[i].step));
2468 else
2469 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2470 fold_convert (type, fd->loops[i].step));
2471 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2472 if (counts[i])
2473 {
2474 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2475 build_int_cst (fd->iter_type, 1));
2476 expand_omp_build_assign (&gsi, counts[i], t);
2477 t = counts[i];
2478 }
2479 else
2480 {
2481 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2482 fd->loops[i].v, fd->loops[i].n1);
2483 t = fold_convert (fd->iter_type, t);
2484 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2485 true, GSI_SAME_STMT);
2486 }
2487 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2488 size_int (i - fd->collapse + 1),
2489 NULL_TREE, NULL_TREE);
2490 expand_omp_build_assign (&gsi, aref, t);
2491 gsi_prev (&gsi);
2492 e2 = split_block (cont_bb, gsi_stmt (gsi));
2493 new_header = e2->dest;
2494 }
2495 else
2496 new_header = cont_bb;
2497 gsi = gsi_after_labels (new_header);
2498 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2499 true, GSI_SAME_STMT);
2500 tree n2
2501 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2502 true, NULL_TREE, true, GSI_SAME_STMT);
2503 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2504 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2505 edge e3 = split_block (new_header, gsi_stmt (gsi));
2506 cont_bb = e3->dest;
2507 remove_edge (e1);
2508 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2509 e3->flags = EDGE_FALSE_VALUE;
2510 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2511 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2512 e1->probability = e3->probability.invert ();
2513
2514 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2515 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2516
2517 if (e2)
2518 {
2519 struct loop *loop = alloc_loop ();
2520 loop->header = new_header;
2521 loop->latch = e2->src;
2522 add_loop (loop, body_bb->loop_father);
2523 }
2524 }
2525
2526 /* If there are any lastprivate clauses and it is possible some loops
2527 might have zero iterations, ensure all the decls are initialized,
2528 otherwise we could crash evaluating C++ class iterators with lastprivate
2529 clauses. */
2530 bool need_inits = false;
2531 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2532 if (need_inits)
2533 {
2534 tree type = TREE_TYPE (fd->loops[i].v);
2535 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2536 expand_omp_build_assign (&gsi, fd->loops[i].v,
2537 fold_convert (type, fd->loops[i].n1));
2538 }
2539 else
2540 {
2541 tree type = TREE_TYPE (fd->loops[i].v);
2542 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2543 boolean_type_node,
2544 fold_convert (type, fd->loops[i].n1),
2545 fold_convert (type, fd->loops[i].n2));
2546 if (!integer_onep (this_cond))
2547 need_inits = true;
2548 }
2549
2550 return cont_bb;
2551 }
2552
2553 /* A subroutine of expand_omp_for. Generate code for a parallel
2554 loop with any schedule. Given parameters:
2555
2556 for (V = N1; V cond N2; V += STEP) BODY;
2557
2558 where COND is "<" or ">", we generate pseudocode
2559
2560 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2561 if (more) goto L0; else goto L3;
2562 L0:
2563 V = istart0;
2564 iend = iend0;
2565 L1:
2566 BODY;
2567 V += STEP;
2568 if (V cond iend) goto L1; else goto L2;
2569 L2:
2570 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2571 L3:
2572
2573 If this is a combined omp parallel loop, instead of the call to
2574 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2575 If this is a gimple_omp_for_combined_p loop, then instead of assigning
2576 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2577 inner GIMPLE_OMP_FOR and V += STEP; and
2578 if (V cond iend) goto L1; else goto L2; are removed.
2579
2580 For collapsed loops, given parameters:
2581 collapse(3)
2582 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2583 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2584 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2585 BODY;
2586
2587 we generate pseudocode
2588
2589 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2590 if (cond3 is <)
2591 adj = STEP3 - 1;
2592 else
2593 adj = STEP3 + 1;
2594 count3 = (adj + N32 - N31) / STEP3;
2595 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2596 if (cond2 is <)
2597 adj = STEP2 - 1;
2598 else
2599 adj = STEP2 + 1;
2600 count2 = (adj + N22 - N21) / STEP2;
2601 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2602 if (cond1 is <)
2603 adj = STEP1 - 1;
2604 else
2605 adj = STEP1 + 1;
2606 count1 = (adj + N12 - N11) / STEP1;
2607 count = count1 * count2 * count3;
2608 goto Z1;
2609 Z0:
2610 count = 0;
2611 Z1:
2612 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2613 if (more) goto L0; else goto L3;
2614 L0:
2615 V = istart0;
2616 T = V;
2617 V3 = N31 + (T % count3) * STEP3;
2618 T = T / count3;
2619 V2 = N21 + (T % count2) * STEP2;
2620 T = T / count2;
2621 V1 = N11 + T * STEP1;
2622 iend = iend0;
2623 L1:
2624 BODY;
2625 V += 1;
2626 if (V < iend) goto L10; else goto L2;
2627 L10:
2628 V3 += STEP3;
2629 if (V3 cond3 N32) goto L1; else goto L11;
2630 L11:
2631 V3 = N31;
2632 V2 += STEP2;
2633 if (V2 cond2 N22) goto L1; else goto L12;
2634 L12:
2635 V2 = N21;
2636 V1 += STEP1;
2637 goto L1;
2638 L2:
2639 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2640 L3:
2641
2642 */
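
/* To make the scheme above concrete (one possible instantiation): for
     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) ...
   START_FN and NEXT_FN are typically GOMP_loop_dynamic_start and
   GOMP_loop_dynamic_next (or their nonmonotonic variants), so each
   thread repeatedly grabs an [istart0, iend0) chunk of up to four
   iterations, runs the sequential loop over it, and asks for another
   chunk until the next call returns false.  */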
2643
2644 static void
2645 expand_omp_for_generic (struct omp_region *region,
2646 struct omp_for_data *fd,
2647 enum built_in_function start_fn,
2648 enum built_in_function next_fn,
2649 tree sched_arg,
2650 gimple *inner_stmt)
2651 {
2652 tree type, istart0, iend0, iend;
2653 tree t, vmain, vback, bias = NULL_TREE;
2654 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2655 basic_block l2_bb = NULL, l3_bb = NULL;
2656 gimple_stmt_iterator gsi;
2657 gassign *assign_stmt;
2658 bool in_combined_parallel = is_combined_parallel (region);
2659 bool broken_loop = region->cont == NULL;
2660 edge e, ne;
2661 tree *counts = NULL;
2662 int i;
2663 bool ordered_lastprivate = false;
2664
2665 gcc_assert (!broken_loop || !in_combined_parallel);
2666 gcc_assert (fd->iter_type == long_integer_type_node
2667 || !in_combined_parallel);
2668
2669 entry_bb = region->entry;
2670 cont_bb = region->cont;
2671 collapse_bb = NULL;
2672 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2673 gcc_assert (broken_loop
2674 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2675 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2676 l1_bb = single_succ (l0_bb);
2677 if (!broken_loop)
2678 {
2679 l2_bb = create_empty_bb (cont_bb);
2680 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2681 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2682 == l1_bb));
2683 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2684 }
2685 else
2686 l2_bb = NULL;
2687 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2688 exit_bb = region->exit;
2689
2690 gsi = gsi_last_nondebug_bb (entry_bb);
2691
2692 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2693 if (fd->ordered
2694 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2695 OMP_CLAUSE_LASTPRIVATE))
2696 ordered_lastprivate = true;
2697 tree reductions = NULL_TREE;
2698 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
2699 tree memv = NULL_TREE;
2700 if (fd->lastprivate_conditional)
2701 {
2702 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2703 OMP_CLAUSE__CONDTEMP_);
2704 if (fd->have_pointer_condtemp)
2705 condtemp = OMP_CLAUSE_DECL (c);
2706 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
2707 cond_var = OMP_CLAUSE_DECL (c);
2708 }
2709 if (sched_arg)
2710 {
2711 if (fd->have_reductemp)
2712 {
2713 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2714 OMP_CLAUSE__REDUCTEMP_);
2715 reductions = OMP_CLAUSE_DECL (c);
2716 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2717 gimple *g = SSA_NAME_DEF_STMT (reductions);
2718 reductions = gimple_assign_rhs1 (g);
2719 OMP_CLAUSE_DECL (c) = reductions;
2720 entry_bb = gimple_bb (g);
2721 edge e = split_block (entry_bb, g);
2722 if (region->entry == entry_bb)
2723 region->entry = e->dest;
2724 gsi = gsi_last_bb (entry_bb);
2725 }
2726 else
2727 reductions = null_pointer_node;
2728 if (fd->have_pointer_condtemp)
2729 {
2730 tree type = TREE_TYPE (condtemp);
2731 memv = create_tmp_var (type);
2732 TREE_ADDRESSABLE (memv) = 1;
2733 unsigned HOST_WIDE_INT sz
2734 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
2735 sz *= fd->lastprivate_conditional;
2736 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
2737 false);
2738 mem = build_fold_addr_expr (memv);
2739 }
2740 else
2741 mem = null_pointer_node;
2742 }
2743 if (fd->collapse > 1 || fd->ordered)
2744 {
2745 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2746 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2747
2748 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2749 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2750 zero_iter1_bb, first_zero_iter1,
2751 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2752
2753 if (zero_iter1_bb)
2754 {
2755 /* Some counts[i] vars might be uninitialized if
2756 some loop has zero iterations. But the body shouldn't
2757 be executed in that case, so just avoid uninit warnings. */
2758 for (i = first_zero_iter1;
2759 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2760 if (SSA_VAR_P (counts[i]))
2761 TREE_NO_WARNING (counts[i]) = 1;
2762 gsi_prev (&gsi);
2763 e = split_block (entry_bb, gsi_stmt (gsi));
2764 entry_bb = e->dest;
2765 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2766 gsi = gsi_last_nondebug_bb (entry_bb);
2767 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2768 get_immediate_dominator (CDI_DOMINATORS,
2769 zero_iter1_bb));
2770 }
2771 if (zero_iter2_bb)
2772 {
2773 /* Some counts[i] vars might be uninitialized if
2774 some loop has zero iterations. But the body shouldn't
2775 be executed in that case, so just avoid uninit warnings. */
2776 for (i = first_zero_iter2; i < fd->ordered; i++)
2777 if (SSA_VAR_P (counts[i]))
2778 TREE_NO_WARNING (counts[i]) = 1;
2779 if (zero_iter1_bb)
2780 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2781 else
2782 {
2783 gsi_prev (&gsi);
2784 e = split_block (entry_bb, gsi_stmt (gsi));
2785 entry_bb = e->dest;
2786 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2787 gsi = gsi_last_nondebug_bb (entry_bb);
2788 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2789 get_immediate_dominator
2790 (CDI_DOMINATORS, zero_iter2_bb));
2791 }
2792 }
2793 if (fd->collapse == 1)
2794 {
2795 counts[0] = fd->loop.n2;
2796 fd->loop = fd->loops[0];
2797 }
2798 }
2799
2800 type = TREE_TYPE (fd->loop.v);
2801 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2802 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2803 TREE_ADDRESSABLE (istart0) = 1;
2804 TREE_ADDRESSABLE (iend0) = 1;
2805
2806 /* See if we need to bias by LLONG_MIN. */
2807 if (fd->iter_type == long_long_unsigned_type_node
2808 && TREE_CODE (type) == INTEGER_TYPE
2809 && !TYPE_UNSIGNED (type)
2810 && fd->ordered == 0)
2811 {
2812 tree n1, n2;
2813
2814 if (fd->loop.cond_code == LT_EXPR)
2815 {
2816 n1 = fd->loop.n1;
2817 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2818 }
2819 else
2820 {
2821 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2822 n2 = fd->loop.n1;
2823 }
2824 if (TREE_CODE (n1) != INTEGER_CST
2825 || TREE_CODE (n2) != INTEGER_CST
2826 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2827 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2828 }
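
/* The bias maps the signed iteration space monotonically into the
   unsigned long long space used by the GOMP_loop_ull_* entry points:
   adding TYPE_MIN_VALUE (e.g. LLONG_MIN) sends [-2^63, 2^63) onto
   [0, 2^64), so unsigned comparisons in the runtime still order the
   bounds correctly; the bias is subtracted again below when the
   returned istart0/iend0 values are converted back for the loop
   variable.  */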
2829
2830 gimple_stmt_iterator gsif = gsi;
2831 gsi_prev (&gsif);
2832
2833 tree arr = NULL_TREE;
2834 if (in_combined_parallel)
2835 {
2836 gcc_assert (fd->ordered == 0);
2837 /* In a combined parallel loop, emit a call to
2838 GOMP_loop_foo_next. */
2839 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2840 build_fold_addr_expr (istart0),
2841 build_fold_addr_expr (iend0));
2842 }
2843 else
2844 {
2845 tree t0, t1, t2, t3, t4;
2846 /* If this is not a combined parallel loop, emit a call to
2847 GOMP_loop_foo_start in ENTRY_BB. */
2848 t4 = build_fold_addr_expr (iend0);
2849 t3 = build_fold_addr_expr (istart0);
2850 if (fd->ordered)
2851 {
2852 t0 = build_int_cst (unsigned_type_node,
2853 fd->ordered - fd->collapse + 1);
2854 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2855 fd->ordered
2856 - fd->collapse + 1),
2857 ".omp_counts");
2858 DECL_NAMELESS (arr) = 1;
2859 TREE_ADDRESSABLE (arr) = 1;
2860 TREE_STATIC (arr) = 1;
2861 vec<constructor_elt, va_gc> *v;
2862 vec_alloc (v, fd->ordered - fd->collapse + 1);
2863 int idx;
2864
2865 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2866 {
2867 tree c;
2868 if (idx == 0 && fd->collapse > 1)
2869 c = fd->loop.n2;
2870 else
2871 c = counts[idx + fd->collapse - 1];
2872 tree purpose = size_int (idx);
2873 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2874 if (TREE_CODE (c) != INTEGER_CST)
2875 TREE_STATIC (arr) = 0;
2876 }
2877
2878 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2879 if (!TREE_STATIC (arr))
2880 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2881 void_type_node, arr),
2882 true, NULL_TREE, true, GSI_SAME_STMT);
2883 t1 = build_fold_addr_expr (arr);
2884 t2 = NULL_TREE;
2885 }
2886 else
2887 {
2888 t2 = fold_convert (fd->iter_type, fd->loop.step);
2889 t1 = fd->loop.n2;
2890 t0 = fd->loop.n1;
2891 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2892 {
2893 tree innerc
2894 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2895 OMP_CLAUSE__LOOPTEMP_);
2896 gcc_assert (innerc);
2897 t0 = OMP_CLAUSE_DECL (innerc);
2898 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2899 OMP_CLAUSE__LOOPTEMP_);
2900 gcc_assert (innerc);
2901 t1 = OMP_CLAUSE_DECL (innerc);
2902 }
2903 if (POINTER_TYPE_P (TREE_TYPE (t0))
2904 && TYPE_PRECISION (TREE_TYPE (t0))
2905 != TYPE_PRECISION (fd->iter_type))
2906 {
2907 /* Avoid casting pointers to an integer of a different size. */
2908 tree itype = signed_type_for (type);
2909 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2910 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2911 }
2912 else
2913 {
2914 t1 = fold_convert (fd->iter_type, t1);
2915 t0 = fold_convert (fd->iter_type, t0);
2916 }
2917 if (bias)
2918 {
2919 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2920 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2921 }
2922 }
2923 if (fd->iter_type == long_integer_type_node || fd->ordered)
2924 {
2925 if (fd->chunk_size)
2926 {
2927 t = fold_convert (fd->iter_type, fd->chunk_size);
2928 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2929 if (sched_arg)
2930 {
2931 if (fd->ordered)
2932 t = build_call_expr (builtin_decl_explicit (start_fn),
2933 8, t0, t1, sched_arg, t, t3, t4,
2934 reductions, mem);
2935 else
2936 t = build_call_expr (builtin_decl_explicit (start_fn),
2937 9, t0, t1, t2, sched_arg, t, t3, t4,
2938 reductions, mem);
2939 }
2940 else if (fd->ordered)
2941 t = build_call_expr (builtin_decl_explicit (start_fn),
2942 5, t0, t1, t, t3, t4);
2943 else
2944 t = build_call_expr (builtin_decl_explicit (start_fn),
2945 6, t0, t1, t2, t, t3, t4);
2946 }
2947 else if (fd->ordered)
2948 t = build_call_expr (builtin_decl_explicit (start_fn),
2949 4, t0, t1, t3, t4);
2950 else
2951 t = build_call_expr (builtin_decl_explicit (start_fn),
2952 5, t0, t1, t2, t3, t4);
2953 }
2954 else
2955 {
2956 tree t5;
2957 tree c_bool_type;
2958 tree bfn_decl;
2959
2960 /* The GOMP_loop_ull_*start functions have an additional boolean
2961 argument, true for < loops and false for > loops.
2962 In Fortran, the C bool type can be different from
2963 boolean_type_node. */
2964 bfn_decl = builtin_decl_explicit (start_fn);
2965 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2966 t5 = build_int_cst (c_bool_type,
2967 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2968 if (fd->chunk_size)
2969 {
2970 tree bfn_decl = builtin_decl_explicit (start_fn);
2971 t = fold_convert (fd->iter_type, fd->chunk_size);
2972 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2973 if (sched_arg)
2974 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2975 t, t3, t4, reductions, mem);
2976 else
2977 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2978 }
2979 else
2980 t = build_call_expr (builtin_decl_explicit (start_fn),
2981 6, t5, t0, t1, t2, t3, t4);
2982 }
2983 }
2984 if (TREE_TYPE (t) != boolean_type_node)
2985 t = fold_build2 (NE_EXPR, boolean_type_node,
2986 t, build_int_cst (TREE_TYPE (t), 0));
2987 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2988 true, GSI_SAME_STMT);
2989 if (arr && !TREE_STATIC (arr))
2990 {
2991 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2992 TREE_THIS_VOLATILE (clobber) = 1;
2993 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2994 GSI_SAME_STMT);
2995 }
2996 if (fd->have_pointer_condtemp)
2997 expand_omp_build_assign (&gsi, condtemp, memv, false);
2998 if (fd->have_reductemp)
2999 {
3000 gimple *g = gsi_stmt (gsi);
3001 gsi_remove (&gsi, true);
3002 release_ssa_name (gimple_assign_lhs (g));
3003
3004 entry_bb = region->entry;
3005 gsi = gsi_last_nondebug_bb (entry_bb);
3006
3007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3008 }
3009 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3010
3011 /* Remove the GIMPLE_OMP_FOR statement. */
3012 gsi_remove (&gsi, true);
3013
3014 if (gsi_end_p (gsif))
3015 gsif = gsi_after_labels (gsi_bb (gsif));
3016 gsi_next (&gsif);
3017
3018 /* Iteration setup for sequential loop goes in L0_BB. */
3019 tree startvar = fd->loop.v;
3020 tree endvar = NULL_TREE;
3021
3022 if (gimple_omp_for_combined_p (fd->for_stmt))
3023 {
3024 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
3025 && gimple_omp_for_kind (inner_stmt)
3026 == GF_OMP_FOR_KIND_SIMD);
3027 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
3028 OMP_CLAUSE__LOOPTEMP_);
3029 gcc_assert (innerc);
3030 startvar = OMP_CLAUSE_DECL (innerc);
3031 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3032 OMP_CLAUSE__LOOPTEMP_);
3033 gcc_assert (innerc);
3034 endvar = OMP_CLAUSE_DECL (innerc);
3035 }
3036
3037 gsi = gsi_start_bb (l0_bb);
3038 t = istart0;
3039 if (fd->ordered && fd->collapse == 1)
3040 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3041 fold_convert (fd->iter_type, fd->loop.step));
3042 else if (bias)
3043 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3044 if (fd->ordered && fd->collapse == 1)
3045 {
3046 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3047 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3048 fd->loop.n1, fold_convert (sizetype, t));
3049 else
3050 {
3051 t = fold_convert (TREE_TYPE (startvar), t);
3052 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3053 fd->loop.n1, t);
3054 }
3055 }
3056 else
3057 {
3058 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3059 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3060 t = fold_convert (TREE_TYPE (startvar), t);
3061 }
3062 t = force_gimple_operand_gsi (&gsi, t,
3063 DECL_P (startvar)
3064 && TREE_ADDRESSABLE (startvar),
3065 NULL_TREE, false, GSI_CONTINUE_LINKING);
3066 assign_stmt = gimple_build_assign (startvar, t);
3067 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3068 if (cond_var)
3069 {
3070 tree itype = TREE_TYPE (cond_var);
3071 /* For lastprivate(conditional:) itervar, we need an iteration
3072 counter that starts at a non-zero unsigned value and increases.
3073 Prefer as few IVs as possible, so if we can use startvar
3074 itself, use that, or startvar + constant (those would be
3075 incremented with step), and as a last resort use s0 + 1
3076 incremented by 1. */
3077 if ((fd->ordered && fd->collapse == 1)
3078 || bias
3079 || POINTER_TYPE_P (type)
3080 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3081 || fd->loop.cond_code != LT_EXPR)
3082 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
3083 build_int_cst (itype, 1));
3084 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
3085 t = fold_convert (itype, t);
3086 else
3087 {
3088 tree c = fold_convert (itype, fd->loop.n1);
3089 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3090 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3091 }
3092 t = force_gimple_operand_gsi (&gsi, t, false,
3093 NULL_TREE, false, GSI_CONTINUE_LINKING);
3094 assign_stmt = gimple_build_assign (cond_var, t);
3095 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3096 }
3097
3098 t = iend0;
3099 if (fd->ordered && fd->collapse == 1)
3100 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3101 fold_convert (fd->iter_type, fd->loop.step));
3102 else if (bias)
3103 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3104 if (fd->ordered && fd->collapse == 1)
3105 {
3106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3107 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3108 fd->loop.n1, fold_convert (sizetype, t));
3109 else
3110 {
3111 t = fold_convert (TREE_TYPE (startvar), t);
3112 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3113 fd->loop.n1, t);
3114 }
3115 }
3116 else
3117 {
3118 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3119 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3120 t = fold_convert (TREE_TYPE (startvar), t);
3121 }
3122 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3123 false, GSI_CONTINUE_LINKING);
3124 if (endvar)
3125 {
3126 assign_stmt = gimple_build_assign (endvar, iend);
3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3128 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3129 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3130 else
3131 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3132 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3133 }
3134 /* Handle linear clause adjustments. */
3135 tree itercnt = NULL_TREE;
3136 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3137 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3138 c; c = OMP_CLAUSE_CHAIN (c))
3139 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3140 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3141 {
3142 tree d = OMP_CLAUSE_DECL (c);
3143 bool is_ref = omp_is_reference (d);
3144 tree t = d, a, dest;
3145 if (is_ref)
3146 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3147 tree type = TREE_TYPE (t);
3148 if (POINTER_TYPE_P (type))
3149 type = sizetype;
3150 dest = unshare_expr (t);
3151 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3152 expand_omp_build_assign (&gsif, v, t);
3153 if (itercnt == NULL_TREE)
3154 {
3155 itercnt = startvar;
3156 tree n1 = fd->loop.n1;
3157 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3158 {
3159 itercnt
3160 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3161 itercnt);
3162 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3163 }
3164 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3165 itercnt, n1);
3166 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3167 itercnt, fd->loop.step);
3168 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3169 NULL_TREE, false,
3170 GSI_CONTINUE_LINKING);
3171 }
3172 a = fold_build2 (MULT_EXPR, type,
3173 fold_convert (type, itercnt),
3174 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3175 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3176 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3177 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3178 false, GSI_CONTINUE_LINKING);
3179 assign_stmt = gimple_build_assign (dest, t);
3180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3181 }
3182 if (fd->collapse > 1)
3183 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3184
3185 if (fd->ordered)
3186 {
3187 /* Until now, the counts array contained the number of iterations
3188 (or the variable holding it) for the ith loop. From now on, we
3189 need those counts only for the collapsed loops, and only for the
3190 2nd through the last collapsed one. Move them one element
3191 earlier; we'll use counts[fd->collapse - 1] for the first
3192 source/sink iteration counter and so on, and counts[fd->ordered]
3193 as the array holding the current counter values for
3194 depend(source). */
3195 if (fd->collapse > 1)
3196 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3197 if (broken_loop)
3198 {
3199 int i;
3200 for (i = fd->collapse; i < fd->ordered; i++)
3201 {
3202 tree type = TREE_TYPE (fd->loops[i].v);
3203 tree this_cond
3204 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3205 fold_convert (type, fd->loops[i].n1),
3206 fold_convert (type, fd->loops[i].n2));
3207 if (!integer_onep (this_cond))
3208 break;
3209 }
3210 if (i < fd->ordered)
3211 {
3212 cont_bb
3213 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3214 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3215 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3216 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3217 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3218 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3219 make_edge (cont_bb, l1_bb, 0);
3220 l2_bb = create_empty_bb (cont_bb);
3221 broken_loop = false;
3222 }
3223 }
3224 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3225 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3226 ordered_lastprivate);
3227 if (counts[fd->collapse - 1])
3228 {
3229 gcc_assert (fd->collapse == 1);
3230 gsi = gsi_last_bb (l0_bb);
3231 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3232 istart0, true);
3233 gsi = gsi_last_bb (cont_bb);
3234 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3235 build_int_cst (fd->iter_type, 1));
3236 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3237 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3238 size_zero_node, NULL_TREE, NULL_TREE);
3239 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3240 t = counts[fd->collapse - 1];
3241 }
3242 else if (fd->collapse > 1)
3243 t = fd->loop.v;
3244 else
3245 {
3246 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3247 fd->loops[0].v, fd->loops[0].n1);
3248 t = fold_convert (fd->iter_type, t);
3249 }
3250 gsi = gsi_last_bb (l0_bb);
3251 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3252 size_zero_node, NULL_TREE, NULL_TREE);
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 expand_omp_build_assign (&gsi, aref, t, true);
3256 }
3257
3258 if (!broken_loop)
3259 {
3260 /* Code to control the increment and predicate for the sequential
3261 loop goes in the CONT_BB. */
3262 gsi = gsi_last_nondebug_bb (cont_bb);
3263 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3264 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3265 vmain = gimple_omp_continue_control_use (cont_stmt);
3266 vback = gimple_omp_continue_control_def (cont_stmt);
3267
3268 if (cond_var)
3269 {
3270 tree itype = TREE_TYPE (cond_var);
3271 tree t2;
3272 if ((fd->ordered && fd->collapse == 1)
3273 || bias
3274 || POINTER_TYPE_P (type)
3275 || TREE_CODE (fd->loop.n1) != INTEGER_CST
3276 || fd->loop.cond_code != LT_EXPR)
3277 t2 = build_int_cst (itype, 1);
3278 else
3279 t2 = fold_convert (itype, fd->loop.step);
3280 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3281 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3282 NULL_TREE, true, GSI_SAME_STMT);
3283 assign_stmt = gimple_build_assign (cond_var, t2);
3284 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3285 }
3286
3287 if (!gimple_omp_for_combined_p (fd->for_stmt))
3288 {
3289 if (POINTER_TYPE_P (type))
3290 t = fold_build_pointer_plus (vmain, fd->loop.step);
3291 else
3292 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3293 t = force_gimple_operand_gsi (&gsi, t,
3294 DECL_P (vback)
3295 && TREE_ADDRESSABLE (vback),
3296 NULL_TREE, true, GSI_SAME_STMT);
3297 assign_stmt = gimple_build_assign (vback, t);
3298 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3299
3300 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3301 {
3302 tree tem;
3303 if (fd->collapse > 1)
3304 tem = fd->loop.v;
3305 else
3306 {
3307 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3308 fd->loops[0].v, fd->loops[0].n1);
3309 tem = fold_convert (fd->iter_type, tem);
3310 }
3311 tree aref = build4 (ARRAY_REF, fd->iter_type,
3312 counts[fd->ordered], size_zero_node,
3313 NULL_TREE, NULL_TREE);
3314 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3315 true, GSI_SAME_STMT);
3316 expand_omp_build_assign (&gsi, aref, tem);
3317 }
3318
3319 t = build2 (fd->loop.cond_code, boolean_type_node,
3320 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3321 iend);
3322 gcond *cond_stmt = gimple_build_cond_empty (t);
3323 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3324 }
3325
3326 /* Remove GIMPLE_OMP_CONTINUE. */
3327 gsi_remove (&gsi, true);
3328
3329 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3330 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3331
3332 /* Emit code to get the next parallel iteration in L2_BB. */
3333 gsi = gsi_start_bb (l2_bb);
3334
3335 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3336 build_fold_addr_expr (istart0),
3337 build_fold_addr_expr (iend0));
3338 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3339 false, GSI_CONTINUE_LINKING);
3340 if (TREE_TYPE (t) != boolean_type_node)
3341 t = fold_build2 (NE_EXPR, boolean_type_node,
3342 t, build_int_cst (TREE_TYPE (t), 0));
3343 gcond *cond_stmt = gimple_build_cond_empty (t);
3344 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3345 }
3346
3347 /* Add the loop cleanup function. */
3348 gsi = gsi_last_nondebug_bb (exit_bb);
3349 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3350 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3351 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3352 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3353 else
3354 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3355 gcall *call_stmt = gimple_build_call (t, 0);
3356 if (fd->ordered)
3357 {
3358 tree arr = counts[fd->ordered];
3359 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3360 TREE_THIS_VOLATILE (clobber) = 1;
3361 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3362 GSI_SAME_STMT);
3363 }
3364 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3365 {
3366 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3367 if (fd->have_reductemp)
3368 {
3369 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3370 gimple_call_lhs (call_stmt));
3371 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3372 }
3373 }
3374 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3375 gsi_remove (&gsi, true);
3376
3377 /* Connect the new blocks. */
3378 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3379 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3380
3381 if (!broken_loop)
3382 {
3383 gimple_seq phis;
3384
3385 e = find_edge (cont_bb, l3_bb);
3386 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3387
3388 phis = phi_nodes (l3_bb);
3389 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3390 {
3391 gimple *phi = gsi_stmt (gsi);
3392 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3393 PHI_ARG_DEF_FROM_EDGE (phi, e));
3394 }
3395 remove_edge (e);
3396
3397 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3398 e = find_edge (cont_bb, l1_bb);
3399 if (e == NULL)
3400 {
3401 e = BRANCH_EDGE (cont_bb);
3402 gcc_assert (single_succ (e->dest) == l1_bb);
3403 }
3404 if (gimple_omp_for_combined_p (fd->for_stmt))
3405 {
3406 remove_edge (e);
3407 e = NULL;
3408 }
3409 else if (fd->collapse > 1)
3410 {
3411 remove_edge (e);
3412 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3413 }
3414 else
3415 e->flags = EDGE_TRUE_VALUE;
3416 if (e)
3417 {
3418 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3419 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3420 }
3421 else
3422 {
3423 e = find_edge (cont_bb, l2_bb);
3424 e->flags = EDGE_FALLTHRU;
3425 }
3426 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3427
3428 if (gimple_in_ssa_p (cfun))
3429 {
3430 /* Add phis to the outer loop that connect to the phis in the inner,
3431 original loop, and move the loop entry value of the inner phi to
3432 the loop entry value of the outer phi. */
3433 gphi_iterator psi;
3434 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3435 {
3436 location_t locus;
3437 gphi *nphi;
3438 gphi *exit_phi = psi.phi ();
3439
3440 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3441 continue;
3442
3443 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3444 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3445
3446 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3447 edge latch_to_l1 = find_edge (latch, l1_bb);
3448 gphi *inner_phi
3449 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3450
3451 tree t = gimple_phi_result (exit_phi);
3452 tree new_res = copy_ssa_name (t, NULL);
3453 nphi = create_phi_node (new_res, l0_bb);
3454
3455 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3456 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3457 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3458 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3459 add_phi_arg (nphi, t, entry_to_l0, locus);
3460
3461 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3462 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3463
3464 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3465 }
3466 }
3467
3468 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3469 recompute_dominator (CDI_DOMINATORS, l2_bb));
3470 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3471 recompute_dominator (CDI_DOMINATORS, l3_bb));
3472 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3473 recompute_dominator (CDI_DOMINATORS, l0_bb));
3474 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3475 recompute_dominator (CDI_DOMINATORS, l1_bb));
3476
3477 /* We enter expand_omp_for_generic with a loop. This original loop may
3478 have its own loop struct, or it may be part of an outer loop struct
3479 (which may be the fake loop). */
3480 struct loop *outer_loop = entry_bb->loop_father;
3481 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3482
3483 add_bb_to_loop (l2_bb, outer_loop);
3484
3485 /* We've added a new loop around the original loop. Allocate the
3486 corresponding loop struct. */
3487 struct loop *new_loop = alloc_loop ();
3488 new_loop->header = l0_bb;
3489 new_loop->latch = l2_bb;
3490 add_loop (new_loop, outer_loop);
3491
3492 /* Allocate a loop structure for the original loop unless we already
3493 had one. */
3494 if (!orig_loop_has_loop_struct
3495 && !gimple_omp_for_combined_p (fd->for_stmt))
3496 {
3497 struct loop *orig_loop = alloc_loop ();
3498 orig_loop->header = l1_bb;
3499 /* The loop may have multiple latches. */
3500 add_loop (orig_loop, new_loop);
3501 }
3502 }
3503 }
3504
3505 /* A subroutine of expand_omp_for. Generate code for a parallel
3506 loop with static schedule and no specified chunk size. Given
3507 parameters:
3508
3509 for (V = N1; V cond N2; V += STEP) BODY;
3510
3511 where COND is "<" or ">", we generate pseudocode
3512
3513 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3514 if (cond is <)
3515 adj = STEP - 1;
3516 else
3517 adj = STEP + 1;
3518 if ((__typeof (V)) -1 > 0 && cond is >)
3519 n = -(adj + N2 - N1) / -STEP;
3520 else
3521 n = (adj + N2 - N1) / STEP;
3522 q = n / nthreads;
3523 tt = n % nthreads;
3524 if (threadid < tt) goto L3; else goto L4;
3525 L3:
3526 tt = 0;
3527 q = q + 1;
3528 L4:
3529 s0 = q * threadid + tt;
3530 e0 = s0 + q;
3531 V = s0 * STEP + N1;
3532 if (s0 >= e0) goto L2; else goto L0;
3533 L0:
3534 e = e0 * STEP + N1;
3535 L1:
3536 BODY;
3537 V += STEP;
3538 if (V cond e) goto L1;
3539 L2:
3540 */
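/* Illustrative sketch, not part of the expansion pass: the block
   partitioning described by the pseudocode above, written as plain C.
   The helper name omp_static_nochunk_range and its parameters are
   hypothetical and exist only for this example.  Thread THREADID is
   assigned the half-open range [*S0, *E0); the first N % NTHREADS
   threads each receive one extra iteration.  For N = 10 and
   NTHREADS = 4 the ranges are [0,3), [3,6), [6,8) and [8,10).  */

static void
omp_static_nochunk_range (long n, long nthreads, long threadid,
			  long *s0, long *e0)
{
  long q = n / nthreads;	/* Base number of iterations per thread.  */
  long tt = n % nthreads;	/* Iterations left over.  */
  if (threadid < tt)
    {
      /* The first TT threads take one extra iteration and no offset.  */
      tt = 0;
      q = q + 1;
    }
  *s0 = q * threadid + tt;	/* First iteration owned by this thread.  */
  *e0 = *s0 + q;		/* One past the last owned iteration.  */
}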
3541
3542 static void
3543 expand_omp_for_static_nochunk (struct omp_region *region,
3544 struct omp_for_data *fd,
3545 gimple *inner_stmt)
3546 {
3547 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3548 tree type, itype, vmain, vback;
3549 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3550 basic_block body_bb, cont_bb, collapse_bb = NULL;
3551 basic_block fin_bb;
3552 gimple_stmt_iterator gsi, gsip;
3553 edge ep;
3554 bool broken_loop = region->cont == NULL;
3555 tree *counts = NULL;
3556 tree n1, n2, step;
3557 tree reductions = NULL_TREE;
3558 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
3559
3560 itype = type = TREE_TYPE (fd->loop.v);
3561 if (POINTER_TYPE_P (type))
3562 itype = signed_type_for (type);
3563
3564 entry_bb = region->entry;
3565 cont_bb = region->cont;
3566 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3567 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3568 gcc_assert (broken_loop
3569 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3570 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3571 body_bb = single_succ (seq_start_bb);
3572 if (!broken_loop)
3573 {
3574 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3575 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3576 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3577 }
3578 exit_bb = region->exit;
3579
3580 /* Iteration space partitioning goes in ENTRY_BB. */
3581 gsi = gsi_last_nondebug_bb (entry_bb);
3582 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3583 gsip = gsi;
3584 gsi_prev (&gsip);
3585
3586 if (fd->collapse > 1)
3587 {
3588 int first_zero_iter = -1, dummy = -1;
3589 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3590
3591 counts = XALLOCAVEC (tree, fd->collapse);
3592 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3593 fin_bb, first_zero_iter,
3594 dummy_bb, dummy, l2_dom_bb);
3595 t = NULL_TREE;
3596 }
3597 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3598 t = integer_one_node;
3599 else
3600 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3601 fold_convert (type, fd->loop.n1),
3602 fold_convert (type, fd->loop.n2));
3603 if (fd->collapse == 1
3604 && TYPE_UNSIGNED (type)
3605 && (t == NULL_TREE || !integer_onep (t)))
3606 {
3607 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3608 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3609 true, GSI_SAME_STMT);
3610 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3611 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3612 true, GSI_SAME_STMT);
3613 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3614 NULL_TREE, NULL_TREE);
3615 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3616 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3617 expand_omp_regimplify_p, NULL, NULL)
3618 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3619 expand_omp_regimplify_p, NULL, NULL))
3620 {
3621 gsi = gsi_for_stmt (cond_stmt);
3622 gimple_regimplify_operands (cond_stmt, &gsi);
3623 }
3624 ep = split_block (entry_bb, cond_stmt);
3625 ep->flags = EDGE_TRUE_VALUE;
3626 entry_bb = ep->dest;
3627 ep->probability = profile_probability::very_likely ();
3628 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3629 ep->probability = profile_probability::very_unlikely ();
3630 if (gimple_in_ssa_p (cfun))
3631 {
3632 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3633 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3634 !gsi_end_p (gpi); gsi_next (&gpi))
3635 {
3636 gphi *phi = gpi.phi ();
3637 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3638 ep, UNKNOWN_LOCATION);
3639 }
3640 }
3641 gsi = gsi_last_bb (entry_bb);
3642 }
3643
3644 if (fd->lastprivate_conditional)
3645 {
3646 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3647 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
3648 if (fd->have_pointer_condtemp)
3649 condtemp = OMP_CLAUSE_DECL (c);
3650 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3651 cond_var = OMP_CLAUSE_DECL (c);
3652 }
3653 if (fd->have_reductemp || fd->have_pointer_condtemp)
3654 {
3655 tree t1 = build_int_cst (long_integer_type_node, 0);
3656 tree t2 = build_int_cst (long_integer_type_node, 1);
3657 tree t3 = build_int_cstu (long_integer_type_node,
3658 (HOST_WIDE_INT_1U << 31) + 1);
3659 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3660 gimple_stmt_iterator gsi2 = gsi_none ();
3661 gimple *g = NULL;
3662 tree mem = null_pointer_node, memv = NULL_TREE;
3663 if (fd->have_reductemp)
3664 {
3665 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3666 reductions = OMP_CLAUSE_DECL (c);
3667 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3668 g = SSA_NAME_DEF_STMT (reductions);
3669 reductions = gimple_assign_rhs1 (g);
3670 OMP_CLAUSE_DECL (c) = reductions;
3671 gsi2 = gsi_for_stmt (g);
3672 }
3673 else
3674 {
3675 if (gsi_end_p (gsip))
3676 gsi2 = gsi_after_labels (region->entry);
3677 else
3678 gsi2 = gsip;
3679 reductions = null_pointer_node;
3680 }
3681 if (fd->have_pointer_condtemp)
3682 {
3683 tree type = TREE_TYPE (condtemp);
3684 memv = create_tmp_var (type);
3685 TREE_ADDRESSABLE (memv) = 1;
3686 unsigned HOST_WIDE_INT sz
3687 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3688 sz *= fd->lastprivate_conditional;
3689 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
3690 false);
3691 mem = build_fold_addr_expr (memv);
3692 }
3693 tree t
3694 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3695 9, t1, t2, t2, t3, t1, null_pointer_node,
3696 null_pointer_node, reductions, mem);
3697 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3698 true, GSI_SAME_STMT);
3699 if (fd->have_pointer_condtemp)
3700 expand_omp_build_assign (&gsi2, condtemp, memv, false);
3701 if (fd->have_reductemp)
3702 {
3703 gsi_remove (&gsi2, true);
3704 release_ssa_name (gimple_assign_lhs (g));
3705 }
3706 }
3707 switch (gimple_omp_for_kind (fd->for_stmt))
3708 {
3709 case GF_OMP_FOR_KIND_FOR:
3710 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3711 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3712 break;
3713 case GF_OMP_FOR_KIND_DISTRIBUTE:
3714 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3715 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3716 break;
3717 default:
3718 gcc_unreachable ();
3719 }
3720 nthreads = build_call_expr (nthreads, 0);
3721 nthreads = fold_convert (itype, nthreads);
3722 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3723 true, GSI_SAME_STMT);
3724 threadid = build_call_expr (threadid, 0);
3725 threadid = fold_convert (itype, threadid);
3726 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3727 true, GSI_SAME_STMT);
3728
3729 n1 = fd->loop.n1;
3730 n2 = fd->loop.n2;
3731 step = fd->loop.step;
3732 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3733 {
3734 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3735 OMP_CLAUSE__LOOPTEMP_);
3736 gcc_assert (innerc);
3737 n1 = OMP_CLAUSE_DECL (innerc);
3738 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3739 OMP_CLAUSE__LOOPTEMP_);
3740 gcc_assert (innerc);
3741 n2 = OMP_CLAUSE_DECL (innerc);
3742 }
3743 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3744 true, NULL_TREE, true, GSI_SAME_STMT);
3745 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3746 true, NULL_TREE, true, GSI_SAME_STMT);
3747 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3748 true, NULL_TREE, true, GSI_SAME_STMT);
3749
3750 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3751 t = fold_build2 (PLUS_EXPR, itype, step, t);
3752 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3753 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3754 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3755 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3756 fold_build1 (NEGATE_EXPR, itype, t),
3757 fold_build1 (NEGATE_EXPR, itype, step));
3758 else
3759 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3760 t = fold_convert (itype, t);
3761 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3762
3763 q = create_tmp_reg (itype, "q");
3764 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3765 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3766 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3767
3768 tt = create_tmp_reg (itype, "tt");
3769 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3770 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3771 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3772
3773 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3774 gcond *cond_stmt = gimple_build_cond_empty (t);
3775 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3776
3777 second_bb = split_block (entry_bb, cond_stmt)->dest;
3778 gsi = gsi_last_nondebug_bb (second_bb);
3779 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3780
3781 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3782 GSI_SAME_STMT);
3783 gassign *assign_stmt
3784 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3785 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3786
3787 third_bb = split_block (second_bb, assign_stmt)->dest;
3788 gsi = gsi_last_nondebug_bb (third_bb);
3789 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3790
3791 t = build2 (MULT_EXPR, itype, q, threadid);
3792 t = build2 (PLUS_EXPR, itype, t, tt);
3793 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3794
3795 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3796 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3797
3798 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3799 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3800
3801 /* Remove the GIMPLE_OMP_FOR statement. */
3802 gsi_remove (&gsi, true);
3803
3804 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3805 gsi = gsi_start_bb (seq_start_bb);
3806
3807 tree startvar = fd->loop.v;
3808 tree endvar = NULL_TREE;
3809
3810 if (gimple_omp_for_combined_p (fd->for_stmt))
3811 {
3812 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3813 ? gimple_omp_parallel_clauses (inner_stmt)
3814 : gimple_omp_for_clauses (inner_stmt);
3815 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3816 gcc_assert (innerc);
3817 startvar = OMP_CLAUSE_DECL (innerc);
3818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3819 OMP_CLAUSE__LOOPTEMP_);
3820 gcc_assert (innerc);
3821 endvar = OMP_CLAUSE_DECL (innerc);
3822 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3823 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3824 {
3825 int i;
3826 for (i = 1; i < fd->collapse; i++)
3827 {
3828 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3829 OMP_CLAUSE__LOOPTEMP_);
3830 gcc_assert (innerc);
3831 }
3832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3833 OMP_CLAUSE__LOOPTEMP_);
3834 if (innerc)
3835 {
3836 /* If needed (distribute parallel for with lastprivate),
3837 propagate down the total number of iterations. */
3838 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3839 fd->loop.n2);
3840 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3841 GSI_CONTINUE_LINKING);
3842 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3843 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3844 }
3845 }
3846 }
3847 t = fold_convert (itype, s0);
3848 t = fold_build2 (MULT_EXPR, itype, t, step);
3849 if (POINTER_TYPE_P (type))
3850 {
3851 t = fold_build_pointer_plus (n1, t);
3852 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3853 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3854 t = fold_convert (signed_type_for (type), t);
3855 }
3856 else
3857 t = fold_build2 (PLUS_EXPR, type, t, n1);
3858 t = fold_convert (TREE_TYPE (startvar), t);
3859 t = force_gimple_operand_gsi (&gsi, t,
3860 DECL_P (startvar)
3861 && TREE_ADDRESSABLE (startvar),
3862 NULL_TREE, false, GSI_CONTINUE_LINKING);
3863 assign_stmt = gimple_build_assign (startvar, t);
3864 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3865 if (cond_var)
3866 {
3867 tree itype = TREE_TYPE (cond_var);
3868 /* For the lastprivate(conditional:) itervar, we need an iteration
3869 counter that starts at a non-zero unsigned value and increases.
3870 Prefer as few IVs as possible, so if we can use startvar
3871 itself, use that, or startvar + constant (those would be
3872 incremented with step); as a last resort use s0 + 1,
3873 incremented by 1. */
3874 if (POINTER_TYPE_P (type)
3875 || TREE_CODE (n1) != INTEGER_CST
3876 || fd->loop.cond_code != LT_EXPR)
3877 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
3878 build_int_cst (itype, 1));
3879 else if (tree_int_cst_sgn (n1) == 1)
3880 t = fold_convert (itype, t);
3881 else
3882 {
3883 tree c = fold_convert (itype, n1);
3884 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
3885 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
3886 }
3887 t = force_gimple_operand_gsi (&gsi, t, false,
3888 NULL_TREE, false, GSI_CONTINUE_LINKING);
3889 assign_stmt = gimple_build_assign (cond_var, t);
3890 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3891 }
3892
3893 t = fold_convert (itype, e0);
3894 t = fold_build2 (MULT_EXPR, itype, t, step);
3895 if (POINTER_TYPE_P (type))
3896 {
3897 t = fold_build_pointer_plus (n1, t);
3898 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3899 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3900 t = fold_convert (signed_type_for (type), t);
3901 }
3902 else
3903 t = fold_build2 (PLUS_EXPR, type, t, n1);
3904 t = fold_convert (TREE_TYPE (startvar), t);
3905 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3906 false, GSI_CONTINUE_LINKING);
3907 if (endvar)
3908 {
3909 assign_stmt = gimple_build_assign (endvar, e);
3910 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3911 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3912 assign_stmt = gimple_build_assign (fd->loop.v, e);
3913 else
3914 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3915 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3916 }
3917 /* Handle linear clause adjustments. */
3918 tree itercnt = NULL_TREE;
3919 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3920 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3921 c; c = OMP_CLAUSE_CHAIN (c))
3922 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3923 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3924 {
3925 tree d = OMP_CLAUSE_DECL (c);
3926 bool is_ref = omp_is_reference (d);
3927 tree t = d, a, dest;
3928 if (is_ref)
3929 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3930 if (itercnt == NULL_TREE)
3931 {
3932 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3933 {
3934 itercnt = fold_build2 (MINUS_EXPR, itype,
3935 fold_convert (itype, n1),
3936 fold_convert (itype, fd->loop.n1));
3937 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3938 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3939 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3940 NULL_TREE, false,
3941 GSI_CONTINUE_LINKING);
3942 }
3943 else
3944 itercnt = s0;
3945 }
3946 tree type = TREE_TYPE (t);
3947 if (POINTER_TYPE_P (type))
3948 type = sizetype;
3949 a = fold_build2 (MULT_EXPR, type,
3950 fold_convert (type, itercnt),
3951 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3952 dest = unshare_expr (t);
3953 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3954 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3955 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3956 false, GSI_CONTINUE_LINKING);
3957 assign_stmt = gimple_build_assign (dest, t);
3958 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3959 }
3960 if (fd->collapse > 1)
3961 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3962
3963 if (!broken_loop)
3964 {
3965 /* The code controlling the sequential loop replaces the
3966 GIMPLE_OMP_CONTINUE. */
3967 gsi = gsi_last_nondebug_bb (cont_bb);
3968 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3969 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3970 vmain = gimple_omp_continue_control_use (cont_stmt);
3971 vback = gimple_omp_continue_control_def (cont_stmt);
3972
3973 if (cond_var)
3974 {
3975 tree itype = TREE_TYPE (cond_var);
3976 tree t2;
3977 if (POINTER_TYPE_P (type)
3978 || TREE_CODE (n1) != INTEGER_CST
3979 || fd->loop.cond_code != LT_EXPR)
3980 t2 = build_int_cst (itype, 1);
3981 else
3982 t2 = fold_convert (itype, step);
3983 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
3984 t2 = force_gimple_operand_gsi (&gsi, t2, false,
3985 NULL_TREE, true, GSI_SAME_STMT);
3986 assign_stmt = gimple_build_assign (cond_var, t2);
3987 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3988 }
3989
3990 if (!gimple_omp_for_combined_p (fd->for_stmt))
3991 {
3992 if (POINTER_TYPE_P (type))
3993 t = fold_build_pointer_plus (vmain, step);
3994 else
3995 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3996 t = force_gimple_operand_gsi (&gsi, t,
3997 DECL_P (vback)
3998 && TREE_ADDRESSABLE (vback),
3999 NULL_TREE, true, GSI_SAME_STMT);
4000 assign_stmt = gimple_build_assign (vback, t);
4001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4002
4003 t = build2 (fd->loop.cond_code, boolean_type_node,
4004 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4005 ? t : vback, e);
4006 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4007 }
4008
4009 /* Remove the GIMPLE_OMP_CONTINUE statement. */
4010 gsi_remove (&gsi, true);
4011
4012 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4013 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4014 }
4015
4016 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4017 gsi = gsi_last_nondebug_bb (exit_bb);
4018 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4019 {
4020 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4021 if (fd->have_reductemp || fd->have_pointer_condtemp)
4022 {
4023 tree fn;
4024 if (t)
4025 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4026 else
4027 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4028 gcall *g = gimple_build_call (fn, 0);
4029 if (t)
4030 {
4031 gimple_call_set_lhs (g, t);
4032 if (fd->have_reductemp)
4033 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4034 NOP_EXPR, t),
4035 GSI_SAME_STMT);
4036 }
4037 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4038 }
4039 else
4040 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4041 }
4042 gsi_remove (&gsi, true);
4043
4044 /* Connect all the blocks. */
4045 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
4046 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
4047 ep = find_edge (entry_bb, second_bb);
4048 ep->flags = EDGE_TRUE_VALUE;
4049 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
4050 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
4051 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
4052
4053 if (!broken_loop)
4054 {
4055 ep = find_edge (cont_bb, body_bb);
4056 if (ep == NULL)
4057 {
4058 ep = BRANCH_EDGE (cont_bb);
4059 gcc_assert (single_succ (ep->dest) == body_bb);
4060 }
4061 if (gimple_omp_for_combined_p (fd->for_stmt))
4062 {
4063 remove_edge (ep);
4064 ep = NULL;
4065 }
4066 else if (fd->collapse > 1)
4067 {
4068 remove_edge (ep);
4069 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4070 }
4071 else
4072 ep->flags = EDGE_TRUE_VALUE;
4073 find_edge (cont_bb, fin_bb)->flags
4074 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4075 }
4076
4077 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
4078 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
4079 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
4080
4081 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4082 recompute_dominator (CDI_DOMINATORS, body_bb));
4083 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4084 recompute_dominator (CDI_DOMINATORS, fin_bb));
4085
4086 struct loop *loop = body_bb->loop_father;
4087 if (loop != entry_bb->loop_father)
4088 {
4089 gcc_assert (broken_loop || loop->header == body_bb);
4090 gcc_assert (broken_loop
4091 || loop->latch == region->cont
4092 || single_pred (loop->latch) == region->cont);
4093 return;
4094 }
4095
4096 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
4097 {
4098 loop = alloc_loop ();
4099 loop->header = body_bb;
4100 if (collapse_bb == NULL)
4101 loop->latch = cont_bb;
4102 add_loop (loop, body_bb->loop_father);
4103 }
4104 }
4105
4106 /* Return the PHI node in E->DEST whose argument on edge E is ARG, or NULL. */
4107
4108 static gphi *
4109 find_phi_with_arg_on_edge (tree arg, edge e)
4110 {
4111 basic_block bb = e->dest;
4112
4113 for (gphi_iterator gpi = gsi_start_phis (bb);
4114 !gsi_end_p (gpi);
4115 gsi_next (&gpi))
4116 {
4117 gphi *phi = gpi.phi ();
4118 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
4119 return phi;
4120 }
4121
4122 return NULL;
4123 }
4124
4125 /* A subroutine of expand_omp_for. Generate code for a parallel
4126 loop with static schedule and a specified chunk size. Given
4127 parameters:
4128
4129 for (V = N1; V cond N2; V += STEP) BODY;
4130
4131 where COND is "<" or ">", we generate pseudocode
4132
4133 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4134 if (cond is <)
4135 adj = STEP - 1;
4136 else
4137 adj = STEP + 1;
4138 if ((__typeof (V)) -1 > 0 && cond is >)
4139 n = -(adj + N2 - N1) / -STEP;
4140 else
4141 n = (adj + N2 - N1) / STEP;
4142 trip = 0;
4143 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
4144 here so that V is defined
4145 if the loop is not entered
4146 L0:
4147 s0 = (trip * nthreads + threadid) * CHUNK;
4148 e0 = min (s0 + CHUNK, n);
4149 if (s0 < n) goto L1; else goto L4;
4150 L1:
4151 V = s0 * STEP + N1;
4152 e = e0 * STEP + N1;
4153 L2:
4154 BODY;
4155 V += STEP;
4156 if (V cond e) goto L2; else goto L3;
4157 L3:
4158 trip += 1;
4159 goto L0;
4160 L4:
4161 */
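/* Illustrative sketch, not part of the expansion pass: the chunked
   round-robin schedule from the pseudocode above as a plain C loop.
   The helper name omp_static_chunk_walk, its parameters and the BODY
   callback are hypothetical and exist only for this example.  On trip
   TRIP, thread THREADID handles logical iterations
   [(TRIP * NTHREADS + THREADID) * CHUNK, min (that + CHUNK, N)).  */

static void
omp_static_chunk_walk (long n, long nthreads, long threadid, long chunk,
		       void (*body) (long))
{
  for (long trip = 0; ; trip++)
    {
      long s0 = (trip * nthreads + threadid) * chunk;
      if (s0 >= n)
	break;			/* No chunk left for this thread (L4).  */
      long e0 = s0 + chunk < n ? s0 + chunk : n;
      /* The pseudocode's V is v * STEP + N1; iterate logical numbers.  */
      for (long v = s0; v < e0; v++)
	body (v);
    }
}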
4162
4163 static void
4164 expand_omp_for_static_chunk (struct omp_region *region,
4165 struct omp_for_data *fd, gimple *inner_stmt)
4166 {
4167 tree n, s0, e0, e, t;
4168 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
4169 tree type, itype, vmain, vback, vextra;
4170 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
4171 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
4172 gimple_stmt_iterator gsi, gsip;
4173 edge se;
4174 bool broken_loop = region->cont == NULL;
4175 tree *counts = NULL;
4176 tree n1, n2, step;
4177 tree reductions = NULL_TREE;
4178 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4179
4180 itype = type = TREE_TYPE (fd->loop.v);
4181 if (POINTER_TYPE_P (type))
4182 itype = signed_type_for (type);
4183
4184 entry_bb = region->entry;
4185 se = split_block (entry_bb, last_stmt (entry_bb));
4186 entry_bb = se->src;
4187 iter_part_bb = se->dest;
4188 cont_bb = region->cont;
4189 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4190 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4191 gcc_assert (broken_loop
4192 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4193 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4194 body_bb = single_succ (seq_start_bb);
4195 if (!broken_loop)
4196 {
4197 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4198 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4199 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4200 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4201 }
4202 exit_bb = region->exit;
4203
4204 /* Trip and adjustment setup goes in ENTRY_BB. */
4205 gsi = gsi_last_nondebug_bb (entry_bb);
4206 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4207 gsip = gsi;
4208 gsi_prev (&gsip);
4209
4210 if (fd->collapse > 1)
4211 {
4212 int first_zero_iter = -1, dummy = -1;
4213 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4214
4215 counts = XALLOCAVEC (tree, fd->collapse);
4216 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4217 fin_bb, first_zero_iter,
4218 dummy_bb, dummy, l2_dom_bb);
4219 t = NULL_TREE;
4220 }
4221 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4222 t = integer_one_node;
4223 else
4224 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4225 fold_convert (type, fd->loop.n1),
4226 fold_convert (type, fd->loop.n2));
4227 if (fd->collapse == 1
4228 && TYPE_UNSIGNED (type)
4229 && (t == NULL_TREE || !integer_onep (t)))
4230 {
4231 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4232 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4233 true, GSI_SAME_STMT);
4234 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4235 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4236 true, GSI_SAME_STMT);
4237 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4238 NULL_TREE, NULL_TREE);
4239 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4240 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4241 expand_omp_regimplify_p, NULL, NULL)
4242 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4243 expand_omp_regimplify_p, NULL, NULL))
4244 {
4245 gsi = gsi_for_stmt (cond_stmt);
4246 gimple_regimplify_operands (cond_stmt, &gsi);
4247 }
4248 se = split_block (entry_bb, cond_stmt);
4249 se->flags = EDGE_TRUE_VALUE;
4250 entry_bb = se->dest;
4251 se->probability = profile_probability::very_likely ();
4252 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
4253 se->probability = profile_probability::very_unlikely ();
4254 if (gimple_in_ssa_p (cfun))
4255 {
4256 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4257 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4258 !gsi_end_p (gpi); gsi_next (&gpi))
4259 {
4260 gphi *phi = gpi.phi ();
4261 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4262 se, UNKNOWN_LOCATION);
4263 }
4264 }
4265 gsi = gsi_last_bb (entry_bb);
4266 }
4267
4268 if (fd->lastprivate_conditional)
4269 {
4270 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4271 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4272 if (fd->have_pointer_condtemp)
4273 condtemp = OMP_CLAUSE_DECL (c);
4274 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4275 cond_var = OMP_CLAUSE_DECL (c);
4276 }
4277 if (fd->have_reductemp || fd->have_pointer_condtemp)
4278 {
4279 tree t1 = build_int_cst (long_integer_type_node, 0);
4280 tree t2 = build_int_cst (long_integer_type_node, 1);
4281 tree t3 = build_int_cstu (long_integer_type_node,
4282 (HOST_WIDE_INT_1U << 31) + 1);
4283 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4284 gimple_stmt_iterator gsi2 = gsi_none ();
4285 gimple *g = NULL;
4286 tree mem = null_pointer_node, memv = NULL_TREE;
4287 if (fd->have_reductemp)
4288 {
4289 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4290 reductions = OMP_CLAUSE_DECL (c);
4291 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4292 g = SSA_NAME_DEF_STMT (reductions);
4293 reductions = gimple_assign_rhs1 (g);
4294 OMP_CLAUSE_DECL (c) = reductions;
4295 gsi2 = gsi_for_stmt (g);
4296 }
4297 else
4298 {
4299 if (gsi_end_p (gsip))
4300 gsi2 = gsi_after_labels (region->entry);
4301 else
4302 gsi2 = gsip;
4303 reductions = null_pointer_node;
4304 }
4305 if (fd->have_pointer_condtemp)
4306 {
4307 tree type = TREE_TYPE (condtemp);
4308 memv = create_tmp_var (type);
4309 TREE_ADDRESSABLE (memv) = 1;
4310 unsigned HOST_WIDE_INT sz
4311 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4312 sz *= fd->lastprivate_conditional;
4313 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
4314 false);
4315 mem = build_fold_addr_expr (memv);
4316 }
4317 tree t
4318 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4319 9, t1, t2, t2, t3, t1, null_pointer_node,
4320 null_pointer_node, reductions, mem);
4321 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4322 true, GSI_SAME_STMT);
4323 if (fd->have_pointer_condtemp)
4324 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4325 if (fd->have_reductemp)
4326 {
4327 gsi_remove (&gsi2, true);
4328 release_ssa_name (gimple_assign_lhs (g));
4329 }
4330 }
4331 switch (gimple_omp_for_kind (fd->for_stmt))
4332 {
4333 case GF_OMP_FOR_KIND_FOR:
4334 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4335 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4336 break;
4337 case GF_OMP_FOR_KIND_DISTRIBUTE:
4338 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4339 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4340 break;
4341 default:
4342 gcc_unreachable ();
4343 }
4344 nthreads = build_call_expr (nthreads, 0);
4345 nthreads = fold_convert (itype, nthreads);
4346 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4347 true, GSI_SAME_STMT);
4348 threadid = build_call_expr (threadid, 0);
4349 threadid = fold_convert (itype, threadid);
4350 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4351 true, GSI_SAME_STMT);
4352
4353 n1 = fd->loop.n1;
4354 n2 = fd->loop.n2;
4355 step = fd->loop.step;
4356 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4357 {
4358 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4359 OMP_CLAUSE__LOOPTEMP_);
4360 gcc_assert (innerc);
4361 n1 = OMP_CLAUSE_DECL (innerc);
4362 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4363 OMP_CLAUSE__LOOPTEMP_);
4364 gcc_assert (innerc);
4365 n2 = OMP_CLAUSE_DECL (innerc);
4366 }
4367 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4368 true, NULL_TREE, true, GSI_SAME_STMT);
4369 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4370 true, NULL_TREE, true, GSI_SAME_STMT);
4371 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4372 true, NULL_TREE, true, GSI_SAME_STMT);
4373 tree chunk_size = fold_convert (itype, fd->chunk_size);
4374 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4375 chunk_size
4376 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4377 GSI_SAME_STMT);
4378
4379 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4380 t = fold_build2 (PLUS_EXPR, itype, step, t);
4381 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4382 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4383 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4384 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4385 fold_build1 (NEGATE_EXPR, itype, t),
4386 fold_build1 (NEGATE_EXPR, itype, step));
4387 else
4388 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4389 t = fold_convert (itype, t);
4390 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4391 true, GSI_SAME_STMT);
4392
4393 trip_var = create_tmp_reg (itype, ".trip");
4394 if (gimple_in_ssa_p (cfun))
4395 {
4396 trip_init = make_ssa_name (trip_var);
4397 trip_main = make_ssa_name (trip_var);
4398 trip_back = make_ssa_name (trip_var);
4399 }
4400 else
4401 {
4402 trip_init = trip_var;
4403 trip_main = trip_var;
4404 trip_back = trip_var;
4405 }
4406
4407 gassign *assign_stmt
4408 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4409 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4410
4411 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4412 t = fold_build2 (MULT_EXPR, itype, t, step);
4413 if (POINTER_TYPE_P (type))
4414 t = fold_build_pointer_plus (n1, t);
4415 else
4416 t = fold_build2 (PLUS_EXPR, type, t, n1);
4417 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4418 true, GSI_SAME_STMT);
4419
4420 /* Remove the GIMPLE_OMP_FOR. */
4421 gsi_remove (&gsi, true);
4422
4423 gimple_stmt_iterator gsif = gsi;
4424
4425 /* Iteration space partitioning goes in ITER_PART_BB. */
4426 gsi = gsi_last_bb (iter_part_bb);
4427
4428 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4429 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4430 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4431 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4432 false, GSI_CONTINUE_LINKING);
4433
4434 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4435 t = fold_build2 (MIN_EXPR, itype, t, n);
4436 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4437 false, GSI_CONTINUE_LINKING);
4438
4439 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4440 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4441
4442 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4443 gsi = gsi_start_bb (seq_start_bb);
4444
4445 tree startvar = fd->loop.v;
4446 tree endvar = NULL_TREE;
4447
4448 if (gimple_omp_for_combined_p (fd->for_stmt))
4449 {
4450 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4451 ? gimple_omp_parallel_clauses (inner_stmt)
4452 : gimple_omp_for_clauses (inner_stmt);
4453 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4454 gcc_assert (innerc);
4455 startvar = OMP_CLAUSE_DECL (innerc);
4456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4457 OMP_CLAUSE__LOOPTEMP_);
4458 gcc_assert (innerc);
4459 endvar = OMP_CLAUSE_DECL (innerc);
4460 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4461 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4462 {
4463 int i;
4464 for (i = 1; i < fd->collapse; i++)
4465 {
4466 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4467 OMP_CLAUSE__LOOPTEMP_);
4468 gcc_assert (innerc);
4469 }
4470 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4471 OMP_CLAUSE__LOOPTEMP_);
4472 if (innerc)
4473 {
4474 /* If needed (distribute parallel for with lastprivate),
4475 propagate down the total number of iterations. */
4476 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4477 fd->loop.n2);
4478 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4479 GSI_CONTINUE_LINKING);
4480 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4481 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4482 }
4483 }
4484 }
4485
4486 t = fold_convert (itype, s0);
4487 t = fold_build2 (MULT_EXPR, itype, t, step);
4488 if (POINTER_TYPE_P (type))
4489 {
4490 t = fold_build_pointer_plus (n1, t);
4491 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4492 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4493 t = fold_convert (signed_type_for (type), t);
4494 }
4495 else
4496 t = fold_build2 (PLUS_EXPR, type, t, n1);
4497 t = fold_convert (TREE_TYPE (startvar), t);
4498 t = force_gimple_operand_gsi (&gsi, t,
4499 DECL_P (startvar)
4500 && TREE_ADDRESSABLE (startvar),
4501 NULL_TREE, false, GSI_CONTINUE_LINKING);
4502 assign_stmt = gimple_build_assign (startvar, t);
4503 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4504 if (cond_var)
4505 {
4506 tree itype = TREE_TYPE (cond_var);
4507 /* For the lastprivate(conditional:) itervar, we need an iteration
4508 counter that starts at a non-zero unsigned value and increases.
4509 Prefer as few IVs as possible, so if we can use startvar
4510 itself, use that, or startvar + constant (those would be
4511 incremented with step); as a last resort use s0 + 1,
4512 incremented by 1. */
4513 if (POINTER_TYPE_P (type)
4514 || TREE_CODE (n1) != INTEGER_CST
4515 || fd->loop.cond_code != LT_EXPR)
4516 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
4517 build_int_cst (itype, 1));
4518 else if (tree_int_cst_sgn (n1) == 1)
4519 t = fold_convert (itype, t);
4520 else
4521 {
4522 tree c = fold_convert (itype, n1);
4523 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4524 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4525 }
4526 t = force_gimple_operand_gsi (&gsi, t, false,
4527 NULL_TREE, false, GSI_CONTINUE_LINKING);
4528 assign_stmt = gimple_build_assign (cond_var, t);
4529 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4530 }
4531
4532 t = fold_convert (itype, e0);
4533 t = fold_build2 (MULT_EXPR, itype, t, step);
4534 if (POINTER_TYPE_P (type))
4535 {
4536 t = fold_build_pointer_plus (n1, t);
4537 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4538 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4539 t = fold_convert (signed_type_for (type), t);
4540 }
4541 else
4542 t = fold_build2 (PLUS_EXPR, type, t, n1);
4543 t = fold_convert (TREE_TYPE (startvar), t);
4544 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4545 false, GSI_CONTINUE_LINKING);
4546 if (endvar)
4547 {
4548 assign_stmt = gimple_build_assign (endvar, e);
4549 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4550 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4551 assign_stmt = gimple_build_assign (fd->loop.v, e);
4552 else
4553 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4554 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4555 }
4556 /* Handle linear clause adjustments. */
4557 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4558 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4559 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4560 c; c = OMP_CLAUSE_CHAIN (c))
4561 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4562 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4563 {
4564 tree d = OMP_CLAUSE_DECL (c);
4565 bool is_ref = omp_is_reference (d);
4566 tree t = d, a, dest;
4567 if (is_ref)
4568 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4569 tree type = TREE_TYPE (t);
4570 if (POINTER_TYPE_P (type))
4571 type = sizetype;
4572 dest = unshare_expr (t);
4573 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4574 expand_omp_build_assign (&gsif, v, t);
4575 if (itercnt == NULL_TREE)
4576 {
4577 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4578 {
4579 itercntbias
4580 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4581 fold_convert (itype, fd->loop.n1));
4582 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4583 itercntbias, step);
4584 itercntbias
4585 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4586 NULL_TREE, true,
4587 GSI_SAME_STMT);
4588 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4589 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4590 NULL_TREE, false,
4591 GSI_CONTINUE_LINKING);
4592 }
4593 else
4594 itercnt = s0;
4595 }
4596 a = fold_build2 (MULT_EXPR, type,
4597 fold_convert (type, itercnt),
4598 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4599 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4600 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4601 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4602 false, GSI_CONTINUE_LINKING);
4603 assign_stmt = gimple_build_assign (dest, t);
4604 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4605 }
4606 if (fd->collapse > 1)
4607 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4608
4609 if (!broken_loop)
4610 {
4611 /* The code controlling the sequential loop goes in CONT_BB,
4612 replacing the GIMPLE_OMP_CONTINUE. */
4613 gsi = gsi_last_nondebug_bb (cont_bb);
4614 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4615 vmain = gimple_omp_continue_control_use (cont_stmt);
4616 vback = gimple_omp_continue_control_def (cont_stmt);
4617
4618 if (cond_var)
4619 {
4620 tree itype = TREE_TYPE (cond_var);
4621 tree t2;
4622 if (POINTER_TYPE_P (type)
4623 || TREE_CODE (n1) != INTEGER_CST
4624 || fd->loop.cond_code != LT_EXPR)
4625 t2 = build_int_cst (itype, 1);
4626 else
4627 t2 = fold_convert (itype, step);
4628 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4629 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4630 NULL_TREE, true, GSI_SAME_STMT);
4631 assign_stmt = gimple_build_assign (cond_var, t2);
4632 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4633 }
4634
4635 if (!gimple_omp_for_combined_p (fd->for_stmt))
4636 {
4637 if (POINTER_TYPE_P (type))
4638 t = fold_build_pointer_plus (vmain, step);
4639 else
4640 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4641 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4642 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4643 true, GSI_SAME_STMT);
4644 assign_stmt = gimple_build_assign (vback, t);
4645 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4646
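		      /* With a chunk size of 1 every chunk covers a single
			 iteration, so the sequential loop never repeats;
			 build a condition that is always false (0 == 1)
			 instead of comparing the iteration variable
			 against E.  */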
4647 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4648 t = build2 (EQ_EXPR, boolean_type_node,
4649 build_int_cst (itype, 0),
4650 build_int_cst (itype, 1));
4651 else
4652 t = build2 (fd->loop.cond_code, boolean_type_node,
4653 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4654 ? t : vback, e);
4655 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4656 }
4657
4658 /* Remove GIMPLE_OMP_CONTINUE. */
4659 gsi_remove (&gsi, true);
4660
4661 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4662 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4663
4664 /* Trip update code goes into TRIP_UPDATE_BB. */
4665 gsi = gsi_start_bb (trip_update_bb);
4666
4667 t = build_int_cst (itype, 1);
4668 t = build2 (PLUS_EXPR, itype, trip_main, t);
4669 assign_stmt = gimple_build_assign (trip_back, t);
4670 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4671 }
4672
4673 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4674 gsi = gsi_last_nondebug_bb (exit_bb);
4675 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4676 {
4677 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4678 if (fd->have_reductemp || fd->have_pointer_condtemp)
4679 {
4680 tree fn;
4681 if (t)
4682 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4683 else
4684 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4685 gcall *g = gimple_build_call (fn, 0);
4686 if (t)
4687 {
4688 gimple_call_set_lhs (g, t);
4689 if (fd->have_reductemp)
4690 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4691 NOP_EXPR, t),
4692 GSI_SAME_STMT);
4693 }
4694 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4695 }
4696 else
4697 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4698 }
4699 gsi_remove (&gsi, true);
4700
4701 /* Connect the new blocks. */
4702 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4703 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4704
4705 if (!broken_loop)
4706 {
4707 se = find_edge (cont_bb, body_bb);
4708 if (se == NULL)
4709 {
4710 se = BRANCH_EDGE (cont_bb);
4711 gcc_assert (single_succ (se->dest) == body_bb);
4712 }
4713 if (gimple_omp_for_combined_p (fd->for_stmt))
4714 {
4715 remove_edge (se);
4716 se = NULL;
4717 }
4718 else if (fd->collapse > 1)
4719 {
4720 remove_edge (se);
4721 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4722 }
4723 else
4724 se->flags = EDGE_TRUE_VALUE;
4725 find_edge (cont_bb, trip_update_bb)->flags
4726 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4727
4728 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4729 iter_part_bb);
4730 }
4731
4732 if (gimple_in_ssa_p (cfun))
4733 {
4734 gphi_iterator psi;
4735 gphi *phi;
4736 edge re, ene;
4737 edge_var_map *vm;
4738 size_t i;
4739
4740 gcc_assert (fd->collapse == 1 && !broken_loop);
4741
4742 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4743 remove arguments of the phi nodes in fin_bb. We need to create
4744 appropriate phi nodes in iter_part_bb instead. */
4745 se = find_edge (iter_part_bb, fin_bb);
4746 re = single_succ_edge (trip_update_bb);
4747 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4748 ene = single_succ_edge (entry_bb);
4749
4750 psi = gsi_start_phis (fin_bb);
4751 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4752 gsi_next (&psi), ++i)
4753 {
4754 gphi *nphi;
4755 location_t locus;
4756
4757 phi = psi.phi ();
4758 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4759 redirect_edge_var_map_def (vm), 0))
4760 continue;
4761
4762 t = gimple_phi_result (phi);
4763 gcc_assert (t == redirect_edge_var_map_result (vm));
4764
4765 if (!single_pred_p (fin_bb))
4766 t = copy_ssa_name (t, phi);
4767
4768 nphi = create_phi_node (t, iter_part_bb);
4769
4770 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4771 locus = gimple_phi_arg_location_from_edge (phi, se);
4772
4773 /* A special case -- fd->loop.v is not yet computed in
4774 iter_part_bb; we need to use vextra instead. */
4775 if (t == fd->loop.v)
4776 t = vextra;
4777 add_phi_arg (nphi, t, ene, locus);
4778 locus = redirect_edge_var_map_location (vm);
4779 tree back_arg = redirect_edge_var_map_def (vm);
4780 add_phi_arg (nphi, back_arg, re, locus);
4781 edge ce = find_edge (cont_bb, body_bb);
4782 if (ce == NULL)
4783 {
4784 ce = BRANCH_EDGE (cont_bb);
4785 gcc_assert (single_succ (ce->dest) == body_bb);
4786 ce = single_succ_edge (ce->dest);
4787 }
4788 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4789 gcc_assert (inner_loop_phi != NULL);
4790 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4791 find_edge (seq_start_bb, body_bb), locus);
4792
4793 if (!single_pred_p (fin_bb))
4794 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4795 }
4796 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4797 redirect_edge_var_map_clear (re);
4798 if (single_pred_p (fin_bb))
4799 while (1)
4800 {
4801 psi = gsi_start_phis (fin_bb);
4802 if (gsi_end_p (psi))
4803 break;
4804 remove_phi_node (&psi, false);
4805 }
4806
4807 /* Make phi node for trip. */
4808 phi = create_phi_node (trip_main, iter_part_bb);
4809 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4810 UNKNOWN_LOCATION);
4811 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4812 UNKNOWN_LOCATION);
4813 }
4814
4815 if (!broken_loop)
4816 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4817 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4818 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4819 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4820 recompute_dominator (CDI_DOMINATORS, fin_bb));
4821 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4822 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4823 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4824 recompute_dominator (CDI_DOMINATORS, body_bb));
4825
4826 if (!broken_loop)
4827 {
4828 struct loop *loop = body_bb->loop_father;
4829 struct loop *trip_loop = alloc_loop ();
4830 trip_loop->header = iter_part_bb;
4831 trip_loop->latch = trip_update_bb;
4832 add_loop (trip_loop, iter_part_bb->loop_father);
4833
4834 if (loop != entry_bb->loop_father)
4835 {
4836 gcc_assert (loop->header == body_bb);
4837 gcc_assert (loop->latch == region->cont
4838 || single_pred (loop->latch) == region->cont);
4839 trip_loop->inner = loop;
4840 return;
4841 }
4842
4843 if (!gimple_omp_for_combined_p (fd->for_stmt))
4844 {
4845 loop = alloc_loop ();
4846 loop->header = body_bb;
4847 if (collapse_bb == NULL)
4848 loop->latch = cont_bb;
4849 add_loop (loop, trip_loop);
4850 }
4851 }
4852 }
4853
4854 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4855 loop. Given parameters:
4856
4857 for (V = N1; V cond N2; V += STEP) BODY;
4858
4859 where COND is "<" or ">", we generate pseudocode
4860
4861 V = N1;
4862 goto L1;
4863 L0:
4864 BODY;
4865 V += STEP;
4866 L1:
4867 if (V cond N2) goto L0; else goto L2;
4868 L2:
4869
4870 For collapsed loops, given parameters:
4871 collapse(3)
4872 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4873 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4874 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4875 BODY;
4876
4877 we generate pseudocode
4878
4879 if (cond3 is <)
4880 adj = STEP3 - 1;
4881 else
4882 adj = STEP3 + 1;
4883 count3 = (adj + N32 - N31) / STEP3;
4884 if (cond2 is <)
4885 adj = STEP2 - 1;
4886 else
4887 adj = STEP2 + 1;
4888 count2 = (adj + N22 - N21) / STEP2;
4889 if (cond1 is <)
4890 adj = STEP1 - 1;
4891 else
4892 adj = STEP1 + 1;
4893 count1 = (adj + N12 - N11) / STEP1;
4894 count = count1 * count2 * count3;
4895 V = 0;
4896 V1 = N11;
4897 V2 = N21;
4898 V3 = N31;
4899 goto L1;
4900 L0:
4901 BODY;
4902 V += 1;
4903 V3 += STEP3;
4904 V2 += (V3 cond3 N32) ? 0 : STEP2;
4905 V3 = (V3 cond3 N32) ? V3 : N31;
4906 V1 += (V2 cond2 N22) ? 0 : STEP1;
4907 V2 = (V2 cond2 N22) ? V2 : N21;
4908 L1:
4909 if (V < count) goto L0; else goto L2;
4910 L2:
4911
4912 */
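/* Illustrative sketch, not part of the expansion pass: the collapsed-loop
   lowering from the pseudocode above, specialised to collapse(2) and
   written as plain C.  The helper name collapse2_linearized, its bound
   and step parameters and the BODY callback are hypothetical; the trip
   counts also assume the "<" condition with positive steps.  A single
   linear counter V drives the loop while V1 and V2 are kept in sync with
   the conditional-step updates, so BODY still sees the original indices.  */

static void
collapse2_linearized (long n11, long n12, long step1,
		      long n21, long n22, long step2,
		      void (*body) (long, long))
{
  long count1 = (step1 - 1 + n12 - n11) / step1;   /* Outer trip count.  */
  long count2 = (step2 - 1 + n22 - n21) / step2;   /* Inner trip count.  */
  long count = count1 * count2;

  long v1 = n11, v2 = n21;
  for (long v = 0; v < count; v++)
    {
      body (v1, v2);
      /* Advance the innermost variable; when it runs past its bound,
	 reset it and step the next outer variable, as in the pseudocode.  */
      v2 += step2;
      v1 += (v2 < n22) ? 0 : step1;
      v2 = (v2 < n22) ? v2 : n21;
    }
}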
4913
4914 static void
4915 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4916 {
4917 tree type, t;
4918 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4919 gimple_stmt_iterator gsi;
4920 gimple *stmt;
4921 gcond *cond_stmt;
4922 bool broken_loop = region->cont == NULL;
4923 edge e, ne;
4924 tree *counts = NULL;
4925 int i;
4926 int safelen_int = INT_MAX;
4927 bool dont_vectorize = false;
4928 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4929 OMP_CLAUSE_SAFELEN);
4930 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4931 OMP_CLAUSE__SIMDUID_);
4932 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4933 OMP_CLAUSE_IF);
4934 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4935 OMP_CLAUSE_SIMDLEN);
4936 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4937 OMP_CLAUSE__CONDTEMP_);
4938 tree n1, n2;
4939 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
4940
4941 if (safelen)
4942 {
4943 poly_uint64 val;
4944 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4945 if (!poly_int_tree_p (safelen, &val))
4946 safelen_int = 0;
4947 else
4948 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4949 if (safelen_int == 1)
4950 safelen_int = 0;
4951 }
4952 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
4953 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
4954 {
4955 safelen_int = 0;
4956 dont_vectorize = true;
4957 }
4958 type = TREE_TYPE (fd->loop.v);
4959 entry_bb = region->entry;
4960 cont_bb = region->cont;
4961 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4962 gcc_assert (broken_loop
4963 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4964 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4965 if (!broken_loop)
4966 {
4967 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4968 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4969 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4970 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4971 }
4972 else
4973 {
4974 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4975 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4976 l2_bb = single_succ (l1_bb);
4977 }
4978 exit_bb = region->exit;
4979 l2_dom_bb = NULL;
4980
4981 gsi = gsi_last_nondebug_bb (entry_bb);
4982
4983 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4984 /* Not needed in SSA form right now. */
4985 gcc_assert (!gimple_in_ssa_p (cfun));
4986 if (fd->collapse > 1)
4987 {
4988 int first_zero_iter = -1, dummy = -1;
4989 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4990
4991 counts = XALLOCAVEC (tree, fd->collapse);
4992 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4993 zero_iter_bb, first_zero_iter,
4994 dummy_bb, dummy, l2_dom_bb);
4995 }
4996 if (l2_dom_bb == NULL)
4997 l2_dom_bb = l1_bb;
4998
4999 n1 = fd->loop.n1;
5000 n2 = fd->loop.n2;
5001 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5002 {
5003 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5004 OMP_CLAUSE__LOOPTEMP_);
5005 gcc_assert (innerc);
5006 n1 = OMP_CLAUSE_DECL (innerc);
5007 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5008 OMP_CLAUSE__LOOPTEMP_);
5009 gcc_assert (innerc);
5010 n2 = OMP_CLAUSE_DECL (innerc);
5011 }
5012 tree step = fd->loop.step;
5013
5014 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5015 OMP_CLAUSE__SIMT_);
5016 if (is_simt)
5017 {
5018 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
5019 is_simt = safelen_int > 1;
5020 }
5021 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
5022 if (is_simt)
5023 {
5024 simt_lane = create_tmp_var (unsigned_type_node);
5025 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
5026 gimple_call_set_lhs (g, simt_lane);
5027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5028 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
5029 fold_convert (TREE_TYPE (step), simt_lane));
5030 n1 = fold_convert (type, n1);
5031 if (POINTER_TYPE_P (type))
5032 n1 = fold_build_pointer_plus (n1, offset);
5033 else
5034 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
5035
5036 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
5037 if (fd->collapse > 1)
5038 simt_maxlane = build_one_cst (unsigned_type_node);
5039 else if (safelen_int < omp_max_simt_vf ())
5040 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
5041 tree vf
5042 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
5043 unsigned_type_node, 0);
5044 if (simt_maxlane)
5045 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
5046 vf = fold_convert (TREE_TYPE (step), vf);
5047 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
5048 }
5049
5050 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
5051 if (fd->collapse > 1)
5052 {
5053 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5054 {
5055 gsi_prev (&gsi);
5056 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
5057 gsi_next (&gsi);
5058 }
5059 else
5060 for (i = 0; i < fd->collapse; i++)
5061 {
5062 tree itype = TREE_TYPE (fd->loops[i].v);
5063 if (POINTER_TYPE_P (itype))
5064 itype = signed_type_for (itype);
5065 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
5066 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5067 }
5068 }
5069 if (cond_var)
5070 {
5071 if (POINTER_TYPE_P (type)
5072 || TREE_CODE (n1) != INTEGER_CST
5073 || fd->loop.cond_code != LT_EXPR
5074 || tree_int_cst_sgn (n1) != 1)
5075 expand_omp_build_assign (&gsi, cond_var,
5076 build_one_cst (TREE_TYPE (cond_var)));
5077 else
5078 expand_omp_build_assign (&gsi, cond_var,
5079 fold_convert (TREE_TYPE (cond_var), n1));
5080 }
5081
5082 /* Remove the GIMPLE_OMP_FOR statement. */
5083 gsi_remove (&gsi, true);
5084
5085 if (!broken_loop)
5086 {
5087 /* Code to control the increment goes in the CONT_BB. */
5088 gsi = gsi_last_nondebug_bb (cont_bb);
5089 stmt = gsi_stmt (gsi);
5090 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
5091
5092 if (POINTER_TYPE_P (type))
5093 t = fold_build_pointer_plus (fd->loop.v, step);
5094 else
5095 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5096 expand_omp_build_assign (&gsi, fd->loop.v, t);
5097
5098 if (fd->collapse > 1)
5099 {
5100 i = fd->collapse - 1;
5101 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
5102 {
5103 t = fold_convert (sizetype, fd->loops[i].step);
5104 t = fold_build_pointer_plus (fd->loops[i].v, t);
5105 }
5106 else
5107 {
5108 t = fold_convert (TREE_TYPE (fd->loops[i].v),
5109 fd->loops[i].step);
5110 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
5111 fd->loops[i].v, t);
5112 }
5113 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5114
5115 for (i = fd->collapse - 1; i > 0; i--)
5116 {
5117 tree itype = TREE_TYPE (fd->loops[i].v);
5118 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
5119 if (POINTER_TYPE_P (itype2))
5120 itype2 = signed_type_for (itype2);
5121 t = fold_convert (itype2, fd->loops[i - 1].step);
5122 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5123 GSI_SAME_STMT);
5124 t = build3 (COND_EXPR, itype2,
5125 build2 (fd->loops[i].cond_code, boolean_type_node,
5126 fd->loops[i].v,
5127 fold_convert (itype, fd->loops[i].n2)),
5128 build_int_cst (itype2, 0), t);
5129 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
5130 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
5131 else
5132 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
5133 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
5134
5135 t = fold_convert (itype, fd->loops[i].n1);
5136 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
5137 GSI_SAME_STMT);
5138 t = build3 (COND_EXPR, itype,
5139 build2 (fd->loops[i].cond_code, boolean_type_node,
5140 fd->loops[i].v,
5141 fold_convert (itype, fd->loops[i].n2)),
5142 fd->loops[i].v, t);
5143 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
5144 }
5145 }
5146 if (cond_var)
5147 {
5148 if (POINTER_TYPE_P (type)
5149 || TREE_CODE (n1) != INTEGER_CST
5150 || fd->loop.cond_code != LT_EXPR
5151 || tree_int_cst_sgn (n1) != 1)
5152 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5153 build_one_cst (TREE_TYPE (cond_var)));
5154 else
5155 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
5156 fold_convert (TREE_TYPE (cond_var), step));
5157 expand_omp_build_assign (&gsi, cond_var, t);
5158 }
5159
5160 /* Remove GIMPLE_OMP_CONTINUE. */
5161 gsi_remove (&gsi, true);
5162 }
5163
5164 /* Emit the condition in L1_BB. */
5165 gsi = gsi_start_bb (l1_bb);
5166
5167 t = fold_convert (type, n2);
5168 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5169 false, GSI_CONTINUE_LINKING);
5170 tree v = fd->loop.v;
5171 if (DECL_P (v) && TREE_ADDRESSABLE (v))
5172 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
5173 false, GSI_CONTINUE_LINKING);
5174 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
5175 cond_stmt = gimple_build_cond_empty (t);
5176 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
5177 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
5178 NULL, NULL)
5179 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
5180 NULL, NULL))
5181 {
5182 gsi = gsi_for_stmt (cond_stmt);
5183 gimple_regimplify_operands (cond_stmt, &gsi);
5184 }
5185
5186 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
5187 if (is_simt)
5188 {
5189 gsi = gsi_start_bb (l2_bb);
5190 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
5191 if (POINTER_TYPE_P (type))
5192 t = fold_build_pointer_plus (fd->loop.v, step);
5193 else
5194 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
5195 expand_omp_build_assign (&gsi, fd->loop.v, t);
5196 }
5197
5198 /* Remove GIMPLE_OMP_RETURN. */
5199 gsi = gsi_last_nondebug_bb (exit_bb);
5200 gsi_remove (&gsi, true);
5201
5202 /* Connect the new blocks. */
5203 remove_edge (FALLTHRU_EDGE (entry_bb));
5204
5205 if (!broken_loop)
5206 {
5207 remove_edge (BRANCH_EDGE (entry_bb));
5208 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
5209
5210 e = BRANCH_EDGE (l1_bb);
5211 ne = FALLTHRU_EDGE (l1_bb);
5212 e->flags = EDGE_TRUE_VALUE;
5213 }
5214 else
5215 {
5216 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5217
5218 ne = single_succ_edge (l1_bb);
5219 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
5220
5221 }
5222 ne->flags = EDGE_FALSE_VALUE;
5223 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
5224 ne->probability = e->probability.invert ();
5225
5226 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
5227 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
5228
5229 if (simt_maxlane)
5230 {
5231 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
5232 NULL_TREE, NULL_TREE);
5233 gsi = gsi_last_bb (entry_bb);
5234 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
5235 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
5236 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
5237 FALLTHRU_EDGE (entry_bb)->probability
5238 = profile_probability::guessed_always ().apply_scale (7, 8);
5239 BRANCH_EDGE (entry_bb)->probability
5240 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
5241 l2_dom_bb = entry_bb;
5242 }
5243 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
5244
5245 if (!broken_loop)
5246 {
5247 struct loop *loop = alloc_loop ();
5248 loop->header = l1_bb;
5249 loop->latch = cont_bb;
5250 add_loop (loop, l1_bb->loop_father);
5251 loop->safelen = safelen_int;
5252 if (simduid)
5253 {
5254 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5255 cfun->has_simduid_loops = true;
5256 }
5257 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
5258 the loop. */
5259 if ((flag_tree_loop_vectorize
5260 || !global_options_set.x_flag_tree_loop_vectorize)
5261 && flag_tree_loop_optimize
5262 && loop->safelen > 1)
5263 {
5264 loop->force_vectorize = true;
5265 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
5266 {
5267 unsigned HOST_WIDE_INT v
5268 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
5269 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
5270 loop->simdlen = v;
5271 }
5272 cfun->has_force_vectorize_loops = true;
5273 }
5274 else if (dont_vectorize)
5275 loop->dont_vectorize = true;
5276 }
5277 else if (simduid)
5278 cfun->has_simduid_loops = true;
5279 }
5280
5281 /* A taskloop construct is represented after gimplification as
5282 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5283 between them. This routine expands the outer GIMPLE_OMP_FOR,
5284 which should just compute all the loop temporaries needed
5285 by the GIMPLE_OMP_TASK. */
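/* For illustration only (an editorial sketch, not part of GCC): a source
   construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       a[i] = f (i);

   is represented at this point roughly as

     GIMPLE_OMP_FOR        <- expanded by this routine; it only computes
       GIMPLE_OMP_TASK        the _looptemp_ start/end values
         GIMPLE_OMP_FOR    <- expanded by expand_omp_taskloop_for_inner
           { a[i] = f (i); }

   so the actual iteration over each task's range happens in the inner
   GIMPLE_OMP_FOR.  */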
5286
5287 static void
5288 expand_omp_taskloop_for_outer (struct omp_region *region,
5289 struct omp_for_data *fd,
5290 gimple *inner_stmt)
5291 {
5292 tree type, bias = NULL_TREE;
5293 basic_block entry_bb, cont_bb, exit_bb;
5294 gimple_stmt_iterator gsi;
5295 gassign *assign_stmt;
5296 tree *counts = NULL;
5297 int i;
5298
5299 gcc_assert (inner_stmt);
5300 gcc_assert (region->cont);
5301 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
5302 && gimple_omp_task_taskloop_p (inner_stmt));
5303 type = TREE_TYPE (fd->loop.v);
5304
5305 /* See if we need to bias by LLONG_MIN. */
5306 if (fd->iter_type == long_long_unsigned_type_node
5307 && TREE_CODE (type) == INTEGER_TYPE
5308 && !TYPE_UNSIGNED (type))
5309 {
5310 tree n1, n2;
5311
5312 if (fd->loop.cond_code == LT_EXPR)
5313 {
5314 n1 = fd->loop.n1;
5315 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5316 }
5317 else
5318 {
5319 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5320 n2 = fd->loop.n1;
5321 }
5322 if (TREE_CODE (n1) != INTEGER_CST
5323 || TREE_CODE (n2) != INTEGER_CST
5324 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5325 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5326 }
5327
5328 entry_bb = region->entry;
5329 cont_bb = region->cont;
5330 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5331 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5332 exit_bb = region->exit;
5333
5334 gsi = gsi_last_nondebug_bb (entry_bb);
5335 gimple *for_stmt = gsi_stmt (gsi);
5336 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5337 if (fd->collapse > 1)
5338 {
5339 int first_zero_iter = -1, dummy = -1;
5340 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5341
5342 counts = XALLOCAVEC (tree, fd->collapse);
5343 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5344 zero_iter_bb, first_zero_iter,
5345 dummy_bb, dummy, l2_dom_bb);
5346
5347 if (zero_iter_bb)
5348 {
5349 /* Some counts[i] vars might be uninitialized if
5350 some loop has zero iterations. But the body shouldn't
5351 be executed in that case, so just avoid uninit warnings. */
5352 for (i = first_zero_iter; i < fd->collapse; i++)
5353 if (SSA_VAR_P (counts[i]))
5354 TREE_NO_WARNING (counts[i]) = 1;
5355 gsi_prev (&gsi);
5356 edge e = split_block (entry_bb, gsi_stmt (gsi));
5357 entry_bb = e->dest;
5358 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5359 gsi = gsi_last_bb (entry_bb);
5360 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5361 get_immediate_dominator (CDI_DOMINATORS,
5362 zero_iter_bb));
5363 }
5364 }
5365
5366 tree t0, t1;
5367 t1 = fd->loop.n2;
5368 t0 = fd->loop.n1;
5369 if (POINTER_TYPE_P (TREE_TYPE (t0))
5370 && TYPE_PRECISION (TREE_TYPE (t0))
5371 != TYPE_PRECISION (fd->iter_type))
5372 {
5373 /* Avoid casting pointers to integer of a different size. */
5374 tree itype = signed_type_for (type);
5375 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5376 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5377 }
5378 else
5379 {
5380 t1 = fold_convert (fd->iter_type, t1);
5381 t0 = fold_convert (fd->iter_type, t0);
5382 }
5383 if (bias)
5384 {
5385 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5386 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5387 }
5388
5389 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5390 OMP_CLAUSE__LOOPTEMP_);
5391 gcc_assert (innerc);
5392 tree startvar = OMP_CLAUSE_DECL (innerc);
5393 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5394 gcc_assert (innerc);
5395 tree endvar = OMP_CLAUSE_DECL (innerc);
5396 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5397 {
5398 gcc_assert (innerc);
5399 for (i = 1; i < fd->collapse; i++)
5400 {
5401 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5402 OMP_CLAUSE__LOOPTEMP_);
5403 gcc_assert (innerc);
5404 }
5405 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5406 OMP_CLAUSE__LOOPTEMP_);
5407 if (innerc)
5408 {
5409 /* If needed (inner taskloop has lastprivate clause), propagate
5410 down the total number of iterations. */
5411 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5412 NULL_TREE, false,
5413 GSI_CONTINUE_LINKING);
5414 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5415 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5416 }
5417 }
5418
5419 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5420 GSI_CONTINUE_LINKING);
5421 assign_stmt = gimple_build_assign (startvar, t0);
5422 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5423
5424 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5425 GSI_CONTINUE_LINKING);
5426 assign_stmt = gimple_build_assign (endvar, t1);
5427 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5428 if (fd->collapse > 1)
5429 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5430
5431 /* Remove the GIMPLE_OMP_FOR statement. */
5432 gsi = gsi_for_stmt (for_stmt);
5433 gsi_remove (&gsi, true);
5434
5435 gsi = gsi_last_nondebug_bb (cont_bb);
5436 gsi_remove (&gsi, true);
5437
5438 gsi = gsi_last_nondebug_bb (exit_bb);
5439 gsi_remove (&gsi, true);
5440
5441 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5442 remove_edge (BRANCH_EDGE (entry_bb));
5443 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5444 remove_edge (BRANCH_EDGE (cont_bb));
5445 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5446 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5447 recompute_dominator (CDI_DOMINATORS, region->entry));
5448 }
5449
5450 /* A taskloop construct is represented after gimplification as
5451 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5452 between them. This routine expands the inner GIMPLE_OMP_FOR.
5453 The GOMP_taskloop{,_ull} function arranges for each task to be given
5454 just a single range of iterations. */
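/* Illustrative sketch (editorial, not from the sources): the runtime hands
   each created task one [START, END) subrange of the iteration space via
   the _looptemp_ variables read below, and the loop expanded here simply
   runs

     for (V = START; V COND END; V += STEP)
       { body }

   where COND is fd->loop.cond_code.  */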
5455
5456 static void
5457 expand_omp_taskloop_for_inner (struct omp_region *region,
5458 struct omp_for_data *fd,
5459 gimple *inner_stmt)
5460 {
5461 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5462 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5463 basic_block fin_bb;
5464 gimple_stmt_iterator gsi;
5465 edge ep;
5466 bool broken_loop = region->cont == NULL;
5467 tree *counts = NULL;
5468 tree n1, n2, step;
5469
5470 itype = type = TREE_TYPE (fd->loop.v);
5471 if (POINTER_TYPE_P (type))
5472 itype = signed_type_for (type);
5473
5474 /* See if we need to bias by LLONG_MIN. */
5475 if (fd->iter_type == long_long_unsigned_type_node
5476 && TREE_CODE (type) == INTEGER_TYPE
5477 && !TYPE_UNSIGNED (type))
5478 {
5479 tree n1, n2;
5480
5481 if (fd->loop.cond_code == LT_EXPR)
5482 {
5483 n1 = fd->loop.n1;
5484 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5485 }
5486 else
5487 {
5488 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5489 n2 = fd->loop.n1;
5490 }
5491 if (TREE_CODE (n1) != INTEGER_CST
5492 || TREE_CODE (n2) != INTEGER_CST
5493 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5494 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5495 }
5496
5497 entry_bb = region->entry;
5498 cont_bb = region->cont;
5499 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5500 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5501 gcc_assert (broken_loop
5502 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5503 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5504 if (!broken_loop)
5505 {
5506 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5507 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5508 }
5509 exit_bb = region->exit;
5510
5511 /* Iteration space partitioning goes in ENTRY_BB. */
5512 gsi = gsi_last_nondebug_bb (entry_bb);
5513 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5514
5515 if (fd->collapse > 1)
5516 {
5517 int first_zero_iter = -1, dummy = -1;
5518 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5519
5520 counts = XALLOCAVEC (tree, fd->collapse);
5521 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5522 fin_bb, first_zero_iter,
5523 dummy_bb, dummy, l2_dom_bb);
5524 t = NULL_TREE;
5525 }
5526 else
5527 t = integer_one_node;
5528
5529 step = fd->loop.step;
5530 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5531 OMP_CLAUSE__LOOPTEMP_);
5532 gcc_assert (innerc);
5533 n1 = OMP_CLAUSE_DECL (innerc);
5534 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5535 gcc_assert (innerc);
5536 n2 = OMP_CLAUSE_DECL (innerc);
5537 if (bias)
5538 {
5539 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5540 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5541 }
5542 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5543 true, NULL_TREE, true, GSI_SAME_STMT);
5544 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5545 true, NULL_TREE, true, GSI_SAME_STMT);
5546 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5547 true, NULL_TREE, true, GSI_SAME_STMT);
5548
5549 tree startvar = fd->loop.v;
5550 tree endvar = NULL_TREE;
5551
5552 if (gimple_omp_for_combined_p (fd->for_stmt))
5553 {
5554 tree clauses = gimple_omp_for_clauses (inner_stmt);
5555 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5556 gcc_assert (innerc);
5557 startvar = OMP_CLAUSE_DECL (innerc);
5558 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5559 OMP_CLAUSE__LOOPTEMP_);
5560 gcc_assert (innerc);
5561 endvar = OMP_CLAUSE_DECL (innerc);
5562 }
5563 t = fold_convert (TREE_TYPE (startvar), n1);
5564 t = force_gimple_operand_gsi (&gsi, t,
5565 DECL_P (startvar)
5566 && TREE_ADDRESSABLE (startvar),
5567 NULL_TREE, false, GSI_CONTINUE_LINKING);
5568 gimple *assign_stmt = gimple_build_assign (startvar, t);
5569 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5570
5571 t = fold_convert (TREE_TYPE (startvar), n2);
5572 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5573 false, GSI_CONTINUE_LINKING);
5574 if (endvar)
5575 {
5576 assign_stmt = gimple_build_assign (endvar, e);
5577 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5578 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5579 assign_stmt = gimple_build_assign (fd->loop.v, e);
5580 else
5581 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5582 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5583 }
5584 if (fd->collapse > 1)
5585 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5586
5587 if (!broken_loop)
5588 {
5589 /* The code controlling the sequential loop replaces the
5590 GIMPLE_OMP_CONTINUE. */
5591 gsi = gsi_last_nondebug_bb (cont_bb);
5592 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5593 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5594 vmain = gimple_omp_continue_control_use (cont_stmt);
5595 vback = gimple_omp_continue_control_def (cont_stmt);
5596
5597 if (!gimple_omp_for_combined_p (fd->for_stmt))
5598 {
5599 if (POINTER_TYPE_P (type))
5600 t = fold_build_pointer_plus (vmain, step);
5601 else
5602 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5603 t = force_gimple_operand_gsi (&gsi, t,
5604 DECL_P (vback)
5605 && TREE_ADDRESSABLE (vback),
5606 NULL_TREE, true, GSI_SAME_STMT);
5607 assign_stmt = gimple_build_assign (vback, t);
5608 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5609
5610 t = build2 (fd->loop.cond_code, boolean_type_node,
5611 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5612 ? t : vback, e);
5613 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5614 }
5615
5616 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5617 gsi_remove (&gsi, true);
5618
5619 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5620 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5621 }
5622
5623 /* Remove the GIMPLE_OMP_FOR statement. */
5624 gsi = gsi_for_stmt (fd->for_stmt);
5625 gsi_remove (&gsi, true);
5626
5627 /* Remove the GIMPLE_OMP_RETURN statement. */
5628 gsi = gsi_last_nondebug_bb (exit_bb);
5629 gsi_remove (&gsi, true);
5630
5631 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5632 if (!broken_loop)
5633 remove_edge (BRANCH_EDGE (entry_bb));
5634 else
5635 {
5636 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5637 region->outer->cont = NULL;
5638 }
5639
5640 /* Connect all the blocks. */
5641 if (!broken_loop)
5642 {
5643 ep = find_edge (cont_bb, body_bb);
5644 if (gimple_omp_for_combined_p (fd->for_stmt))
5645 {
5646 remove_edge (ep);
5647 ep = NULL;
5648 }
5649 else if (fd->collapse > 1)
5650 {
5651 remove_edge (ep);
5652 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5653 }
5654 else
5655 ep->flags = EDGE_TRUE_VALUE;
5656 find_edge (cont_bb, fin_bb)->flags
5657 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5658 }
5659
5660 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5661 recompute_dominator (CDI_DOMINATORS, body_bb));
5662 if (!broken_loop)
5663 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5664 recompute_dominator (CDI_DOMINATORS, fin_bb));
5665
5666 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5667 {
5668 struct loop *loop = alloc_loop ();
5669 loop->header = body_bb;
5670 if (collapse_bb == NULL)
5671 loop->latch = cont_bb;
5672 add_loop (loop, body_bb->loop_father);
5673 }
5674 }
5675
5676 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5677 partitioned loop. The lowering here is abstracted, in that the
5678 loop parameters are passed through internal functions, which are
5679 further lowered by oacc_device_lower, once we get to the target
5680 compiler. The loop is of the form:
5681
5682 for (V = B; V LTGT E; V += S) {BODY}
5683
5684 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5685 (constant 0 for no chunking) and we will have a GWV partitioning
5686 mask, specifying dimensions over which the loop is to be
5687 partitioned (see note below). We generate code that looks like
5688 (this ignores tiling):
5689
5690 <entry_bb> [incoming FALL->body, BRANCH->exit]
5691 typedef signedintify (typeof (V)) T; // underlying signed integral type
5692 T range = E - B;
5693 T chunk_no = 0;
5694 T DIR = LTGT == '<' ? +1 : -1;
5695 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5696 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5697
5698 <head_bb> [created by splitting end of entry_bb]
5699 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5700 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5701 if (!(offset LTGT bound)) goto bottom_bb;
5702
5703 <body_bb> [incoming]
5704 V = B + offset;
5705 {BODY}
5706
5707 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5708 offset += step;
5709 if (offset LTGT bound) goto body_bb; [*]
5710
5711 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5712 chunk_no++;
5713 if (chunk_no < chunk_max) goto head_bb;
5714
5715 <exit_bb> [incoming]
5716 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5717
5718 [*] Needed if V live at end of loop. */
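/* For reference (an editorial example, not part of this file), a source
   loop that is lowered by this routine might look like

     #pragma acc parallel loop tile(8, 8)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         a[i][j] += b[i][j];

   where any gang/worker/vector clauses end up in the GWV mask and the
   tile clause drives the element-loop generation below.  */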
5719
5720 static void
5721 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5722 {
5723 tree v = fd->loop.v;
5724 enum tree_code cond_code = fd->loop.cond_code;
5725 enum tree_code plus_code = PLUS_EXPR;
5726
5727 tree chunk_size = integer_minus_one_node;
5728 tree gwv = integer_zero_node;
5729 tree iter_type = TREE_TYPE (v);
5730 tree diff_type = iter_type;
5731 tree plus_type = iter_type;
5732 struct oacc_collapse *counts = NULL;
5733
5734 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5735 == GF_OMP_FOR_KIND_OACC_LOOP);
5736 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5737 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5738
5739 if (POINTER_TYPE_P (iter_type))
5740 {
5741 plus_code = POINTER_PLUS_EXPR;
5742 plus_type = sizetype;
5743 }
5744 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5745 diff_type = signed_type_for (diff_type);
5746 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5747 diff_type = integer_type_node;
5748
5749 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5750 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5751 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5752 basic_block bottom_bb = NULL;
5753
5754 /* entry_bb has two successors; the branch edge is to the exit
5755 block, fallthrough edge to body. */
5756 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5757 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5758
5759 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5760 body_bb, or a block whose only successor is body_bb. Its
5761 fallthrough successor is the final block (same as the branch
5762 successor of the entry_bb). */
5763 if (cont_bb)
5764 {
5765 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5766 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5767
5768 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5769 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5770 }
5771 else
5772 gcc_assert (!gimple_in_ssa_p (cfun));
5773
5774 /* The exit block only has entry_bb and cont_bb as predecessors. */
5775 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5776
5777 tree chunk_no;
5778 tree chunk_max = NULL_TREE;
5779 tree bound, offset;
5780 tree step = create_tmp_var (diff_type, ".step");
5781 bool up = cond_code == LT_EXPR;
5782 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5783 bool chunking = !gimple_in_ssa_p (cfun);
5784 bool negating;
5785
5786 /* Tiling vars. */
5787 tree tile_size = NULL_TREE;
5788 tree element_s = NULL_TREE;
5789 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5790 basic_block elem_body_bb = NULL;
5791 basic_block elem_cont_bb = NULL;
5792
5793 /* SSA instances. */
5794 tree offset_incr = NULL_TREE;
5795 tree offset_init = NULL_TREE;
5796
5797 gimple_stmt_iterator gsi;
5798 gassign *ass;
5799 gcall *call;
5800 gimple *stmt;
5801 tree expr;
5802 location_t loc;
5803 edge split, be, fte;
5804
5805 /* Split the end of entry_bb to create head_bb. */
5806 split = split_block (entry_bb, last_stmt (entry_bb));
5807 basic_block head_bb = split->dest;
5808 entry_bb = split->src;
5809
5810 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5811 gsi = gsi_last_nondebug_bb (entry_bb);
5812 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5813 loc = gimple_location (for_stmt);
5814
5815 if (gimple_in_ssa_p (cfun))
5816 {
5817 offset_init = gimple_omp_for_index (for_stmt, 0);
5818 gcc_assert (integer_zerop (fd->loop.n1));
5819 /* The SSA parallelizer does gang parallelism. */
5820 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5821 }
5822
5823 if (fd->collapse > 1 || fd->tiling)
5824 {
5825 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5826 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5827 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5828 TREE_TYPE (fd->loop.n2), loc);
5829
5830 if (SSA_VAR_P (fd->loop.n2))
5831 {
5832 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5833 true, GSI_SAME_STMT);
5834 ass = gimple_build_assign (fd->loop.n2, total);
5835 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5836 }
5837 }
5838
5839 tree b = fd->loop.n1;
5840 tree e = fd->loop.n2;
5841 tree s = fd->loop.step;
5842
5843 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5844 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5845
5846 /* Convert the step, avoiding possible unsigned->signed overflow. */
5847 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5848 if (negating)
5849 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5850 s = fold_convert (diff_type, s);
5851 if (negating)
5852 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5853 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5854
5855 if (!chunking)
5856 chunk_size = integer_zero_node;
5857 expr = fold_convert (diff_type, chunk_size);
5858 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5859 NULL_TREE, true, GSI_SAME_STMT);
5860
5861 if (fd->tiling)
5862 {
5863 /* Determine the tile size and element step,
5864 and modify the outer loop step size. */
5865 tile_size = create_tmp_var (diff_type, ".tile_size");
5866 expr = build_int_cst (diff_type, 1);
5867 for (int ix = 0; ix < fd->collapse; ix++)
5868 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5869 expr = force_gimple_operand_gsi (&gsi, expr, true,
5870 NULL_TREE, true, GSI_SAME_STMT);
5871 ass = gimple_build_assign (tile_size, expr);
5872 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5873
5874 element_s = create_tmp_var (diff_type, ".element_s");
5875 ass = gimple_build_assign (element_s, s);
5876 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5877
5878 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5879 s = force_gimple_operand_gsi (&gsi, expr, true,
5880 NULL_TREE, true, GSI_SAME_STMT);
5881 }
5882
5883 /* Determine the range, avoiding possible unsigned->signed overflow. */
5884 negating = !up && TYPE_UNSIGNED (iter_type);
5885 expr = fold_build2 (MINUS_EXPR, plus_type,
5886 fold_convert (plus_type, negating ? b : e),
5887 fold_convert (plus_type, negating ? e : b));
5888 expr = fold_convert (diff_type, expr);
5889 if (negating)
5890 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5891 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5892 NULL_TREE, true, GSI_SAME_STMT);
5893
5894 chunk_no = build_int_cst (diff_type, 0);
5895 if (chunking)
5896 {
5897 gcc_assert (!gimple_in_ssa_p (cfun));
5898
5899 expr = chunk_no;
5900 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5901 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5902
5903 ass = gimple_build_assign (chunk_no, expr);
5904 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5905
5906 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5907 build_int_cst (integer_type_node,
5908 IFN_GOACC_LOOP_CHUNKS),
5909 dir, range, s, chunk_size, gwv);
5910 gimple_call_set_lhs (call, chunk_max);
5911 gimple_set_location (call, loc);
5912 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5913 }
5914 else
5915 chunk_size = chunk_no;
5916
5917 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5918 build_int_cst (integer_type_node,
5919 IFN_GOACC_LOOP_STEP),
5920 dir, range, s, chunk_size, gwv);
5921 gimple_call_set_lhs (call, step);
5922 gimple_set_location (call, loc);
5923 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5924
5925 /* Remove the GIMPLE_OMP_FOR. */
5926 gsi_remove (&gsi, true);
5927
5928 /* Fixup edges from head_bb. */
5929 be = BRANCH_EDGE (head_bb);
5930 fte = FALLTHRU_EDGE (head_bb);
5931 be->flags |= EDGE_FALSE_VALUE;
5932 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5933
5934 basic_block body_bb = fte->dest;
5935
5936 if (gimple_in_ssa_p (cfun))
5937 {
5938 gsi = gsi_last_nondebug_bb (cont_bb);
5939 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5940
5941 offset = gimple_omp_continue_control_use (cont_stmt);
5942 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5943 }
5944 else
5945 {
5946 offset = create_tmp_var (diff_type, ".offset");
5947 offset_init = offset_incr = offset;
5948 }
5949 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5950
5951 /* Loop offset & bound go into head_bb. */
5952 gsi = gsi_start_bb (head_bb);
5953
5954 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5955 build_int_cst (integer_type_node,
5956 IFN_GOACC_LOOP_OFFSET),
5957 dir, range, s,
5958 chunk_size, gwv, chunk_no);
5959 gimple_call_set_lhs (call, offset_init);
5960 gimple_set_location (call, loc);
5961 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5962
5963 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5964 build_int_cst (integer_type_node,
5965 IFN_GOACC_LOOP_BOUND),
5966 dir, range, s,
5967 chunk_size, gwv, offset_init);
5968 gimple_call_set_lhs (call, bound);
5969 gimple_set_location (call, loc);
5970 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5971
5972 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5973 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5974 GSI_CONTINUE_LINKING);
5975
5976 /* V assignment goes into body_bb. */
5977 if (!gimple_in_ssa_p (cfun))
5978 {
5979 gsi = gsi_start_bb (body_bb);
5980
5981 expr = build2 (plus_code, iter_type, b,
5982 fold_convert (plus_type, offset));
5983 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5984 true, GSI_SAME_STMT);
5985 ass = gimple_build_assign (v, expr);
5986 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5987
5988 if (fd->collapse > 1 || fd->tiling)
5989 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5990
5991 if (fd->tiling)
5992 {
5993 /* Determine the range of the element loop -- usually simply
5994 the tile_size, but could be smaller if the final
5995 iteration of the outer loop is a partial tile. */
5996 tree e_range = create_tmp_var (diff_type, ".e_range");
5997
5998 expr = build2 (MIN_EXPR, diff_type,
5999 build2 (MINUS_EXPR, diff_type, bound, offset),
6000 build2 (MULT_EXPR, diff_type, tile_size,
6001 element_s));
6002 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6003 true, GSI_SAME_STMT);
6004 ass = gimple_build_assign (e_range, expr);
6005 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6006
6007 /* Determine bound, offset & step of inner loop. */
6008 e_bound = create_tmp_var (diff_type, ".e_bound");
6009 e_offset = create_tmp_var (diff_type, ".e_offset");
6010 e_step = create_tmp_var (diff_type, ".e_step");
6011
6012 /* Mark these as element loops. */
6013 tree t, e_gwv = integer_minus_one_node;
6014 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
6015
6016 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
6017 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6018 element_s, chunk, e_gwv, chunk);
6019 gimple_call_set_lhs (call, e_offset);
6020 gimple_set_location (call, loc);
6021 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6022
6023 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
6024 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
6025 element_s, chunk, e_gwv, e_offset);
6026 gimple_call_set_lhs (call, e_bound);
6027 gimple_set_location (call, loc);
6028 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6029
6030 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
6031 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
6032 element_s, chunk, e_gwv);
6033 gimple_call_set_lhs (call, e_step);
6034 gimple_set_location (call, loc);
6035 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
6036
6037 /* Add test and split block. */
6038 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6039 stmt = gimple_build_cond_empty (expr);
6040 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6041 split = split_block (body_bb, stmt);
6042 elem_body_bb = split->dest;
6043 if (cont_bb == body_bb)
6044 cont_bb = elem_body_bb;
6045 body_bb = split->src;
6046
6047 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
6048
6049 /* Add a dummy exit for the tiled block when cont_bb is missing. */
6050 if (cont_bb == NULL)
6051 {
6052 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
6053 e->probability = profile_probability::even ();
6054 split->probability = profile_probability::even ();
6055 }
6056
6057 /* Initialize the user's loop vars. */
6058 gsi = gsi_start_bb (elem_body_bb);
6059 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
6060 }
6061 }
6062
6063 /* Loop increment goes into cont_bb. If this is not a loop, we
6064 will have spawned threads as if it were, and each one will
6065 execute one iteration. The specification is not explicit about
6066 whether such constructs are ill-formed or not, and they can
6067 occur, especially when noreturn routines are involved. */
6068 if (cont_bb)
6069 {
6070 gsi = gsi_last_nondebug_bb (cont_bb);
6071 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6072 loc = gimple_location (cont_stmt);
6073
6074 if (fd->tiling)
6075 {
6076 /* Insert element loop increment and test. */
6077 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
6078 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6079 true, GSI_SAME_STMT);
6080 ass = gimple_build_assign (e_offset, expr);
6081 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6082 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
6083
6084 stmt = gimple_build_cond_empty (expr);
6085 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6086 split = split_block (cont_bb, stmt);
6087 elem_cont_bb = split->src;
6088 cont_bb = split->dest;
6089
6090 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6091 split->probability = profile_probability::unlikely ().guessed ();
6092 edge latch_edge
6093 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
6094 latch_edge->probability = profile_probability::likely ().guessed ();
6095
6096 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
6097 skip_edge->probability = profile_probability::unlikely ().guessed ();
6098 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
6099 loop_entry_edge->probability
6100 = profile_probability::likely ().guessed ();
6101
6102 gsi = gsi_for_stmt (cont_stmt);
6103 }
6104
6105 /* Increment offset. */
6106 if (gimple_in_ssa_p (cfun))
6107 expr = build2 (plus_code, iter_type, offset,
6108 fold_convert (plus_type, step));
6109 else
6110 expr = build2 (PLUS_EXPR, diff_type, offset, step);
6111 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6112 true, GSI_SAME_STMT);
6113 ass = gimple_build_assign (offset_incr, expr);
6114 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6115 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
6116 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
6117
6118 /* Remove the GIMPLE_OMP_CONTINUE. */
6119 gsi_remove (&gsi, true);
6120
6121 /* Fixup edges from cont_bb. */
6122 be = BRANCH_EDGE (cont_bb);
6123 fte = FALLTHRU_EDGE (cont_bb);
6124 be->flags |= EDGE_TRUE_VALUE;
6125 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6126
6127 if (chunking)
6128 {
6129 /* Split the beginning of exit_bb to make bottom_bb. We
6130 need to insert a nop at the start, because splitting is
6131 after a stmt, not before. */
6132 gsi = gsi_start_bb (exit_bb);
6133 stmt = gimple_build_nop ();
6134 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6135 split = split_block (exit_bb, stmt);
6136 bottom_bb = split->src;
6137 exit_bb = split->dest;
6138 gsi = gsi_last_bb (bottom_bb);
6139
6140 /* Chunk increment and test goes into bottom_bb. */
6141 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
6142 build_int_cst (diff_type, 1));
6143 ass = gimple_build_assign (chunk_no, expr);
6144 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
6145
6146 /* Chunk test at end of bottom_bb. */
6147 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
6148 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
6149 GSI_CONTINUE_LINKING);
6150
6151 /* Fixup edges from bottom_bb. */
6152 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
6153 split->probability = profile_probability::unlikely ().guessed ();
6154 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
6155 latch_edge->probability = profile_probability::likely ().guessed ();
6156 }
6157 }
6158
6159 gsi = gsi_last_nondebug_bb (exit_bb);
6160 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6161 loc = gimple_location (gsi_stmt (gsi));
6162
6163 if (!gimple_in_ssa_p (cfun))
6164 {
6165 /* Insert the final value of V, in case it is live. This is the
6166 value for the only thread that survives past the join. */
6167 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
6168 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
6169 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
6170 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
6171 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
6172 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
6173 true, GSI_SAME_STMT);
6174 ass = gimple_build_assign (v, expr);
6175 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
6176 }
6177
6178 /* Remove the OMP_RETURN. */
6179 gsi_remove (&gsi, true);
6180
6181 if (cont_bb)
6182 {
6183 /* We now have one, two or three nested loops. Update the loop
6184 structures. */
6185 struct loop *parent = entry_bb->loop_father;
6186 struct loop *body = body_bb->loop_father;
6187
6188 if (chunking)
6189 {
6190 struct loop *chunk_loop = alloc_loop ();
6191 chunk_loop->header = head_bb;
6192 chunk_loop->latch = bottom_bb;
6193 add_loop (chunk_loop, parent);
6194 parent = chunk_loop;
6195 }
6196 else if (parent != body)
6197 {
6198 gcc_assert (body->header == body_bb);
6199 gcc_assert (body->latch == cont_bb
6200 || single_pred (body->latch) == cont_bb);
6201 parent = NULL;
6202 }
6203
6204 if (parent)
6205 {
6206 struct loop *body_loop = alloc_loop ();
6207 body_loop->header = body_bb;
6208 body_loop->latch = cont_bb;
6209 add_loop (body_loop, parent);
6210
6211 if (fd->tiling)
6212 {
6213 /* Insert tiling's element loop. */
6214 struct loop *inner_loop = alloc_loop ();
6215 inner_loop->header = elem_body_bb;
6216 inner_loop->latch = elem_cont_bb;
6217 add_loop (inner_loop, body_loop);
6218 }
6219 }
6220 }
6221 }
6222
6223 /* Expand the OMP loop defined by REGION. */
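/* Editorial overview of the dispatch below: simd loops go to
   expand_omp_simd, OpenACC loops to expand_oacc_for, taskloops to the
   inner or outer taskloop expander depending on which GIMPLE_OMP_FOR
   this is, static non-ordered schedules to the static (no)chunk
   expanders, and everything else to expand_omp_for_generic with the
   matching GOMP_loop_*_start/next builtins.  */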
6224
6225 static void
6226 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
6227 {
6228 struct omp_for_data fd;
6229 struct omp_for_data_loop *loops;
6230
6231 loops
6232 = (struct omp_for_data_loop *)
6233 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
6234 * sizeof (struct omp_for_data_loop));
6235 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
6236 &fd, loops);
6237 region->sched_kind = fd.sched_kind;
6238 region->sched_modifiers = fd.sched_modifiers;
6239 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
6240
6241 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
6242 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6243 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
6244 if (region->cont)
6245 {
6246 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
6247 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6248 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
6249 }
6250 else
6251 /* If there isn't a continue then this is a degenerate case where
6252 the introduction of abnormal edges during lowering will prevent
6253 original loops from being detected. Fix that up. */
6254 loops_state_set (LOOPS_NEED_FIXUP);
6255
6256 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
6257 expand_omp_simd (region, &fd);
6258 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
6259 {
6260 gcc_assert (!inner_stmt);
6261 expand_oacc_for (region, &fd);
6262 }
6263 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
6264 {
6265 if (gimple_omp_for_combined_into_p (fd.for_stmt))
6266 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
6267 else
6268 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
6269 }
6270 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
6271 && !fd.have_ordered)
6272 {
6273 if (fd.chunk_size == NULL)
6274 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
6275 else
6276 expand_omp_for_static_chunk (region, &fd, inner_stmt);
6277 }
6278 else
6279 {
6280 int fn_index, start_ix, next_ix;
6281 unsigned HOST_WIDE_INT sched = 0;
6282 tree sched_arg = NULL_TREE;
6283
6284 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
6285 == GF_OMP_FOR_KIND_FOR);
6286 if (fd.chunk_size == NULL
6287 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
6288 fd.chunk_size = integer_zero_node;
6289 switch (fd.sched_kind)
6290 {
6291 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6292 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
6293 && fd.lastprivate_conditional == 0)
6294 {
6295 gcc_assert (!fd.have_ordered);
6296 fn_index = 6;
6297 sched = 4;
6298 }
6299 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6300 && !fd.have_ordered
6301 && fd.lastprivate_conditional == 0)
6302 fn_index = 7;
6303 else
6304 {
6305 fn_index = 3;
6306 sched = (HOST_WIDE_INT_1U << 31);
6307 }
6308 break;
6309 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6310 case OMP_CLAUSE_SCHEDULE_GUIDED:
6311 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6312 && !fd.have_ordered
6313 && fd.lastprivate_conditional == 0)
6314 {
6315 fn_index = 3 + fd.sched_kind;
6316 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6317 break;
6318 }
6319 fn_index = fd.sched_kind;
6320 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6321 sched += (HOST_WIDE_INT_1U << 31);
6322 break;
6323 case OMP_CLAUSE_SCHEDULE_STATIC:
6324 gcc_assert (fd.have_ordered);
6325 fn_index = 0;
6326 sched = (HOST_WIDE_INT_1U << 31) + 1;
6327 break;
6328 default:
6329 gcc_unreachable ();
6330 }
6331 if (!fd.ordered)
6332 fn_index += fd.have_ordered * 8;
6333 if (fd.ordered)
6334 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6335 else
6336 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6337 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6338 if (fd.have_reductemp || fd.have_pointer_condtemp)
6339 {
6340 if (fd.ordered)
6341 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6342 else if (fd.have_ordered)
6343 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6344 else
6345 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6346 sched_arg = build_int_cstu (long_integer_type_node, sched);
6347 if (!fd.chunk_size)
6348 fd.chunk_size = integer_zero_node;
6349 }
6350 if (fd.iter_type == long_long_unsigned_type_node)
6351 {
6352 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6353 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6354 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6355 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6356 }
6357 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6358 (enum built_in_function) next_ix, sched_arg,
6359 inner_stmt);
6360 }
6361
6362 if (gimple_in_ssa_p (cfun))
6363 update_ssa (TODO_update_ssa_only_virtuals);
6364 }
6365
6366 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
6367
6368 v = GOMP_sections_start (n);
6369 L0:
6370 switch (v)
6371 {
6372 case 0:
6373 goto L2;
6374 case 1:
6375 section 1;
6376 goto L1;
6377 case 2:
6378 ...
6379 case n:
6380 ...
6381 default:
6382 abort ();
6383 }
6384 L1:
6385 v = GOMP_sections_next ();
6386 goto L0;
6387 L2:
6388 reduction;
6389
6390 If this is a combined parallel+sections region, replace the call to
6391 GOMP_sections_start with a call to GOMP_sections_next. */
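/* Editorial example (not from the sources) of a construct handled here:

     #pragma omp sections
     {
       #pragma omp section
         work1 ();
       #pragma omp section
         work2 ();
     }

   Each section body becomes one case of the switch sketched above, and
   the value V returned by GOMP_sections_start/next selects which section
   (if any) the calling thread executes next.  */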
6392
6393 static void
6394 expand_omp_sections (struct omp_region *region)
6395 {
6396 tree t, u, vin = NULL, vmain, vnext, l2;
6397 unsigned len;
6398 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6399 gimple_stmt_iterator si, switch_si;
6400 gomp_sections *sections_stmt;
6401 gimple *stmt;
6402 gomp_continue *cont;
6403 edge_iterator ei;
6404 edge e;
6405 struct omp_region *inner;
6406 unsigned i, casei;
6407 bool exit_reachable = region->cont != NULL;
6408
6409 gcc_assert (region->exit != NULL);
6410 entry_bb = region->entry;
6411 l0_bb = single_succ (entry_bb);
6412 l1_bb = region->cont;
6413 l2_bb = region->exit;
6414 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6415 l2 = gimple_block_label (l2_bb);
6416 else
6417 {
6418 /* This can happen if there are reductions. */
6419 len = EDGE_COUNT (l0_bb->succs);
6420 gcc_assert (len > 0);
6421 e = EDGE_SUCC (l0_bb, len - 1);
6422 si = gsi_last_nondebug_bb (e->dest);
6423 l2 = NULL_TREE;
6424 if (gsi_end_p (si)
6425 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6426 l2 = gimple_block_label (e->dest);
6427 else
6428 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6429 {
6430 si = gsi_last_nondebug_bb (e->dest);
6431 if (gsi_end_p (si)
6432 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6433 {
6434 l2 = gimple_block_label (e->dest);
6435 break;
6436 }
6437 }
6438 }
6439 if (exit_reachable)
6440 default_bb = create_empty_bb (l1_bb->prev_bb);
6441 else
6442 default_bb = create_empty_bb (l0_bb);
6443
6444 /* We will build a switch() with enough cases for all the
6445 GIMPLE_OMP_SECTION regions, a '0' case to indicate there is no more work,
6446 and a default case to abort if something goes wrong. */
6447 len = EDGE_COUNT (l0_bb->succs);
6448
6449 /* Use vec::quick_push on label_vec throughout, since we know the size
6450 in advance. */
6451 auto_vec<tree> label_vec (len);
6452
6453 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6454 GIMPLE_OMP_SECTIONS statement. */
6455 si = gsi_last_nondebug_bb (entry_bb);
6456 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6457 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6458 vin = gimple_omp_sections_control (sections_stmt);
6459 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6460 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6461 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
6462 tree cond_var = NULL_TREE;
6463 if (reductmp || condtmp)
6464 {
6465 tree reductions = null_pointer_node, mem = null_pointer_node;
6466 tree memv = NULL_TREE, condtemp = NULL_TREE;
6467 gimple_stmt_iterator gsi = gsi_none ();
6468 gimple *g = NULL;
6469 if (reductmp)
6470 {
6471 reductions = OMP_CLAUSE_DECL (reductmp);
6472 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6473 g = SSA_NAME_DEF_STMT (reductions);
6474 reductions = gimple_assign_rhs1 (g);
6475 OMP_CLAUSE_DECL (reductmp) = reductions;
6476 gsi = gsi_for_stmt (g);
6477 }
6478 else
6479 gsi = si;
6480 if (condtmp)
6481 {
6482 condtemp = OMP_CLAUSE_DECL (condtmp);
6483 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
6484 OMP_CLAUSE__CONDTEMP_);
6485 cond_var = OMP_CLAUSE_DECL (c);
6486 tree type = TREE_TYPE (condtemp);
6487 memv = create_tmp_var (type);
6488 TREE_ADDRESSABLE (memv) = 1;
6489 unsigned cnt = 0;
6490 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6491 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
6492 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
6493 ++cnt;
6494 unsigned HOST_WIDE_INT sz
6495 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
6496 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
6497 false);
6498 mem = build_fold_addr_expr (memv);
6499 }
6500 t = build_int_cst (unsigned_type_node, len - 1);
6501 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6502 stmt = gimple_build_call (u, 3, t, reductions, mem);
6503 gimple_call_set_lhs (stmt, vin);
6504 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6505 if (condtmp)
6506 {
6507 expand_omp_build_assign (&gsi, condtemp, memv, false);
6508 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6509 vin, build_one_cst (TREE_TYPE (cond_var)));
6510 expand_omp_build_assign (&gsi, cond_var, t, false);
6511 }
6512 if (reductmp)
6513 {
6514 gsi_remove (&gsi, true);
6515 release_ssa_name (gimple_assign_lhs (g));
6516 }
6517 }
6518 else if (!is_combined_parallel (region))
6519 {
6520 /* If we are not inside a combined parallel+sections region,
6521 call GOMP_sections_start. */
6522 t = build_int_cst (unsigned_type_node, len - 1);
6523 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6524 stmt = gimple_build_call (u, 1, t);
6525 }
6526 else
6527 {
6528 /* Otherwise, call GOMP_sections_next. */
6529 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6530 stmt = gimple_build_call (u, 0);
6531 }
6532 if (!reductmp && !condtmp)
6533 {
6534 gimple_call_set_lhs (stmt, vin);
6535 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6536 }
6537 gsi_remove (&si, true);
6538
6539 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6540 L0_BB. */
6541 switch_si = gsi_last_nondebug_bb (l0_bb);
6542 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6543 if (exit_reachable)
6544 {
6545 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6546 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6547 vmain = gimple_omp_continue_control_use (cont);
6548 vnext = gimple_omp_continue_control_def (cont);
6549 }
6550 else
6551 {
6552 vmain = vin;
6553 vnext = NULL_TREE;
6554 }
6555
6556 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6557 label_vec.quick_push (t);
6558 i = 1;
6559
6560 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6561 for (inner = region->inner, casei = 1;
6562 inner;
6563 inner = inner->next, i++, casei++)
6564 {
6565 basic_block s_entry_bb, s_exit_bb;
6566
6567 /* Skip optional reduction region. */
6568 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6569 {
6570 --i;
6571 --casei;
6572 continue;
6573 }
6574
6575 s_entry_bb = inner->entry;
6576 s_exit_bb = inner->exit;
6577
6578 t = gimple_block_label (s_entry_bb);
6579 u = build_int_cst (unsigned_type_node, casei);
6580 u = build_case_label (u, NULL, t);
6581 label_vec.quick_push (u);
6582
6583 si = gsi_last_nondebug_bb (s_entry_bb);
6584 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6585 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6586 gsi_remove (&si, true);
6587 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6588
6589 if (s_exit_bb == NULL)
6590 continue;
6591
6592 si = gsi_last_nondebug_bb (s_exit_bb);
6593 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6594 gsi_remove (&si, true);
6595
6596 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6597 }
6598
6599 /* Error handling code goes in DEFAULT_BB. */
6600 t = gimple_block_label (default_bb);
6601 u = build_case_label (NULL, NULL, t);
6602 make_edge (l0_bb, default_bb, 0);
6603 add_bb_to_loop (default_bb, current_loops->tree_root);
6604
6605 stmt = gimple_build_switch (vmain, u, label_vec);
6606 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6607 gsi_remove (&switch_si, true);
6608
6609 si = gsi_start_bb (default_bb);
6610 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6611 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6612
6613 if (exit_reachable)
6614 {
6615 tree bfn_decl;
6616
6617 /* Code to get the next section goes in L1_BB. */
6618 si = gsi_last_nondebug_bb (l1_bb);
6619 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6620
6621 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6622 stmt = gimple_build_call (bfn_decl, 0);
6623 gimple_call_set_lhs (stmt, vnext);
6624 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6625 if (cond_var)
6626 {
6627 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
6628 vnext, build_one_cst (TREE_TYPE (cond_var)));
6629 expand_omp_build_assign (&si, cond_var, t, false);
6630 }
6631 gsi_remove (&si, true);
6632
6633 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6634 }
6635
6636 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6637 si = gsi_last_nondebug_bb (l2_bb);
6638 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6639 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6640 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6641 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6642 else
6643 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6644 stmt = gimple_build_call (t, 0);
6645 if (gimple_omp_return_lhs (gsi_stmt (si)))
6646 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6647 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6648 gsi_remove (&si, true);
6649
6650 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6651 }
6652
6653 /* Expand code for an OpenMP single directive. We've already expanded
6654 much of the code; here we simply place the GOMP_barrier call. */
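/* Editorial note: e.g. for

     #pragma omp single
       init ();

   the single-copy execution itself was handled earlier; here only the
   entry/exit markers are removed and, unless the construct had a
   'nowait' clause, the implicit GOMP_barrier is emitted at the region
   exit.  */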
6655
6656 static void
6657 expand_omp_single (struct omp_region *region)
6658 {
6659 basic_block entry_bb, exit_bb;
6660 gimple_stmt_iterator si;
6661
6662 entry_bb = region->entry;
6663 exit_bb = region->exit;
6664
6665 si = gsi_last_nondebug_bb (entry_bb);
6666 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6667 gsi_remove (&si, true);
6668 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6669
6670 si = gsi_last_nondebug_bb (exit_bb);
6671 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6672 {
6673 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6674 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6675 }
6676 gsi_remove (&si, true);
6677 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6678 }
6679
6680 /* Generic expansion for OpenMP synchronization directives: master,
6681 taskgroup, ordered and critical. All we need to do here is remove the entry
6682 and exit markers for REGION. */
6683
6684 static void
6685 expand_omp_synch (struct omp_region *region)
6686 {
6687 basic_block entry_bb, exit_bb;
6688 gimple_stmt_iterator si;
6689
6690 entry_bb = region->entry;
6691 exit_bb = region->exit;
6692
6693 si = gsi_last_nondebug_bb (entry_bb);
6694 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6695 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6696 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6697 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6698 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6699 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6700 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6701 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6702 {
6703 expand_omp_taskreg (region);
6704 return;
6705 }
6706 gsi_remove (&si, true);
6707 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6708
6709 if (exit_bb)
6710 {
6711 si = gsi_last_nondebug_bb (exit_bb);
6712 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6713 gsi_remove (&si, true);
6714 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6715 }
6716 }
6717
6718 /* Translate enum omp_memory_order to enum memmodel. The two enums
6719 use different numberings so that OMP_MEMORY_ORDER_UNSPECIFIED
6720 is 0. */
6721
6722 static enum memmodel
6723 omp_memory_order_to_memmodel (enum omp_memory_order mo)
6724 {
6725 switch (mo)
6726 {
6727 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6728 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6729 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6730 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6731 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6732 default: gcc_unreachable ();
6733 }
6734 }
6735
6736 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6737 operation as a normal volatile load. */
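/* For illustration only (assumed 4-byte operand, i.e. INDEX == 2): an atomic
   read such as

	#pragma omp atomic read
	v = *addr;

   is expanded here into approximately

	v = __atomic_load_4 (addr, mo);

   where MO is the memmodel value computed by omp_memory_order_to_memmodel.  */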
6738
6739 static bool
6740 expand_omp_atomic_load (basic_block load_bb, tree addr,
6741 tree loaded_val, int index)
6742 {
6743 enum built_in_function tmpbase;
6744 gimple_stmt_iterator gsi;
6745 basic_block store_bb;
6746 location_t loc;
6747 gimple *stmt;
6748 tree decl, call, type, itype;
6749
6750 gsi = gsi_last_nondebug_bb (load_bb);
6751 stmt = gsi_stmt (gsi);
6752 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6753 loc = gimple_location (stmt);
6754
6755 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6756 is smaller than word size, then expand_atomic_load assumes that the load
6757 is atomic. We could avoid the builtin entirely in this case. */
6758
6759 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6760 decl = builtin_decl_explicit (tmpbase);
6761 if (decl == NULL_TREE)
6762 return false;
6763
6764 type = TREE_TYPE (loaded_val);
6765 itype = TREE_TYPE (TREE_TYPE (decl));
6766
6767 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6768 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6769 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6770 if (!useless_type_conversion_p (type, itype))
6771 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6772 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6773
6774 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6775 gsi_remove (&gsi, true);
6776
6777 store_bb = single_succ (load_bb);
6778 gsi = gsi_last_nondebug_bb (store_bb);
6779 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6780 gsi_remove (&gsi, true);
6781
6782 if (gimple_in_ssa_p (cfun))
6783 update_ssa (TODO_update_ssa_no_phi);
6784
6785 return true;
6786 }
6787
6788 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6789 operation as a normal volatile store. */
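/* Illustrative example (assumed 4-byte operand): an atomic write

	#pragma omp atomic write
	*addr = x;

   becomes roughly __atomic_store_4 (addr, x, mo), while a capture of the old
   value turns the store into __atomic_exchange_4 instead, guarded by the
   can_atomic_exchange_p check below.  */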
6790
6791 static bool
6792 expand_omp_atomic_store (basic_block load_bb, tree addr,
6793 tree loaded_val, tree stored_val, int index)
6794 {
6795 enum built_in_function tmpbase;
6796 gimple_stmt_iterator gsi;
6797 basic_block store_bb = single_succ (load_bb);
6798 location_t loc;
6799 gimple *stmt;
6800 tree decl, call, type, itype;
6801 machine_mode imode;
6802 bool exchange;
6803
6804 gsi = gsi_last_nondebug_bb (load_bb);
6805 stmt = gsi_stmt (gsi);
6806 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6807
6808 /* If the load value is needed, then this isn't a store but an exchange. */
6809 exchange = gimple_omp_atomic_need_value_p (stmt);
6810
6811 gsi = gsi_last_nondebug_bb (store_bb);
6812 stmt = gsi_stmt (gsi);
6813 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6814 loc = gimple_location (stmt);
6815
6816 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6817 is smaller than word size, then expand_atomic_store assumes that the store
6818 is atomic. We could avoid the builtin entirely in this case. */
6819
6820 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6821 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6822 decl = builtin_decl_explicit (tmpbase);
6823 if (decl == NULL_TREE)
6824 return false;
6825
6826 type = TREE_TYPE (stored_val);
6827
6828 /* Dig out the type of the function's second argument. */
6829 itype = TREE_TYPE (decl);
6830 itype = TYPE_ARG_TYPES (itype);
6831 itype = TREE_CHAIN (itype);
6832 itype = TREE_VALUE (itype);
6833 imode = TYPE_MODE (itype);
6834
6835 if (exchange && !can_atomic_exchange_p (imode, true))
6836 return false;
6837
6838 if (!useless_type_conversion_p (itype, type))
6839 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6840 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6841 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6842 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6843 if (exchange)
6844 {
6845 if (!useless_type_conversion_p (type, itype))
6846 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6847 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6848 }
6849
6850 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6851 gsi_remove (&gsi, true);
6852
6853 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6854 gsi = gsi_last_nondebug_bb (load_bb);
6855 gsi_remove (&gsi, true);
6856
6857 if (gimple_in_ssa_p (cfun))
6858 update_ssa (TODO_update_ssa_no_phi);
6859
6860 return true;
6861 }
6862
6863 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6864 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6865 size of the data type, and thus usable to find the index of the builtin
6866 decl. Returns false if the expression is not of the proper form. */
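/* A sketch of the intended transformation (not part of the original
   comment): for a 4-byte integer X,

	#pragma omp atomic
	x = x + y;

   becomes roughly __atomic_fetch_add_4 (&x, y, mo); if the new value is
   needed (atomic capture), the __atomic_add_fetch_4 variant is used
   instead, as selected via NEWBASE below.  */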
6867
6868 static bool
6869 expand_omp_atomic_fetch_op (basic_block load_bb,
6870 tree addr, tree loaded_val,
6871 tree stored_val, int index)
6872 {
6873 enum built_in_function oldbase, newbase, tmpbase;
6874 tree decl, itype, call;
6875 tree lhs, rhs;
6876 basic_block store_bb = single_succ (load_bb);
6877 gimple_stmt_iterator gsi;
6878 gimple *stmt;
6879 location_t loc;
6880 enum tree_code code;
6881 bool need_old, need_new;
6882 machine_mode imode;
6883
6884 /* We expect to find the following sequences:
6885
6886 load_bb:
6887 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6888
6889 store_bb:
6890 val = tmp OP something; (or: something OP tmp)
6891 GIMPLE_OMP_STORE (val)
6892
6893 ???FIXME: Allow a more flexible sequence.
6894 Perhaps use data flow to pick the statements.
6895
6896 */
6897
6898 gsi = gsi_after_labels (store_bb);
6899 stmt = gsi_stmt (gsi);
6900 if (is_gimple_debug (stmt))
6901 {
6902 gsi_next_nondebug (&gsi);
6903 if (gsi_end_p (gsi))
6904 return false;
6905 stmt = gsi_stmt (gsi);
6906 }
6907 loc = gimple_location (stmt);
6908 if (!is_gimple_assign (stmt))
6909 return false;
6910 gsi_next_nondebug (&gsi);
6911 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6912 return false;
6913 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6914 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6915 enum omp_memory_order omo
6916 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6917 enum memmodel mo = omp_memory_order_to_memmodel (omo);
6918 gcc_checking_assert (!need_old || !need_new);
6919
6920 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6921 return false;
6922
6923 /* Check for one of the supported fetch-op operations. */
6924 code = gimple_assign_rhs_code (stmt);
6925 switch (code)
6926 {
6927 case PLUS_EXPR:
6928 case POINTER_PLUS_EXPR:
6929 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6930 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6931 break;
6932 case MINUS_EXPR:
6933 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6934 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6935 break;
6936 case BIT_AND_EXPR:
6937 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6938 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6939 break;
6940 case BIT_IOR_EXPR:
6941 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6942 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6943 break;
6944 case BIT_XOR_EXPR:
6945 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6946 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6947 break;
6948 default:
6949 return false;
6950 }
6951
6952 /* Make sure the expression is of the proper form. */
6953 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6954 rhs = gimple_assign_rhs2 (stmt);
6955 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6956 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6957 rhs = gimple_assign_rhs1 (stmt);
6958 else
6959 return false;
6960
6961 tmpbase = ((enum built_in_function)
6962 ((need_new ? newbase : oldbase) + index + 1));
6963 decl = builtin_decl_explicit (tmpbase);
6964 if (decl == NULL_TREE)
6965 return false;
6966 itype = TREE_TYPE (TREE_TYPE (decl));
6967 imode = TYPE_MODE (itype);
6968
6969 /* We could test all of the various optabs involved, but the fact of the
6970 matter is that (with the exception of i486 vs i586 and xadd) all targets
6971 that support any atomic operation optab also implement compare-and-swap.
6972 Let optabs.c take care of expanding any compare-and-swap loop. */
6973 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6974 return false;
6975
6976 gsi = gsi_last_nondebug_bb (load_bb);
6977 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6978
6979 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6980 It only requires that the operation happen atomically. Thus we can
6981 use the RELAXED memory model. */
6982 call = build_call_expr_loc (loc, decl, 3, addr,
6983 fold_convert_loc (loc, itype, rhs),
6984 build_int_cst (NULL, mo));
6985
6986 if (need_old || need_new)
6987 {
6988 lhs = need_old ? loaded_val : stored_val;
6989 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6990 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6991 }
6992 else
6993 call = fold_convert_loc (loc, void_type_node, call);
6994 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6995 gsi_remove (&gsi, true);
6996
6997 gsi = gsi_last_nondebug_bb (store_bb);
6998 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6999 gsi_remove (&gsi, true);
7000 gsi = gsi_last_nondebug_bb (store_bb);
7001 stmt = gsi_stmt (gsi);
7002 gsi_remove (&gsi, true);
7003
7004 if (gimple_in_ssa_p (cfun))
7005 {
7006 release_defs (stmt);
7007 update_ssa (TODO_update_ssa_no_phi);
7008 }
7009
7010 return true;
7011 }
7012
7013 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7014
7015 oldval = *addr;
7016 repeat:
7017 newval = rhs; // with oldval replacing *addr in rhs
7018 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
7019 if (oldval != newval)
7020 goto repeat;
7021
7022 INDEX is log2 of the size of the data type, and thus usable to find the
7023 index of the builtin decl. */
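/* Illustrative note: for a floating-point operand such as a double, the
   value is first view-converted to a same-sized integer, so the loop above
   effectively runs on __sync_val_compare_and_swap_8 with the comparison done
   on the integer bit pattern (see the NaN / -0.0 remark further below).  */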
7024
7025 static bool
7026 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
7027 tree addr, tree loaded_val, tree stored_val,
7028 int index)
7029 {
7030 tree loadedi, storedi, initial, new_storedi, old_vali;
7031 tree type, itype, cmpxchg, iaddr, atype;
7032 gimple_stmt_iterator si;
7033 basic_block loop_header = single_succ (load_bb);
7034 gimple *phi, *stmt;
7035 edge e;
7036 enum built_in_function fncode;
7037
7038 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
7039 order to use the RELAXED memory model effectively. */
7040 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
7041 + index + 1);
7042 cmpxchg = builtin_decl_explicit (fncode);
7043 if (cmpxchg == NULL_TREE)
7044 return false;
7045 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7046 atype = type;
7047 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
7048
7049 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
7050 || !can_atomic_load_p (TYPE_MODE (itype)))
7051 return false;
7052
7053 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
7054 si = gsi_last_nondebug_bb (load_bb);
7055 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7056
7057 /* For floating-point values, we'll need to view-convert them to integers
7058 so that we can perform the atomic compare and swap. Simplify the
7059 following code by always setting up the "i"ntegral variables. */
7060 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
7061 {
7062 tree iaddr_val;
7063
7064 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
7065 true));
7066 atype = itype;
7067 iaddr_val
7068 = force_gimple_operand_gsi (&si,
7069 fold_convert (TREE_TYPE (iaddr), addr),
7070 false, NULL_TREE, true, GSI_SAME_STMT);
7071 stmt = gimple_build_assign (iaddr, iaddr_val);
7072 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7073 loadedi = create_tmp_var (itype);
7074 if (gimple_in_ssa_p (cfun))
7075 loadedi = make_ssa_name (loadedi);
7076 }
7077 else
7078 {
7079 iaddr = addr;
7080 loadedi = loaded_val;
7081 }
7082
7083 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
7084 tree loaddecl = builtin_decl_explicit (fncode);
7085 if (loaddecl)
7086 initial
7087 = fold_convert (atype,
7088 build_call_expr (loaddecl, 2, iaddr,
7089 build_int_cst (NULL_TREE,
7090 MEMMODEL_RELAXED)));
7091 else
7092 {
7093 tree off
7094 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
7095 true), 0);
7096 initial = build2 (MEM_REF, atype, iaddr, off);
7097 }
7098
7099 initial
7100 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
7101 GSI_SAME_STMT);
7102
7103 /* Move the value to the LOADEDI temporary. */
7104 if (gimple_in_ssa_p (cfun))
7105 {
7106 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
7107 phi = create_phi_node (loadedi, loop_header);
7108 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
7109 initial);
7110 }
7111 else
7112 gsi_insert_before (&si,
7113 gimple_build_assign (loadedi, initial),
7114 GSI_SAME_STMT);
7115 if (loadedi != loaded_val)
7116 {
7117 gimple_stmt_iterator gsi2;
7118 tree x;
7119
7120 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
7121 gsi2 = gsi_start_bb (loop_header);
7122 if (gimple_in_ssa_p (cfun))
7123 {
7124 gassign *stmt;
7125 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7126 true, GSI_SAME_STMT);
7127 stmt = gimple_build_assign (loaded_val, x);
7128 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
7129 }
7130 else
7131 {
7132 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
7133 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
7134 true, GSI_SAME_STMT);
7135 }
7136 }
7137 gsi_remove (&si, true);
7138
7139 si = gsi_last_nondebug_bb (store_bb);
7140 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7141
7142 if (iaddr == addr)
7143 storedi = stored_val;
7144 else
7145 storedi
7146 = force_gimple_operand_gsi (&si,
7147 build1 (VIEW_CONVERT_EXPR, itype,
7148 stored_val), true, NULL_TREE, true,
7149 GSI_SAME_STMT);
7150
7151 /* Build the compare&swap statement. */
7152 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
7153 new_storedi = force_gimple_operand_gsi (&si,
7154 fold_convert (TREE_TYPE (loadedi),
7155 new_storedi),
7156 true, NULL_TREE,
7157 true, GSI_SAME_STMT);
7158
7159 if (gimple_in_ssa_p (cfun))
7160 old_vali = loadedi;
7161 else
7162 {
7163 old_vali = create_tmp_var (TREE_TYPE (loadedi));
7164 stmt = gimple_build_assign (old_vali, loadedi);
7165 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7166
7167 stmt = gimple_build_assign (loadedi, new_storedi);
7168 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7169 }
7170
7171 /* Note that we always perform the comparison as an integer, even for
7172 floating point. This allows the atomic operation to properly
7173 succeed even with NaNs and -0.0. */
7174 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
7175 stmt = gimple_build_cond_empty (ne);
7176 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7177
7178 /* Update cfg. */
7179 e = single_succ_edge (store_bb);
7180 e->flags &= ~EDGE_FALLTHRU;
7181 e->flags |= EDGE_FALSE_VALUE;
7182 /* Expect no looping. */
7183 e->probability = profile_probability::guessed_always ();
7184
7185 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
7186 e->probability = profile_probability::guessed_never ();
7187
7188 /* Copy the new value to loadedi (we already did that before the condition
7189 if we are not in SSA). */
7190 if (gimple_in_ssa_p (cfun))
7191 {
7192 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
7193 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
7194 }
7195
7196 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
7197 gsi_remove (&si, true);
7198
7199 struct loop *loop = alloc_loop ();
7200 loop->header = loop_header;
7201 loop->latch = store_bb;
7202 add_loop (loop, loop_header->loop_father);
7203
7204 if (gimple_in_ssa_p (cfun))
7205 update_ssa (TODO_update_ssa_no_phi);
7206
7207 return true;
7208 }
7209
7210 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
7211
7212 GOMP_atomic_start ();
7213 *addr = rhs;
7214 GOMP_atomic_end ();
7215
7216 The result is not globally atomic, but works so long as all parallel
7217 references are within #pragma omp atomic directives. According to
7218 responses received from omp@openmp.org, this appears to be within spec.
7219 That makes sense, since several other compilers handle this
7220 situation the same way.
7221 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
7222 expanding. STORED_VAL is the operand of the matching
7223 GIMPLE_OMP_ATOMIC_STORE.
7224
7225 We replace
7226 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
7227 loaded_val = *addr;
7228
7229 and replace
7230 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
7231 *addr = stored_val;
7232 */
7233
7234 static bool
7235 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
7236 tree addr, tree loaded_val, tree stored_val)
7237 {
7238 gimple_stmt_iterator si;
7239 gassign *stmt;
7240 tree t;
7241
7242 si = gsi_last_nondebug_bb (load_bb);
7243 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
7244
7245 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
7246 t = build_call_expr (t, 0);
7247 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7248
7249 tree mem = build_simple_mem_ref (addr);
7250 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
7251 TREE_OPERAND (mem, 1)
7252 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
7253 true),
7254 TREE_OPERAND (mem, 1));
7255 stmt = gimple_build_assign (loaded_val, mem);
7256 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7257 gsi_remove (&si, true);
7258
7259 si = gsi_last_nondebug_bb (store_bb);
7260 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
7261
7262 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
7263 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
7264
7265 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
7266 t = build_call_expr (t, 0);
7267 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
7268 gsi_remove (&si, true);
7269
7270 if (gimple_in_ssa_p (cfun))
7271 update_ssa (TODO_update_ssa_no_phi);
7272 return true;
7273 }
7274
7275 /* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
7276 using expand_omp_atomic_fetch_op. If that fails, we try to
7277 call expand_omp_atomic_pipeline, and if that fails too, the
7278 ultimate fallback is wrapping the operation in a mutex
7279 (expand_omp_atomic_mutex). REGION is the atomic region built
7280 by build_omp_regions_1(). */
7281
7282 static void
7283 expand_omp_atomic (struct omp_region *region)
7284 {
7285 basic_block load_bb = region->entry, store_bb = region->exit;
7286 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
7287 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
7288 tree loaded_val = gimple_omp_atomic_load_lhs (load);
7289 tree addr = gimple_omp_atomic_load_rhs (load);
7290 tree stored_val = gimple_omp_atomic_store_val (store);
7291 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
7292 HOST_WIDE_INT index;
7293
7294 /* Make sure the type is one of the supported sizes. */
7295 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
7296 index = exact_log2 (index);
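/* INDEX is now 0..4 for 1-, 2-, 4-, 8- and 16-byte operands, matching the
   _1 .. _16 suffixes of the __atomic builtins used by the helpers above.  */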
7297 if (index >= 0 && index <= 4)
7298 {
7299 unsigned int align = TYPE_ALIGN_UNIT (type);
7300
7301 /* __sync builtins require strict data alignment. */
7302 if (exact_log2 (align) >= index)
7303 {
7304 /* Atomic load. */
7305 scalar_mode smode;
7306 if (loaded_val == stored_val
7307 && (is_int_mode (TYPE_MODE (type), &smode)
7308 || is_float_mode (TYPE_MODE (type), &smode))
7309 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7310 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
7311 return;
7312
7313 /* Atomic store. */
7314 if ((is_int_mode (TYPE_MODE (type), &smode)
7315 || is_float_mode (TYPE_MODE (type), &smode))
7316 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
7317 && store_bb == single_succ (load_bb)
7318 && first_stmt (store_bb) == store
7319 && expand_omp_atomic_store (load_bb, addr, loaded_val,
7320 stored_val, index))
7321 return;
7322
7323 /* When possible, use specialized atomic update functions. */
7324 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
7325 && store_bb == single_succ (load_bb)
7326 && expand_omp_atomic_fetch_op (load_bb, addr,
7327 loaded_val, stored_val, index))
7328 return;
7329
7330 /* If we don't have specialized __sync builtins, try and implement
7331 as a compare and swap loop. */
7332 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
7333 loaded_val, stored_val, index))
7334 return;
7335 }
7336 }
7337
7338 /* The ultimate fallback is wrapping the operation in a mutex. */
7339 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
7340 }
7341
7342 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
7343 at REGION_EXIT. */
7344
7345 static void
7346 mark_loops_in_oacc_kernels_region (basic_block region_entry,
7347 basic_block region_exit)
7348 {
7349 struct loop *outer = region_entry->loop_father;
7350 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
7351
7352 /* Don't parallelize the kernels region if it contains more than one outer
7353 loop. */
7354 unsigned int nr_outer_loops = 0;
7355 struct loop *single_outer = NULL;
7356 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
7357 {
7358 gcc_assert (loop_outer (loop) == outer);
7359
7360 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7361 continue;
7362
7363 if (region_exit != NULL
7364 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7365 continue;
7366
7367 nr_outer_loops++;
7368 single_outer = loop;
7369 }
7370 if (nr_outer_loops != 1)
7371 return;
7372
7373 for (struct loop *loop = single_outer->inner;
7374 loop != NULL;
7375 loop = loop->inner)
7376 if (loop->next)
7377 return;
7378
7379 /* Mark the loops in the region. */
7380 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7381 loop->in_oacc_kernels_region = true;
7382 }
7383
7384 /* Types used to pass grid and workgroup sizes to a kernel invocation. */
7385
7386 struct GTY(()) grid_launch_attributes_trees
7387 {
7388 tree kernel_dim_array_type;
7389 tree kernel_lattrs_dimnum_decl;
7390 tree kernel_lattrs_grid_decl;
7391 tree kernel_lattrs_group_decl;
7392 tree kernel_launch_attributes_type;
7393 };
7394
7395 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7396
7397 /* Create types used to pass kernel launch attributes to target. */
7398
7399 static void
7400 grid_create_kernel_launch_attr_types (void)
7401 {
7402 if (grid_attr_trees)
7403 return;
7404 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7405
7406 tree dim_arr_index_type
7407 = build_index_type (build_int_cst (integer_type_node, 2));
7408 grid_attr_trees->kernel_dim_array_type
7409 = build_array_type (uint32_type_node, dim_arr_index_type);
7410
7411 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7412 grid_attr_trees->kernel_lattrs_dimnum_decl
7413 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7414 uint32_type_node);
7415 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7416
7417 grid_attr_trees->kernel_lattrs_grid_decl
7418 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7419 grid_attr_trees->kernel_dim_array_type);
7420 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7421 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7422 grid_attr_trees->kernel_lattrs_group_decl
7423 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7424 grid_attr_trees->kernel_dim_array_type);
7425 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7426 = grid_attr_trees->kernel_lattrs_grid_decl;
7427 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7428 "__gomp_kernel_launch_attributes",
7429 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7430 }
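/* For reference (an illustrative equivalent, not generated code), the record
   built above corresponds roughly to

	struct __gomp_kernel_launch_attributes
	{
	  uint32_t ndim;
	  uint32_t grid_size[3];
	  uint32_t group_size[3];
	};  */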
7431
7432 /* Insert before the current statement in GSI a store of VALUE to INDEX of
7433 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7434 of type uint32_type_node. */
7435
7436 static void
7437 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7438 tree fld_decl, int index, tree value)
7439 {
7440 tree ref = build4 (ARRAY_REF, uint32_type_node,
7441 build3 (COMPONENT_REF,
7442 grid_attr_trees->kernel_dim_array_type,
7443 range_var, fld_decl, NULL_TREE),
7444 build_int_cst (integer_type_node, index),
7445 NULL_TREE, NULL_TREE);
7446 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7447 }
7448
7449 /* Return a tree representation of a pointer to a structure with grid and
7450 work-group size information. Statements filling that information will be
7451 inserted before GSI; TGT_STMT is the target statement which has the
7452 necessary information in it. */
7453
7454 static tree
7455 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7456 gomp_target *tgt_stmt)
7457 {
7458 grid_create_kernel_launch_attr_types ();
7459 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7460 "__kernel_launch_attrs");
7461
7462 unsigned max_dim = 0;
7463 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7464 clause;
7465 clause = OMP_CLAUSE_CHAIN (clause))
7466 {
7467 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7468 continue;
7469
7470 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7471 max_dim = MAX (dim, max_dim);
7472
7473 grid_insert_store_range_dim (gsi, lattrs,
7474 grid_attr_trees->kernel_lattrs_grid_decl,
7475 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7476 grid_insert_store_range_dim (gsi, lattrs,
7477 grid_attr_trees->kernel_lattrs_group_decl,
7478 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7479 }
7480
7481 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7482 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7483 gcc_checking_assert (max_dim <= 2);
7484 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7485 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7486 GSI_SAME_STMT);
7487 TREE_ADDRESSABLE (lattrs) = 1;
7488 return build_fold_addr_expr (lattrs);
7489 }
7490
7491 /* Build target argument identifier from the DEVICE identifier, value
7492 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
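/* Illustrative note: the resulting identifier is simply the bitwise OR of
   DEVICE, the GOMP_TARGET_ARG_SUBSEQUENT_PARAM flag (when a separate value
   argument follows) and ID; the exact bit assignments come from the
   GOMP_TARGET_ARG_* constants in gomp-constants.h.  */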
7493
7494 static tree
7495 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7496 {
7497 tree t = build_int_cst (integer_type_node, device);
7498 if (subseqent_param)
7499 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7500 build_int_cst (integer_type_node,
7501 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7502 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7503 build_int_cst (integer_type_node, id));
7504 return t;
7505 }
7506
7507 /* Like above but return it in a type that can be directly stored as an element
7508 of the argument array. */
7509
7510 static tree
7511 get_target_argument_identifier (int device, bool subseqent_param, int id)
7512 {
7513 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7514 return fold_convert (ptr_type_node, t);
7515 }
7516
7517 /* Return a target argument consisting of DEVICE identifier, value identifier
7518 ID, and the actual VALUE. */
7519
7520 static tree
7521 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7522 tree value)
7523 {
7524 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7525 fold_convert (integer_type_node, value),
7526 build_int_cst (unsigned_type_node,
7527 GOMP_TARGET_ARG_VALUE_SHIFT));
7528 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7529 get_target_argument_identifier_1 (device, false, id));
7530 t = fold_convert (ptr_type_node, t);
7531 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7532 }
7533
7534 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7535 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7536 otherwise push an identifier (with DEVICE and ID) and the VALUE as two separate
7537 arguments. */
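/* A worked example (hypothetical values): for "num_teams (4)" the constant 4
   fits into the signed 16-bit range checked below, so a single pointer-sized
   element encoding device, id and (4 << GOMP_TARGET_ARG_VALUE_SHIFT) is
   pushed; for a non-constant or larger expression, an identifier element
   tagged with GOMP_TARGET_ARG_SUBSEQUENT_PARAM is pushed, followed by the
   value itself as a second element.  */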
7538
7539 static void
7540 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7541 int id, tree value, vec <tree> *args)
7542 {
7543 if (tree_fits_shwi_p (value)
7544 && tree_to_shwi (value) > -(1 << 15)
7545 && tree_to_shwi (value) < (1 << 15))
7546 args->quick_push (get_target_argument_value (gsi, device, id, value));
7547 else
7548 {
7549 args->quick_push (get_target_argument_identifier (device, true, id));
7550 value = fold_convert (ptr_type_node, value);
7551 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7552 GSI_SAME_STMT);
7553 args->quick_push (value);
7554 }
7555 }
7556
7557 /* Create an array of arguments that is then passed to GOMP_target. */
7558
7559 static tree
7560 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7561 {
7562 auto_vec <tree, 6> args;
7563 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7564 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7565 if (c)
7566 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7567 else
7568 t = integer_minus_one_node;
7569 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7570 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7571
7572 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7573 if (c)
7574 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7575 else
7576 t = integer_minus_one_node;
7577 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7578 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7579 &args);
7580
7581 /* Add HSA-specific grid sizes, if available. */
7582 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7583 OMP_CLAUSE__GRIDDIM_))
7584 {
7585 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7586 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7587 args.quick_push (t);
7588 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7589 }
7590
7591 /* Produce more, perhaps device specific, arguments here. */
7592
7593 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7594 args.length () + 1),
7595 ".omp_target_args");
7596 for (unsigned i = 0; i < args.length (); i++)
7597 {
7598 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7599 build_int_cst (integer_type_node, i),
7600 NULL_TREE, NULL_TREE);
7601 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7602 GSI_SAME_STMT);
7603 }
7604 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7605 build_int_cst (integer_type_node, args.length ()),
7606 NULL_TREE, NULL_TREE);
7607 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7608 GSI_SAME_STMT);
7609 TREE_ADDRESSABLE (argarray) = 1;
7610 return build_fold_addr_expr (argarray);
7611 }
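/* Illustrative summary: the resulting .omp_target_args array therefore looks
   roughly like

	{ <num_teams arg(s)>, <thread_limit arg(s)>,
	  [HSA launch-attribute id, pointer], NULL }

   with the trailing null pointer acting as the terminator.  */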
7612
7613 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7614
7615 static void
7616 expand_omp_target (struct omp_region *region)
7617 {
7618 basic_block entry_bb, exit_bb, new_bb;
7619 struct function *child_cfun;
7620 tree child_fn, block, t;
7621 gimple_stmt_iterator gsi;
7622 gomp_target *entry_stmt;
7623 gimple *stmt;
7624 edge e;
7625 bool offloaded, data_region;
7626
7627 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7628 new_bb = region->entry;
7629
7630 offloaded = is_gimple_omp_offloaded (entry_stmt);
7631 switch (gimple_omp_target_kind (entry_stmt))
7632 {
7633 case GF_OMP_TARGET_KIND_REGION:
7634 case GF_OMP_TARGET_KIND_UPDATE:
7635 case GF_OMP_TARGET_KIND_ENTER_DATA:
7636 case GF_OMP_TARGET_KIND_EXIT_DATA:
7637 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7638 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7639 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7640 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7641 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7642 data_region = false;
7643 break;
7644 case GF_OMP_TARGET_KIND_DATA:
7645 case GF_OMP_TARGET_KIND_OACC_DATA:
7646 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7647 data_region = true;
7648 break;
7649 default:
7650 gcc_unreachable ();
7651 }
7652
7653 child_fn = NULL_TREE;
7654 child_cfun = NULL;
7655 if (offloaded)
7656 {
7657 child_fn = gimple_omp_target_child_fn (entry_stmt);
7658 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7659 }
7660
7661 /* Supported by expand_omp_taskreg, but not here. */
7662 if (child_cfun != NULL)
7663 gcc_checking_assert (!child_cfun->cfg);
7664 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7665
7666 entry_bb = region->entry;
7667 exit_bb = region->exit;
7668
7669 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7670 {
7671 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7672
7673 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7674 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7675 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7676 DECL_ATTRIBUTES (child_fn)
7677 = tree_cons (get_identifier ("oacc kernels"),
7678 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7679 }
7680
7681 if (offloaded)
7682 {
7683 unsigned srcidx, dstidx, num;
7684
7685 /* If the offloading region needs data sent from the parent
7686 function, then the very first statement (except possible
7687 tree profile counter updates) of the offloading body
7688 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7689 &.OMP_DATA_O is passed as an argument to the child function,
7690 we need to replace it with the argument as seen by the child
7691 function.
7692
7693 In most cases, this will end up being the identity assignment
7694 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7695 a function call that has been inlined, the original PARM_DECL
7696 .OMP_DATA_I may have been converted into a different local
7697 variable. In which case, we need to keep the assignment. */
7698 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7699 if (data_arg)
7700 {
7701 basic_block entry_succ_bb = single_succ (entry_bb);
7702 gimple_stmt_iterator gsi;
7703 tree arg;
7704 gimple *tgtcopy_stmt = NULL;
7705 tree sender = TREE_VEC_ELT (data_arg, 0);
7706
7707 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7708 {
7709 gcc_assert (!gsi_end_p (gsi));
7710 stmt = gsi_stmt (gsi);
7711 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7712 continue;
7713
7714 if (gimple_num_ops (stmt) == 2)
7715 {
7716 tree arg = gimple_assign_rhs1 (stmt);
7717
7718 /* We're ignoring the subcode because we're
7719 effectively doing a STRIP_NOPS. */
7720
7721 if (TREE_CODE (arg) == ADDR_EXPR
7722 && TREE_OPERAND (arg, 0) == sender)
7723 {
7724 tgtcopy_stmt = stmt;
7725 break;
7726 }
7727 }
7728 }
7729
7730 gcc_assert (tgtcopy_stmt != NULL);
7731 arg = DECL_ARGUMENTS (child_fn);
7732
7733 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7734 gsi_remove (&gsi, true);
7735 }
7736
7737 /* Declare local variables needed in CHILD_CFUN. */
7738 block = DECL_INITIAL (child_fn);
7739 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7740 /* The gimplifier could record temporaries in the offloading block
7741 rather than in containing function's local_decls chain,
7742 which would mean cgraph missed finalizing them. Do it now. */
7743 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7744 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7745 varpool_node::finalize_decl (t);
7746 DECL_SAVED_TREE (child_fn) = NULL;
7747 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7748 gimple_set_body (child_fn, NULL);
7749 TREE_USED (block) = 1;
7750
7751 /* Reset DECL_CONTEXT on function arguments. */
7752 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7753 DECL_CONTEXT (t) = child_fn;
7754
7755 /* Split ENTRY_BB at GIMPLE_*,
7756 so that it can be moved to the child function. */
7757 gsi = gsi_last_nondebug_bb (entry_bb);
7758 stmt = gsi_stmt (gsi);
7759 gcc_assert (stmt
7760 && gimple_code (stmt) == gimple_code (entry_stmt));
7761 e = split_block (entry_bb, stmt);
7762 gsi_remove (&gsi, true);
7763 entry_bb = e->dest;
7764 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7765
7766 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7767 if (exit_bb)
7768 {
7769 gsi = gsi_last_nondebug_bb (exit_bb);
7770 gcc_assert (!gsi_end_p (gsi)
7771 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7772 stmt = gimple_build_return (NULL);
7773 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7774 gsi_remove (&gsi, true);
7775 }
7776
7777 /* Move the offloading region into CHILD_CFUN. */
7778
7779 block = gimple_block (entry_stmt);
7780
7781 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7782 if (exit_bb)
7783 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7784 /* When the OMP expansion process cannot guarantee an up-to-date
7785 loop tree, arrange for the child function to fix up loops. */
7786 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7787 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7788
7789 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7790 num = vec_safe_length (child_cfun->local_decls);
7791 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7792 {
7793 t = (*child_cfun->local_decls)[srcidx];
7794 if (DECL_CONTEXT (t) == cfun->decl)
7795 continue;
7796 if (srcidx != dstidx)
7797 (*child_cfun->local_decls)[dstidx] = t;
7798 dstidx++;
7799 }
7800 if (dstidx != num)
7801 vec_safe_truncate (child_cfun->local_decls, dstidx);
7802
7803 /* Inform the callgraph about the new function. */
7804 child_cfun->curr_properties = cfun->curr_properties;
7805 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7806 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7807 cgraph_node *node = cgraph_node::get_create (child_fn);
7808 node->parallelized_function = 1;
7809 cgraph_node::add_new_function (child_fn, true);
7810
7811 /* Add the new function to the offload table. */
7812 if (ENABLE_OFFLOADING)
7813 {
7814 if (in_lto_p)
7815 DECL_PRESERVE_P (child_fn) = 1;
7816 vec_safe_push (offload_funcs, child_fn);
7817 }
7818
7819 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7820 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7821
7822 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7823 fixed in a following pass. */
7824 push_cfun (child_cfun);
7825 if (need_asm)
7826 assign_assembler_name_if_needed (child_fn);
7827 cgraph_edge::rebuild_edges ();
7828
7829 /* Some EH regions might become dead, see PR34608. If
7830 pass_cleanup_cfg isn't the first pass to happen with the
7831 new child, these dead EH edges might cause problems.
7832 Clean them up now. */
7833 if (flag_exceptions)
7834 {
7835 basic_block bb;
7836 bool changed = false;
7837
7838 FOR_EACH_BB_FN (bb, cfun)
7839 changed |= gimple_purge_dead_eh_edges (bb);
7840 if (changed)
7841 cleanup_tree_cfg ();
7842 }
7843 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7844 verify_loop_structure ();
7845 pop_cfun ();
7846
7847 if (dump_file && !gimple_in_ssa_p (cfun))
7848 {
7849 omp_any_child_fn_dumped = true;
7850 dump_function_header (dump_file, child_fn, dump_flags);
7851 dump_function_to_file (child_fn, dump_file, dump_flags);
7852 }
7853
7854 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7855 }
7856
7857 /* Emit a library call to launch the offloading region, or do data
7858 transfers. */
7859 tree t1, t2, t3, t4, depend, c, clauses;
7860 enum built_in_function start_ix;
7861 unsigned int flags_i = 0;
7862
7863 switch (gimple_omp_target_kind (entry_stmt))
7864 {
7865 case GF_OMP_TARGET_KIND_REGION:
7866 start_ix = BUILT_IN_GOMP_TARGET;
7867 break;
7868 case GF_OMP_TARGET_KIND_DATA:
7869 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7870 break;
7871 case GF_OMP_TARGET_KIND_UPDATE:
7872 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7873 break;
7874 case GF_OMP_TARGET_KIND_ENTER_DATA:
7875 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7876 break;
7877 case GF_OMP_TARGET_KIND_EXIT_DATA:
7878 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7879 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7880 break;
7881 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7882 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7883 start_ix = BUILT_IN_GOACC_PARALLEL;
7884 break;
7885 case GF_OMP_TARGET_KIND_OACC_DATA:
7886 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7887 start_ix = BUILT_IN_GOACC_DATA_START;
7888 break;
7889 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7890 start_ix = BUILT_IN_GOACC_UPDATE;
7891 break;
7892 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7893 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7894 break;
7895 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7896 start_ix = BUILT_IN_GOACC_DECLARE;
7897 break;
7898 default:
7899 gcc_unreachable ();
7900 }
7901
7902 clauses = gimple_omp_target_clauses (entry_stmt);
7903
7904 tree device = NULL_TREE;
7905 location_t device_loc = UNKNOWN_LOCATION;
7906 tree goacc_flags = NULL_TREE;
7907 if (is_gimple_omp_oacc (entry_stmt))
7908 {
7909 /* By default, no GOACC_FLAGs are set. */
7910 goacc_flags = integer_zero_node;
7911 }
7912 else
7913 {
7914 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7915 if (c)
7916 {
7917 device = OMP_CLAUSE_DEVICE_ID (c);
7918 device_loc = OMP_CLAUSE_LOCATION (c);
7919 }
7920 else
7921 {
7922 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7923 library choose). */
7924 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7925 device_loc = gimple_location (entry_stmt);
7926 }
7927
7928 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7929 if (c)
7930 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7931 }
7932
7933 /* By default, there is no conditional. */
7934 tree cond = NULL_TREE;
7935 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7936 if (c)
7937 cond = OMP_CLAUSE_IF_EXPR (c);
7938 /* If we found the clause 'if (cond)', build:
7939 OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
7940 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
7941 if (cond)
7942 {
7943 tree *tp;
7944 if (is_gimple_omp_oacc (entry_stmt))
7945 tp = &goacc_flags;
7946 else
7947 {
7948 /* Ensure 'device' is of the correct type. */
7949 device = fold_convert_loc (device_loc, integer_type_node, device);
7950
7951 tp = &device;
7952 }
7953
7954 cond = gimple_boolify (cond);
7955
7956 basic_block cond_bb, then_bb, else_bb;
7957 edge e;
7958 tree tmp_var;
7959
7960 tmp_var = create_tmp_var (TREE_TYPE (*tp));
7961 if (offloaded)
7962 e = split_block_after_labels (new_bb);
7963 else
7964 {
7965 gsi = gsi_last_nondebug_bb (new_bb);
7966 gsi_prev (&gsi);
7967 e = split_block (new_bb, gsi_stmt (gsi));
7968 }
7969 cond_bb = e->src;
7970 new_bb = e->dest;
7971 remove_edge (e);
7972
7973 then_bb = create_empty_bb (cond_bb);
7974 else_bb = create_empty_bb (then_bb);
7975 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7976 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7977
7978 stmt = gimple_build_cond_empty (cond);
7979 gsi = gsi_last_bb (cond_bb);
7980 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7981
7982 gsi = gsi_start_bb (then_bb);
7983 stmt = gimple_build_assign (tmp_var, *tp);
7984 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7985
7986 gsi = gsi_start_bb (else_bb);
7987 if (is_gimple_omp_oacc (entry_stmt))
7988 stmt = gimple_build_assign (tmp_var,
7989 BIT_IOR_EXPR,
7990 *tp,
7991 build_int_cst (integer_type_node,
7992 GOACC_FLAG_HOST_FALLBACK));
7993 else
7994 stmt = gimple_build_assign (tmp_var,
7995 build_int_cst (integer_type_node,
7996 GOMP_DEVICE_HOST_FALLBACK));
7997 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7998
7999 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
8000 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
8001 add_bb_to_loop (then_bb, cond_bb->loop_father);
8002 add_bb_to_loop (else_bb, cond_bb->loop_father);
8003 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
8004 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
8005
8006 *tp = tmp_var;
8007
8008 gsi = gsi_last_nondebug_bb (new_bb);
8009 }
8010 else
8011 {
8012 gsi = gsi_last_nondebug_bb (new_bb);
8013
8014 if (device != NULL_TREE)
8015 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
8016 true, GSI_SAME_STMT);
8017 }
8018
8019 t = gimple_omp_target_data_arg (entry_stmt);
8020 if (t == NULL)
8021 {
8022 t1 = size_zero_node;
8023 t2 = build_zero_cst (ptr_type_node);
8024 t3 = t2;
8025 t4 = t2;
8026 }
8027 else
8028 {
8029 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
8030 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
8031 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
8032 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
8033 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
8034 }
8035
8036 gimple *g;
8037 bool tagging = false;
8038 /* The maximum number of arguments used by any start_ix, without varargs. */
8039 auto_vec<tree, 11> args;
8040 if (is_gimple_omp_oacc (entry_stmt))
8041 {
8042 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
8043 TREE_TYPE (goacc_flags), goacc_flags);
8044 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
8045 NULL_TREE, true,
8046 GSI_SAME_STMT);
8047 args.quick_push (goacc_flags_m);
8048 }
8049 else
8050 args.quick_push (device);
8051 if (offloaded)
8052 args.quick_push (build_fold_addr_expr (child_fn));
8053 args.quick_push (t1);
8054 args.quick_push (t2);
8055 args.quick_push (t3);
8056 args.quick_push (t4);
8057 switch (start_ix)
8058 {
8059 case BUILT_IN_GOACC_DATA_START:
8060 case BUILT_IN_GOACC_DECLARE:
8061 case BUILT_IN_GOMP_TARGET_DATA:
8062 break;
8063 case BUILT_IN_GOMP_TARGET:
8064 case BUILT_IN_GOMP_TARGET_UPDATE:
8065 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
8066 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
8067 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
8068 if (c)
8069 depend = OMP_CLAUSE_DECL (c);
8070 else
8071 depend = build_int_cst (ptr_type_node, 0);
8072 args.quick_push (depend);
8073 if (start_ix == BUILT_IN_GOMP_TARGET)
8074 args.quick_push (get_target_arguments (&gsi, entry_stmt));
8075 break;
8076 case BUILT_IN_GOACC_PARALLEL:
8077 oacc_set_fn_attrib (child_fn, clauses, &args);
8078 tagging = true;
8079 /* FALLTHRU */
8080 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
8081 case BUILT_IN_GOACC_UPDATE:
8082 {
8083 tree t_async = NULL_TREE;
8084
8085 /* If present, use the value specified by the respective
8086 clause, making sure that is of the correct type. */
8087 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
8088 if (c)
8089 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8090 integer_type_node,
8091 OMP_CLAUSE_ASYNC_EXPR (c));
8092 else if (!tagging)
8093 /* Default values for t_async. */
8094 t_async = fold_convert_loc (gimple_location (entry_stmt),
8095 integer_type_node,
8096 build_int_cst (integer_type_node,
8097 GOMP_ASYNC_SYNC));
8098 if (tagging && t_async)
8099 {
8100 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
8101
8102 if (TREE_CODE (t_async) == INTEGER_CST)
8103 {
8104 /* See if we can pack the async arg in to the tag's
8105 operand. */
8106 i_async = TREE_INT_CST_LOW (t_async);
8107 if (i_async < GOMP_LAUNCH_OP_MAX)
8108 t_async = NULL_TREE;
8109 else
8110 i_async = GOMP_LAUNCH_OP_MAX;
8111 }
8112 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
8113 i_async));
8114 }
8115 if (t_async)
8116 args.safe_push (t_async);
8117
8118 /* Save the argument index, and ... */
8119 unsigned t_wait_idx = args.length ();
8120 unsigned num_waits = 0;
8121 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
8122 if (!tagging || c)
8123 /* ... push a placeholder. */
8124 args.safe_push (integer_zero_node);
8125
8126 for (; c; c = OMP_CLAUSE_CHAIN (c))
8127 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
8128 {
8129 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
8130 integer_type_node,
8131 OMP_CLAUSE_WAIT_EXPR (c)));
8132 num_waits++;
8133 }
8134
8135 if (!tagging || num_waits)
8136 {
8137 tree len;
8138
8139 /* Now that we know the number, update the placeholder. */
8140 if (tagging)
8141 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
8142 else
8143 len = build_int_cst (integer_type_node, num_waits);
8144 len = fold_convert_loc (gimple_location (entry_stmt),
8145 unsigned_type_node, len);
8146 args[t_wait_idx] = len;
8147 }
8148 }
8149 break;
8150 default:
8151 gcc_unreachable ();
8152 }
8153 if (tagging)
8154 /* Push terminal marker - zero. */
8155 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
8156
8157 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
8158 gimple_set_location (g, gimple_location (entry_stmt));
8159 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8160 if (!offloaded)
8161 {
8162 g = gsi_stmt (gsi);
8163 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
8164 gsi_remove (&gsi, true);
8165 }
8166 if (data_region && region->exit)
8167 {
8168 gsi = gsi_last_nondebug_bb (region->exit);
8169 g = gsi_stmt (gsi);
8170 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
8171 gsi_remove (&gsi, true);
8172 }
8173 }
8174
8175 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
8176 the iteration variable derived from the thread number. INTRA_GROUP means this
8177 is an expansion of a loop iterating over work-items within a separate
8178 iteration over groups. */
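/* A simplified sketch (assumed one dimension, non-pointer iteration
   variable): the loop

	for (v = n1; v < n2; v += step)
	  body;

   is reduced to a straight-line body with

	v = n1 + thread_id (0) * step;
	body;

   where thread_id stands for one of the BUILT_IN_HSA_WORKITEMID,
   BUILT_IN_HSA_WORKITEMABSID or BUILT_IN_HSA_WORKGROUPID builtins chosen
   below; the iteration count is conveyed through the grid/group launch
   attributes rather than an explicit bound check.  */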
8179
8180 static void
8181 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
8182 {
8183 gimple_stmt_iterator gsi;
8184 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8185 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
8186 == GF_OMP_FOR_KIND_GRID_LOOP);
8187 size_t collapse = gimple_omp_for_collapse (for_stmt);
8188 struct omp_for_data_loop *loops
8189 = XALLOCAVEC (struct omp_for_data_loop,
8190 gimple_omp_for_collapse (for_stmt));
8191 struct omp_for_data fd;
8192
8193 remove_edge (BRANCH_EDGE (kfor->entry));
8194 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
8195
8196 gcc_assert (kfor->cont);
8197 omp_extract_for_data (for_stmt, &fd, loops);
8198
8199 gsi = gsi_start_bb (body_bb);
8200
8201 for (size_t dim = 0; dim < collapse; dim++)
8202 {
8203 tree type, itype;
8204 itype = type = TREE_TYPE (fd.loops[dim].v);
8205 if (POINTER_TYPE_P (type))
8206 itype = signed_type_for (type);
8207
8208 tree n1 = fd.loops[dim].n1;
8209 tree step = fd.loops[dim].step;
8210 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
8211 true, NULL_TREE, true, GSI_SAME_STMT);
8212 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
8213 true, NULL_TREE, true, GSI_SAME_STMT);
8214 tree threadid;
8215 if (gimple_omp_for_grid_group_iter (for_stmt))
8216 {
8217 gcc_checking_assert (!intra_group);
8218 threadid = build_call_expr (builtin_decl_explicit
8219 (BUILT_IN_HSA_WORKGROUPID), 1,
8220 build_int_cstu (unsigned_type_node, dim));
8221 }
8222 else if (intra_group)
8223 threadid = build_call_expr (builtin_decl_explicit
8224 (BUILT_IN_HSA_WORKITEMID), 1,
8225 build_int_cstu (unsigned_type_node, dim));
8226 else
8227 threadid = build_call_expr (builtin_decl_explicit
8228 (BUILT_IN_HSA_WORKITEMABSID), 1,
8229 build_int_cstu (unsigned_type_node, dim));
8230 threadid = fold_convert (itype, threadid);
8231 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
8232 true, GSI_SAME_STMT);
8233
8234 tree startvar = fd.loops[dim].v;
8235 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
8236 if (POINTER_TYPE_P (type))
8237 t = fold_build_pointer_plus (n1, t);
8238 else
8239 t = fold_build2 (PLUS_EXPR, type, t, n1);
8240 t = fold_convert (type, t);
8241 t = force_gimple_operand_gsi (&gsi, t,
8242 DECL_P (startvar)
8243 && TREE_ADDRESSABLE (startvar),
8244 NULL_TREE, true, GSI_SAME_STMT);
8245 gassign *assign_stmt = gimple_build_assign (startvar, t);
8246 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8247 }
8248 /* Remove the omp for statement. */
8249 gsi = gsi_last_nondebug_bb (kfor->entry);
8250 gsi_remove (&gsi, true);
8251
8252 /* Remove the GIMPLE_OMP_CONTINUE statement. */
8253 gsi = gsi_last_nondebug_bb (kfor->cont);
8254 gcc_assert (!gsi_end_p (gsi)
8255 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
8256 gsi_remove (&gsi, true);
8257
8258 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
8259 gsi = gsi_last_nondebug_bb (kfor->exit);
8260 gcc_assert (!gsi_end_p (gsi)
8261 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8262 if (intra_group)
8263 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
8264 gsi_remove (&gsi, true);
8265
8266 /* Fixup the much simpler CFG. */
8267 remove_edge (find_edge (kfor->cont, body_bb));
8268
8269 if (kfor->cont != body_bb)
8270 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
8271 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
8272 }
8273
8274 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
8275 argument_decls. */
8276
8277 struct grid_arg_decl_map
8278 {
8279 tree old_arg;
8280 tree new_arg;
8281 };
8282
8283 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
8284 pertaining to kernel function. */
8285
8286 static tree
8287 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
8288 {
8289 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
8290 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
8291 tree t = *tp;
8292
8293 if (t == adm->old_arg)
8294 *tp = adm->new_arg;
8295 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
8296 return NULL_TREE;
8297 }
8298
8299 /* If the TARGET region contains a gridified kernel body loop, remove its region
8300 from the TARGET and expand it in HSA gridified kernel fashion. */
8301
8302 static void
8303 grid_expand_target_grid_body (struct omp_region *target)
8304 {
8305 if (!hsa_gen_requested_p ())
8306 return;
8307
8308 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
8309 struct omp_region **pp;
8310
8311 for (pp = &target->inner; *pp; pp = &(*pp)->next)
8312 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
8313 break;
8314
8315 struct omp_region *gpukernel = *pp;
8316
8317 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
8318 if (!gpukernel)
8319 {
8320 /* HSA cannot handle OACC stuff. */
8321 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
8322 return;
8323 gcc_checking_assert (orig_child_fndecl);
8324 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8325 OMP_CLAUSE__GRIDDIM_));
8326 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
8327
8328 hsa_register_kernel (n);
8329 return;
8330 }
8331
8332 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
8333 OMP_CLAUSE__GRIDDIM_));
8334 tree inside_block
8335 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
8336 *pp = gpukernel->next;
8337 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
8338 if ((*pp)->type == GIMPLE_OMP_FOR)
8339 break;
8340
8341 struct omp_region *kfor = *pp;
8342 gcc_assert (kfor);
8343 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
8344 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
8345 *pp = kfor->next;
8346 if (kfor->inner)
8347 {
8348 if (gimple_omp_for_grid_group_iter (for_stmt))
8349 {
8350 struct omp_region **next_pp;
8351 for (pp = &kfor->inner; *pp; pp = next_pp)
8352 {
8353 next_pp = &(*pp)->next;
8354 if ((*pp)->type != GIMPLE_OMP_FOR)
8355 continue;
8356 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
8357 gcc_assert (gimple_omp_for_kind (inner)
8358 == GF_OMP_FOR_KIND_GRID_LOOP);
8359 grid_expand_omp_for_loop (*pp, true);
8360 *pp = (*pp)->next;
8361 next_pp = pp;
8362 }
8363 }
8364 expand_omp (kfor->inner);
8365 }
8366 if (gpukernel->inner)
8367 expand_omp (gpukernel->inner);
8368
8369 tree kern_fndecl = copy_node (orig_child_fndecl);
8370 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
8371 "kernel");
8372 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
8373 tree tgtblock = gimple_block (tgt_stmt);
8374 tree fniniblock = make_node (BLOCK);
8375 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
8376 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
8377 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
8378 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
8379 DECL_INITIAL (kern_fndecl) = fniniblock;
8380 push_struct_function (kern_fndecl);
8381 cfun->function_end_locus = gimple_location (tgt_stmt);
8382 init_tree_ssa (cfun);
8383 pop_cfun ();
8384
8385 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
8386 gcc_assert (!DECL_CHAIN (old_parm_decl));
8387 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
8388 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
8389 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
8390 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
8391 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
8392 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8393 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8394 kern_cfun->curr_properties = cfun->curr_properties;
8395
8396 grid_expand_omp_for_loop (kfor, false);
8397
8398 /* Remove the omp for statement. */
8399 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
8400 gsi_remove (&gsi, true);
8401 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8402 return. */
8403 gsi = gsi_last_nondebug_bb (gpukernel->exit);
8404 gcc_assert (!gsi_end_p (gsi)
8405 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8406 gimple *ret_stmt = gimple_build_return (NULL);
8407 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8408 gsi_remove (&gsi, true);
8409
8410 /* Statements in the first BB in the target construct have been produced by
8411 target lowering and must be copied inside the GPUKERNEL, with the two
8412 exceptions of the first OMP statement and the OMP_DATA assignment
8413 statement. */
8414 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8415 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8416 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8417 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8418 !gsi_end_p (tsi); gsi_next (&tsi))
8419 {
8420 gimple *stmt = gsi_stmt (tsi);
8421 if (is_gimple_omp (stmt))
8422 break;
8423 if (sender
8424 && is_gimple_assign (stmt)
8425 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8426 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8427 continue;
8428 gimple *copy = gimple_copy (stmt);
8429 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8430 gimple_set_block (copy, fniniblock);
8431 }
8432
8433 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8434 gpukernel->exit, inside_block);
8435
8436 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8437 kcn->mark_force_output ();
8438 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8439
8440 hsa_register_kernel (kcn, orig_child);
8441
8442 cgraph_node::add_new_function (kern_fndecl, true);
8443 push_cfun (kern_cfun);
8444 cgraph_edge::rebuild_edges ();
8445
8446 /* Re-map any mention of the PARM_DECL of the original function to the
8447 PARM_DECL of the new one.
8448
8449 TODO: It would be great if lowering produced references into the GPU
8450 kernel decl straight away and we did not have to do this. */
8451 struct grid_arg_decl_map adm;
8452 adm.old_arg = old_parm_decl;
8453 adm.new_arg = new_parm_decl;
8454 basic_block bb;
8455 FOR_EACH_BB_FN (bb, kern_cfun)
8456 {
8457 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8458 {
8459 gimple *stmt = gsi_stmt (gsi);
8460 struct walk_stmt_info wi;
8461 memset (&wi, 0, sizeof (wi));
8462 wi.info = &adm;
8463 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8464 }
8465 }
8466 pop_cfun ();
8467
8468 return;
8469 }
8470
8471 /* Expand the parallel region tree rooted at REGION. Expansion
8472 proceeds in depth-first order. Innermost regions are expanded
8473 first. This way, parallel regions that require a new function to
8474 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8475 internal dependencies in their body. */
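/* For example (an illustrative sketch, not a dump of a particular testcase),
   a combined construct such as

     #pragma omp parallel for
     for (i = 0; i < n; i++)
       ...

   gives a GIMPLE_OMP_PARALLEL region with a GIMPLE_OMP_FOR region nested
   inside it; the inner GIMPLE_OMP_FOR is expanded before the enclosing
   GIMPLE_OMP_PARALLEL is outlined into its child function.  */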
8476
8477 static void
8478 expand_omp (struct omp_region *region)
8479 {
8480 omp_any_child_fn_dumped = false;
8481 while (region)
8482 {
8483 location_t saved_location;
8484 gimple *inner_stmt = NULL;
8485
8486 /* First, determine whether this is a combined parallel+workshare
8487 region. */
8488 if (region->type == GIMPLE_OMP_PARALLEL)
8489 determine_parallel_type (region);
8490 else if (region->type == GIMPLE_OMP_TARGET)
8491 grid_expand_target_grid_body (region);
8492
8493 if (region->type == GIMPLE_OMP_FOR
8494 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8495 inner_stmt = last_stmt (region->inner->entry);
8496
8497 if (region->inner)
8498 expand_omp (region->inner);
8499
8500 saved_location = input_location;
8501 if (gimple_has_location (last_stmt (region->entry)))
8502 input_location = gimple_location (last_stmt (region->entry));
8503
8504 switch (region->type)
8505 {
8506 case GIMPLE_OMP_PARALLEL:
8507 case GIMPLE_OMP_TASK:
8508 expand_omp_taskreg (region);
8509 break;
8510
8511 case GIMPLE_OMP_FOR:
8512 expand_omp_for (region, inner_stmt);
8513 break;
8514
8515 case GIMPLE_OMP_SECTIONS:
8516 expand_omp_sections (region);
8517 break;
8518
8519 case GIMPLE_OMP_SECTION:
8520 /* Individual omp sections are handled together with their
8521 parent GIMPLE_OMP_SECTIONS region. */
8522 break;
8523
8524 case GIMPLE_OMP_SINGLE:
8525 expand_omp_single (region);
8526 break;
8527
8528 case GIMPLE_OMP_ORDERED:
8529 {
8530 gomp_ordered *ord_stmt
8531 = as_a <gomp_ordered *> (last_stmt (region->entry));
8532 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8533 OMP_CLAUSE_DEPEND))
8534 {
8535 /* We'll expand these when expanding the corresponding
8536 worksharing region with an ordered(n) clause. */
8537 gcc_assert (region->outer
8538 && region->outer->type == GIMPLE_OMP_FOR);
8539 region->ord_stmt = ord_stmt;
8540 break;
8541 }
8542 }
8543 /* FALLTHRU */
8544 case GIMPLE_OMP_MASTER:
8545 case GIMPLE_OMP_TASKGROUP:
8546 case GIMPLE_OMP_CRITICAL:
8547 case GIMPLE_OMP_TEAMS:
8548 expand_omp_synch (region);
8549 break;
8550
8551 case GIMPLE_OMP_ATOMIC_LOAD:
8552 expand_omp_atomic (region);
8553 break;
8554
8555 case GIMPLE_OMP_TARGET:
8556 expand_omp_target (region);
8557 break;
8558
8559 default:
8560 gcc_unreachable ();
8561 }
8562
8563 input_location = saved_location;
8564 region = region->next;
8565 }
8566 if (omp_any_child_fn_dumped)
8567 {
8568 if (dump_file)
8569 dump_function_header (dump_file, current_function_decl, dump_flags);
8570 omp_any_child_fn_dumped = false;
8571 }
8572 }
8573
8574 /* Helper for build_omp_regions. Scan the dominator tree starting at
8575 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8576 true, the function ends once a single tree is built (otherwise, a whole
8577 forest of OMP constructs may be built). */
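/* Schematically (a rough sketch rather than an exact dump): when the walk
   reaches a block ending in, say, GIMPLE_OMP_PARALLEL, a new region is opened
   with that block as its ENTRY; when the dominator walk later reaches the
   block ending in the matching GIMPLE_OMP_RETURN, that block becomes the
   region's EXIT and PARENT reverts to the enclosing region.  */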
8578
8579 static void
8580 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8581 bool single_tree)
8582 {
8583 gimple_stmt_iterator gsi;
8584 gimple *stmt;
8585 basic_block son;
8586
8587 gsi = gsi_last_nondebug_bb (bb);
8588 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8589 {
8590 struct omp_region *region;
8591 enum gimple_code code;
8592
8593 stmt = gsi_stmt (gsi);
8594 code = gimple_code (stmt);
8595 if (code == GIMPLE_OMP_RETURN)
8596 {
8597 /* STMT is the return point out of region PARENT. Mark it
8598 as the exit point and make PARENT the immediately
8599 enclosing region. */
8600 gcc_assert (parent);
8601 region = parent;
8602 region->exit = bb;
8603 parent = parent->outer;
8604 }
8605 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8606 {
8607 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8608 GIMPLE_OMP_RETURN, but matches with
8609 GIMPLE_OMP_ATOMIC_LOAD. */
8610 gcc_assert (parent);
8611 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8612 region = parent;
8613 region->exit = bb;
8614 parent = parent->outer;
8615 }
8616 else if (code == GIMPLE_OMP_CONTINUE)
8617 {
8618 gcc_assert (parent);
8619 parent->cont = bb;
8620 }
8621 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8622 {
8623 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8624 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8625 }
8626 else
8627 {
8628 region = new_omp_region (bb, code, parent);
8629 /* Otherwise... */
8630 if (code == GIMPLE_OMP_TARGET)
8631 {
8632 switch (gimple_omp_target_kind (stmt))
8633 {
8634 case GF_OMP_TARGET_KIND_REGION:
8635 case GF_OMP_TARGET_KIND_DATA:
8636 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8637 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8638 case GF_OMP_TARGET_KIND_OACC_DATA:
8639 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8640 break;
8641 case GF_OMP_TARGET_KIND_UPDATE:
8642 case GF_OMP_TARGET_KIND_ENTER_DATA:
8643 case GF_OMP_TARGET_KIND_EXIT_DATA:
8644 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8645 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8646 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8647 /* ..., other than for those stand-alone directives... */
8648 region = NULL;
8649 break;
8650 default:
8651 gcc_unreachable ();
8652 }
8653 }
8654 else if (code == GIMPLE_OMP_ORDERED
8655 && omp_find_clause (gimple_omp_ordered_clauses
8656 (as_a <gomp_ordered *> (stmt)),
8657 OMP_CLAUSE_DEPEND))
8658 /* #pragma omp ordered depend is also just a stand-alone
8659 directive. */
8660 region = NULL;
8661 else if (code == GIMPLE_OMP_TASK
8662 && gimple_omp_task_taskwait_p (stmt))
8663 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8664 region = NULL;
8665 /* ..., this directive becomes the parent for a new region. */
8666 if (region)
8667 parent = region;
8668 }
8669 }
8670
8671 if (single_tree && !parent)
8672 return;
8673
8674 for (son = first_dom_son (CDI_DOMINATORS, bb);
8675 son;
8676 son = next_dom_son (CDI_DOMINATORS, son))
8677 build_omp_regions_1 (son, parent, single_tree);
8678 }
8679
8680 /* Builds the tree of OMP regions rooted at ROOT, storing it in
8681 root_omp_region. */
8682
8683 static void
8684 build_omp_regions_root (basic_block root)
8685 {
8686 gcc_assert (root_omp_region == NULL);
8687 build_omp_regions_1 (root, NULL, true);
8688 gcc_assert (root_omp_region != NULL);
8689 }
8690
8691 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
8692
8693 void
8694 omp_expand_local (basic_block head)
8695 {
8696 build_omp_regions_root (head);
8697 if (dump_file && (dump_flags & TDF_DETAILS))
8698 {
8699 fprintf (dump_file, "\nOMP region tree\n\n");
8700 dump_omp_region (dump_file, root_omp_region, 0);
8701 fprintf (dump_file, "\n");
8702 }
8703
8704 remove_exit_barriers (root_omp_region);
8705 expand_omp (root_omp_region);
8706
8707 omp_free_regions ();
8708 }
8709
8710 /* Scan the CFG and build a tree of OMP regions, storing it in
8711 root_omp_region. */
8712
8713 static void
8714 build_omp_regions (void)
8715 {
8716 gcc_assert (root_omp_region == NULL);
8717 calculate_dominance_info (CDI_DOMINATORS);
8718 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8719 }
8720
8721 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
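/* A hedged illustration (argument details simplified): after this pass, a
   construct such as

     #pragma omp parallel
       body;

   has its body outlined into a child function (e.g. foo._omp_fn.0) and the
   construct itself replaced by a libgomp call roughly of the form

     __builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);  */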
8722
8723 static unsigned int
8724 execute_expand_omp (void)
8725 {
8726 build_omp_regions ();
8727
8728 if (!root_omp_region)
8729 return 0;
8730
8731 if (dump_file)
8732 {
8733 fprintf (dump_file, "\nOMP region tree\n\n");
8734 dump_omp_region (dump_file, root_omp_region, 0);
8735 fprintf (dump_file, "\n");
8736 }
8737
8738 remove_exit_barriers (root_omp_region);
8739
8740 expand_omp (root_omp_region);
8741
8742 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8743 verify_loop_structure ();
8744 cleanup_tree_cfg ();
8745
8746 omp_free_regions ();
8747
8748 return 0;
8749 }
8750
8751 /* OMP expansion -- the default pass, run before creation of SSA form. */
8752
8753 namespace {
8754
8755 const pass_data pass_data_expand_omp =
8756 {
8757 GIMPLE_PASS, /* type */
8758 "ompexp", /* name */
8759 OPTGROUP_OMP, /* optinfo_flags */
8760 TV_NONE, /* tv_id */
8761 PROP_gimple_any, /* properties_required */
8762 PROP_gimple_eomp, /* properties_provided */
8763 0, /* properties_destroyed */
8764 0, /* todo_flags_start */
8765 0, /* todo_flags_finish */
8766 };
8767
8768 class pass_expand_omp : public gimple_opt_pass
8769 {
8770 public:
8771 pass_expand_omp (gcc::context *ctxt)
8772 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8773 {}
8774
8775 /* opt_pass methods: */
8776 virtual unsigned int execute (function *)
8777 {
8778 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8779 || flag_openmp_simd != 0)
8780 && !seen_error ());
8781
8782 /* This pass always runs, to provide PROP_gimple_eomp.
8783 But often, there is nothing to do. */
8784 if (!gate)
8785 return 0;
8786
8787 return execute_expand_omp ();
8788 }
8789
8790 }; // class pass_expand_omp
8791
8792 } // anon namespace
8793
8794 gimple_opt_pass *
8795 make_pass_expand_omp (gcc::context *ctxt)
8796 {
8797 return new pass_expand_omp (ctxt);
8798 }
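/* A minimal sketch of how the pass is hooked up: passes.def instantiates it
   via the pass manager, roughly as

     NEXT_PASS (pass_expand_omp);

   and make_pass_expand_omp above is the factory the pass manager calls.
   (The exact placement in the pipeline is not shown here.)  */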
8799
8800 namespace {
8801
8802 const pass_data pass_data_expand_omp_ssa =
8803 {
8804 GIMPLE_PASS, /* type */
8805 "ompexpssa", /* name */
8806 OPTGROUP_OMP, /* optinfo_flags */
8807 TV_NONE, /* tv_id */
8808 PROP_cfg | PROP_ssa, /* properties_required */
8809 PROP_gimple_eomp, /* properties_provided */
8810 0, /* properties_destroyed */
8811 0, /* todo_flags_start */
8812 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8813 };
8814
8815 class pass_expand_omp_ssa : public gimple_opt_pass
8816 {
8817 public:
8818 pass_expand_omp_ssa (gcc::context *ctxt)
8819 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8820 {}
8821
8822 /* opt_pass methods: */
8823 virtual bool gate (function *fun)
8824 {
8825 return !(fun->curr_properties & PROP_gimple_eomp);
8826 }
8827 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8828 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8829
8830 }; // class pass_expand_omp_ssa
8831
8832 } // anon namespace
8833
8834 gimple_opt_pass *
8835 make_pass_expand_omp_ssa (gcc::context *ctxt)
8836 {
8837 return new pass_expand_omp_ssa (ctxt);
8838 }
8839
8840 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8841 GIMPLE_* codes. */
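/* A hedged sketch of the expected caller (the real loop lives in tree-cfg.c's
   make_edges and tracks more state; variable names here are illustrative):
   for each basic block whose last statement is an OMP code, something along
   the lines of

     fallthru = omp_make_gimple_edges (bb, &cur_region, &cur_omp_region_idx);

   is done, and the returned FALLTHRU then decides whether a normal
   fall-through edge to the next block is still created.  */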
8842
8843 bool
8844 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8845 int *region_idx)
8846 {
8847 gimple *last = last_stmt (bb);
8848 enum gimple_code code = gimple_code (last);
8849 struct omp_region *cur_region = *region;
8850 bool fallthru = false;
8851
8852 switch (code)
8853 {
8854 case GIMPLE_OMP_PARALLEL:
8855 case GIMPLE_OMP_FOR:
8856 case GIMPLE_OMP_SINGLE:
8857 case GIMPLE_OMP_TEAMS:
8858 case GIMPLE_OMP_MASTER:
8859 case GIMPLE_OMP_TASKGROUP:
8860 case GIMPLE_OMP_CRITICAL:
8861 case GIMPLE_OMP_SECTION:
8862 case GIMPLE_OMP_GRID_BODY:
8863 cur_region = new_omp_region (bb, code, cur_region);
8864 fallthru = true;
8865 break;
8866
8867 case GIMPLE_OMP_TASK:
8868 cur_region = new_omp_region (bb, code, cur_region);
8869 fallthru = true;
8870 if (gimple_omp_task_taskwait_p (last))
8871 cur_region = cur_region->outer;
8872 break;
8873
8874 case GIMPLE_OMP_ORDERED:
8875 cur_region = new_omp_region (bb, code, cur_region);
8876 fallthru = true;
8877 if (omp_find_clause (gimple_omp_ordered_clauses
8878 (as_a <gomp_ordered *> (last)),
8879 OMP_CLAUSE_DEPEND))
8880 cur_region = cur_region->outer;
8881 break;
8882
8883 case GIMPLE_OMP_TARGET:
8884 cur_region = new_omp_region (bb, code, cur_region);
8885 fallthru = true;
8886 switch (gimple_omp_target_kind (last))
8887 {
8888 case GF_OMP_TARGET_KIND_REGION:
8889 case GF_OMP_TARGET_KIND_DATA:
8890 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8891 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8892 case GF_OMP_TARGET_KIND_OACC_DATA:
8893 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8894 break;
8895 case GF_OMP_TARGET_KIND_UPDATE:
8896 case GF_OMP_TARGET_KIND_ENTER_DATA:
8897 case GF_OMP_TARGET_KIND_EXIT_DATA:
8898 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8899 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8900 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8901 cur_region = cur_region->outer;
8902 break;
8903 default:
8904 gcc_unreachable ();
8905 }
8906 break;
8907
8908 case GIMPLE_OMP_SECTIONS:
8909 cur_region = new_omp_region (bb, code, cur_region);
8910 fallthru = true;
8911 break;
8912
8913 case GIMPLE_OMP_SECTIONS_SWITCH:
8914 fallthru = false;
8915 break;
8916
8917 case GIMPLE_OMP_ATOMIC_LOAD:
8918 case GIMPLE_OMP_ATOMIC_STORE:
8919 fallthru = true;
8920 break;
8921
8922 case GIMPLE_OMP_RETURN:
8923 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8924 somewhere other than the next block. This will be
8925 created later. */
8926 cur_region->exit = bb;
8927 if (cur_region->type == GIMPLE_OMP_TASK)
8928 /* Add an edge corresponding to not scheduling the task
8929 immediately. */
8930 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8931 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8932 cur_region = cur_region->outer;
8933 break;
8934
8935 case GIMPLE_OMP_CONTINUE:
8936 cur_region->cont = bb;
8937 switch (cur_region->type)
8938 {
8939 case GIMPLE_OMP_FOR:
8940 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8941 successor edges as abnormal to prevent splitting
8942 them. */
8943 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8944 /* Make the loopback edge. */
8945 make_edge (bb, single_succ (cur_region->entry),
8946 EDGE_ABNORMAL);
8947
8948 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8949 corresponds to the case that the body of the loop
8950 is not executed at all. */
8951 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8952 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8953 fallthru = false;
8954 break;
8955
8956 case GIMPLE_OMP_SECTIONS:
8957 /* Wire up the edges into and out of the nested sections. */
8958 {
8959 basic_block switch_bb = single_succ (cur_region->entry);
8960
8961 struct omp_region *i;
8962 for (i = cur_region->inner; i ; i = i->next)
8963 {
8964 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8965 make_edge (switch_bb, i->entry, 0);
8966 make_edge (i->exit, bb, EDGE_FALLTHRU);
8967 }
8968
8969 /* Make the loopback edge to the block with
8970 GIMPLE_OMP_SECTIONS_SWITCH. */
8971 make_edge (bb, switch_bb, 0);
8972
8973 /* Make the edge from the switch to exit. */
8974 make_edge (switch_bb, bb->next_bb, 0);
8975 fallthru = false;
8976 }
8977 break;
8978
8979 case GIMPLE_OMP_TASK:
8980 fallthru = true;
8981 break;
8982
8983 default:
8984 gcc_unreachable ();
8985 }
8986 break;
8987
8988 default:
8989 gcc_unreachable ();
8990 }
8991
8992 if (*region != cur_region)
8993 {
8994 *region = cur_region;
8995 if (cur_region)
8996 *region_idx = cur_region->entry->index;
8997 else
8998 *region_idx = 0;
8999 }
9000
9001 return fallthru;
9002 }
9003
9004 #include "gt-omp-expand.h"