1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
106
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
110 };
111
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
114
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
119
120 /* Return true if REGION is a combined parallel+workshare region. */
121
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
124 {
125 return region->is_combined_parallel;
126 }
127
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
132
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
138
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
141
142 Is lowered into:
143
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
147
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
153
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
158
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
164
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
169
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
172 {
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
175
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
178
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
182
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
184
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
189
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
200
201 return true;
202 }
203
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
206
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
212
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
216
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
222 }
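/* Worked example of the rounding above (numbers are hypothetical): for
   schedule (simd:dynamic, 10) with omp_max_vf () == 8, the chunk size
   becomes (10 + 8 - 1) & -8 == 16, i.e. it is rounded up to the next
   multiple of the vectorization factor so that each chunk covers whole
   SIMD vectors.  */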
223
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded. */
227
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
230 {
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
234
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
236 {
237 struct omp_for_data fd;
238 tree n1, n2;
239
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
243
244 if (gimple_omp_for_combined_into_p (for_stmt))
245 {
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
255 }
256
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
258
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
261
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
264
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
267
268 if (fd.chunk_size)
269 {
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
273 }
274
275 return ws_args;
276 }
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
278 {
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
287 }
288
289 gcc_unreachable ();
290 }
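/* Illustrative example (hypothetical loop): for a combined

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++)

   the vector built above holds the extra arguments
   { (long) 0, (long) n, (long) 1, (long) 4 }, which expand_parallel_call
   splices into the combined GOMP_parallel_loop_* call between the
   num_threads value and the flags argument.  */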
291
292 /* Discover whether REGION is a combined parallel+workshare region. */
293
294 static void
295 determine_parallel_type (struct omp_region *region)
296 {
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
299
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
304
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
310
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
317
318 /* Give up for task reductions on the parallel; while it is implementable,
319 adding another big set of APIs or slowing down the normal paths is
320 not acceptable. */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
324
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
331 {
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
334
335 if (region->inner->type == GIMPLE_OMP_FOR)
336 {
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
356 }
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
363
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
367 }
368 }
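/* Example of the effect (hypothetical source): for

     #pragma omp parallel for schedule (dynamic)
     for (i = 0; i < n; i++)
       ...

   both the parallel region and the inner loop region are marked
   is_combined_parallel, so expand_parallel_call later emits a single
   combined GOMP_parallel_loop_* call instead of a plain GOMP_parallel
   call with a separate loop-start call inside the body.  */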
369
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374
375 /* Dump the parallel region tree rooted at REGION. */
376
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
382
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
385
386 if (region->cont)
387 {
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
390 }
391
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
397
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
400 }
401
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
404 {
405 dump_omp_region (stderr, region, 0);
406 }
407
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
410 {
411 dump_omp_region (stderr, root_omp_region, 0);
412 }
413
414 /* Create a new parallel region starting at STMT inside region PARENT. */
415
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
419 {
420 struct omp_region *region = XCNEW (struct omp_region);
421
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
425
426 if (parent)
427 {
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
432 }
433 else
434 {
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
439 }
440
441 return region;
442 }
443
444 /* Release the memory associated with the region tree rooted at REGION. */
445
446 static void
447 free_omp_region_1 (struct omp_region *region)
448 {
449 struct omp_region *i, *n;
450
451 for (i = region->inner; i ; i = n)
452 {
453 n = i->next;
454 free_omp_region_1 (i);
455 }
456
457 free (region);
458 }
459
460 /* Release the memory for the entire omp region tree. */
461
462 void
463 omp_free_regions (void)
464 {
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
467 {
468 n = r->next;
469 free_omp_region_1 (r);
470 }
471 root_omp_region = NULL;
472 }
473
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
476
477 static gcond *
478 gimple_build_cond_empty (tree cond)
479 {
480 enum tree_code pred_code;
481 tree lhs, rhs;
482
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
485 }
486
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
491
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
494
495 static void
496 adjust_context_and_scope (struct omp_region *region, tree entry_block,
497 tree child_fndecl)
498 {
499 tree parent_fndecl = NULL_TREE;
500 gimple *entry_stmt;
501 /* OMP expansion expands inner regions before outer ones, so if
502 we e.g. have an explicit task region nested in a parallel region,
503 then when expanding the task region current_function_decl will be
504 the original source function, but we actually want to use as context
505 the child function of the parallel. */
506 for (region = region->outer;
507 region && parent_fndecl == NULL_TREE; region = region->outer)
508 switch (region->type)
509 {
510 case GIMPLE_OMP_PARALLEL:
511 case GIMPLE_OMP_TASK:
512 case GIMPLE_OMP_TEAMS:
513 entry_stmt = last_stmt (region->entry);
514 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
515 break;
516 case GIMPLE_OMP_TARGET:
517 entry_stmt = last_stmt (region->entry);
518 parent_fndecl
519 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
520 break;
521 default:
522 break;
523 }
524
525 if (parent_fndecl == NULL_TREE)
526 parent_fndecl = current_function_decl;
527 DECL_CONTEXT (child_fndecl) = parent_fndecl;
528
529 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
530 {
531 tree b = BLOCK_SUPERCONTEXT (entry_block);
532 if (TREE_CODE (b) == BLOCK)
533 {
534 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
535 BLOCK_VARS (b) = child_fndecl;
536 }
537 }
538 }
539
540 /* Build the function calls to GOMP_parallel etc. to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where to insert the code. WS_ARGS
543 will be set if this is a call to a combined parallel+workshare
544 construct; it contains the list of additional arguments needed by
545 the workshare construct. */
546
547 static void
548 expand_parallel_call (struct omp_region *region, basic_block bb,
549 gomp_parallel *entry_stmt,
550 vec<tree, va_gc> *ws_args)
551 {
552 tree t, t1, t2, val, cond, c, clauses, flags;
553 gimple_stmt_iterator gsi;
554 gimple *stmt;
555 enum built_in_function start_ix;
556 int start_ix2;
557 location_t clause_loc;
558 vec<tree, va_gc> *args;
559
560 clauses = gimple_omp_parallel_clauses (entry_stmt);
561
562 /* Determine what flavor of GOMP_parallel we will be
563 emitting. */
564 start_ix = BUILT_IN_GOMP_PARALLEL;
565 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
566 if (rtmp)
567 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
568 else if (is_combined_parallel (region))
569 {
570 switch (region->inner->type)
571 {
572 case GIMPLE_OMP_FOR:
573 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
574 switch (region->inner->sched_kind)
575 {
576 case OMP_CLAUSE_SCHEDULE_RUNTIME:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region->inner->has_lastprivate_conditional != 0)
580 start_ix2 = 3;
581 else if ((region->inner->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
583 start_ix2 = 6;
584 else if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
586 start_ix2 = 7;
587 else
588 start_ix2 = 3;
589 break;
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
591 case OMP_CLAUSE_SCHEDULE_GUIDED:
592 if ((region->inner->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
594 && !region->inner->has_lastprivate_conditional)
595 {
596 start_ix2 = 3 + region->inner->sched_kind;
597 break;
598 }
599 /* FALLTHRU */
600 default:
601 start_ix2 = region->inner->sched_kind;
602 break;
603 }
604 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
605 start_ix = (enum built_in_function) start_ix2;
606 break;
607 case GIMPLE_OMP_SECTIONS:
608 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
609 break;
610 default:
611 gcc_unreachable ();
612 }
613 }
614
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
617 cond = NULL_TREE;
618 val = build_int_cst (unsigned_type_node, 0);
619 flags = build_int_cst (unsigned_type_node, 0);
620
621 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
622 if (c)
623 cond = OMP_CLAUSE_IF_EXPR (c);
624
625 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
626 if (c)
627 {
628 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
629 clause_loc = OMP_CLAUSE_LOCATION (c);
630 }
631 else
632 clause_loc = gimple_location (entry_stmt);
633
634 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
635 if (c)
636 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
637
638 /* Ensure 'val' is of the correct type. */
639 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
640
641 /* If we found the clause 'if (cond)', build either
642 (cond != 0) or (cond ? val : 1u). */
643 if (cond)
644 {
645 cond = gimple_boolify (cond);
646
647 if (integer_zerop (val))
648 val = fold_build2_loc (clause_loc,
649 EQ_EXPR, unsigned_type_node, cond,
650 build_int_cst (TREE_TYPE (cond), 0));
651 else
652 {
653 basic_block cond_bb, then_bb, else_bb;
654 edge e, e_then, e_else;
655 tree tmp_then, tmp_else, tmp_join, tmp_var;
656
657 tmp_var = create_tmp_var (TREE_TYPE (val));
658 if (gimple_in_ssa_p (cfun))
659 {
660 tmp_then = make_ssa_name (tmp_var);
661 tmp_else = make_ssa_name (tmp_var);
662 tmp_join = make_ssa_name (tmp_var);
663 }
664 else
665 {
666 tmp_then = tmp_var;
667 tmp_else = tmp_var;
668 tmp_join = tmp_var;
669 }
670
671 e = split_block_after_labels (bb);
672 cond_bb = e->src;
673 bb = e->dest;
674 remove_edge (e);
675
676 then_bb = create_empty_bb (cond_bb);
677 else_bb = create_empty_bb (then_bb);
678 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
679 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
680
681 stmt = gimple_build_cond_empty (cond);
682 gsi = gsi_start_bb (cond_bb);
683 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
684
685 gsi = gsi_start_bb (then_bb);
686 expand_omp_build_assign (&gsi, tmp_then, val, true);
687
688 gsi = gsi_start_bb (else_bb);
689 expand_omp_build_assign (&gsi, tmp_else,
690 build_int_cst (unsigned_type_node, 1),
691 true);
692
693 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
694 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
695 add_bb_to_loop (then_bb, cond_bb->loop_father);
696 add_bb_to_loop (else_bb, cond_bb->loop_father);
697 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
698 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
699
700 if (gimple_in_ssa_p (cfun))
701 {
702 gphi *phi = create_phi_node (tmp_join, bb);
703 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
704 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
705 }
706
707 val = tmp_join;
708 }
709
710 gsi = gsi_start_bb (bb);
711 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
712 false, GSI_CONTINUE_LINKING);
713 }
714
715 gsi = gsi_last_nondebug_bb (bb);
716 t = gimple_omp_parallel_data_arg (entry_stmt);
717 if (t == NULL)
718 t1 = null_pointer_node;
719 else
720 t1 = build_fold_addr_expr (t);
721 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
722 t2 = build_fold_addr_expr (child_fndecl);
723
724 vec_alloc (args, 4 + vec_safe_length (ws_args));
725 args->quick_push (t2);
726 args->quick_push (t1);
727 args->quick_push (val);
728 if (ws_args)
729 args->splice (*ws_args);
730 args->quick_push (flags);
731
732 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
733 builtin_decl_explicit (start_ix), args);
734
735 if (rtmp)
736 {
737 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
738 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
739 fold_convert (type,
740 fold_convert (pointer_sized_int_node, t)));
741 }
742 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
744 }
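/* For a plain (non-combined) parallel region the call emitted above has
   roughly this shape (names are illustrative):

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 0, 0);

   i.e. the child function, the address of the shared data block (or a
   null pointer), the num_threads value (0 selects the runtime default)
   and the flags word derived from the proc_bind clause.  For a combined
   region the ws_args collected by get_ws_args_for are spliced in before
   the flags.  */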
745
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where to insert the code. */
748
749 static void
750 expand_task_call (struct omp_region *region, basic_block bb,
751 gomp_task *entry_stmt)
752 {
753 tree t1, t2, t3;
754 gimple_stmt_iterator gsi;
755 location_t loc = gimple_location (entry_stmt);
756
757 tree clauses = gimple_omp_task_clauses (entry_stmt);
758
759 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
760 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
761 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
762 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
763 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
764 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
765
766 unsigned int iflags
767 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
768 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
769 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
770
771 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
772 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
773 tree num_tasks = NULL_TREE;
774 bool ull = false;
775 if (taskloop_p)
776 {
777 gimple *g = last_stmt (region->outer->entry);
778 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
779 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
780 struct omp_for_data fd;
781 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
782 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
783 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
784 OMP_CLAUSE__LOOPTEMP_);
785 startvar = OMP_CLAUSE_DECL (startvar);
786 endvar = OMP_CLAUSE_DECL (endvar);
787 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
788 if (fd.loop.cond_code == LT_EXPR)
789 iflags |= GOMP_TASK_FLAG_UP;
790 tree tclauses = gimple_omp_for_clauses (g);
791 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
792 if (num_tasks)
793 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
794 else
795 {
796 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
797 if (num_tasks)
798 {
799 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
800 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
801 }
802 else
803 num_tasks = integer_zero_node;
804 }
805 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
806 if (ifc == NULL_TREE)
807 iflags |= GOMP_TASK_FLAG_IF;
808 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
809 iflags |= GOMP_TASK_FLAG_NOGROUP;
810 ull = fd.iter_type == long_long_unsigned_type_node;
811 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
812 iflags |= GOMP_TASK_FLAG_REDUCTION;
813 }
814 else if (priority)
815 iflags |= GOMP_TASK_FLAG_PRIORITY;
816
817 tree flags = build_int_cst (unsigned_type_node, iflags);
818
819 tree cond = boolean_true_node;
820 if (ifc)
821 {
822 if (taskloop_p)
823 {
824 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
825 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
826 build_int_cst (unsigned_type_node,
827 GOMP_TASK_FLAG_IF),
828 build_int_cst (unsigned_type_node, 0));
829 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
830 flags, t);
831 }
832 else
833 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
834 }
835
836 if (finalc)
837 {
838 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
839 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
840 build_int_cst (unsigned_type_node,
841 GOMP_TASK_FLAG_FINAL),
842 build_int_cst (unsigned_type_node, 0));
843 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
844 }
845 if (depend)
846 depend = OMP_CLAUSE_DECL (depend);
847 else
848 depend = build_int_cst (ptr_type_node, 0);
849 if (priority)
850 priority = fold_convert (integer_type_node,
851 OMP_CLAUSE_PRIORITY_EXPR (priority));
852 else
853 priority = integer_zero_node;
854
855 gsi = gsi_last_nondebug_bb (bb);
856 tree t = gimple_omp_task_data_arg (entry_stmt);
857 if (t == NULL)
858 t2 = null_pointer_node;
859 else
860 t2 = build_fold_addr_expr_loc (loc, t);
861 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
862 t = gimple_omp_task_copy_fn (entry_stmt);
863 if (t == NULL)
864 t3 = null_pointer_node;
865 else
866 t3 = build_fold_addr_expr_loc (loc, t);
867
868 if (taskloop_p)
869 t = build_call_expr (ull
870 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
871 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
872 11, t1, t2, t3,
873 gimple_omp_task_arg_size (entry_stmt),
874 gimple_omp_task_arg_align (entry_stmt), flags,
875 num_tasks, priority, startvar, endvar, step);
876 else
877 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
878 9, t1, t2, t3,
879 gimple_omp_task_arg_size (entry_stmt),
880 gimple_omp_task_arg_align (entry_stmt), cond, flags,
881 depend, priority);
882
883 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
884 false, GSI_CONTINUE_LINKING);
885 }
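/* The non-taskloop branch above therefore produces a call along the
   lines of (operands are illustrative):

     GOMP_task (foo._omp_fn.1, &.omp_data_o, foo._omp_cpyfn.2,
		arg_size, arg_align, cond, flags, depend, priority);

   where cond comes from the if clause (true when absent), flags encodes
   the untied/mergeable/depend/final/priority bits computed above, and
   depend is either the depend clause decl or a null pointer.  */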
886
887 /* Build the function call to GOMP_taskwait_depend to actually
888 generate the taskwait operation. BB is the block where to insert the
889 code. */
890
891 static void
892 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
893 {
894 tree clauses = gimple_omp_task_clauses (entry_stmt);
895 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
896 if (depend == NULL_TREE)
897 return;
898
899 depend = OMP_CLAUSE_DECL (depend);
900
901 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
902 tree t
903 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
904 1, depend);
905
906 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
907 false, GSI_CONTINUE_LINKING);
908 }
909
910 /* Build the function call to GOMP_teams_reg to actually
911 generate the host teams operation. REGION is the teams region
912 being expanded. BB is the block where to insert the code. */
913
914 static void
915 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
916 {
917 tree clauses = gimple_omp_teams_clauses (entry_stmt);
918 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
919 if (num_teams == NULL_TREE)
920 num_teams = build_int_cst (unsigned_type_node, 0);
921 else
922 {
923 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
924 num_teams = fold_convert (unsigned_type_node, num_teams);
925 }
926 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
927 if (thread_limit == NULL_TREE)
928 thread_limit = build_int_cst (unsigned_type_node, 0);
929 else
930 {
931 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
932 thread_limit = fold_convert (unsigned_type_node, thread_limit);
933 }
934
935 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
936 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
937 if (t == NULL)
938 t1 = null_pointer_node;
939 else
940 t1 = build_fold_addr_expr (t);
941 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
942 tree t2 = build_fold_addr_expr (child_fndecl);
943
944 vec<tree, va_gc> *args;
945 vec_alloc (args, 5);
946 args->quick_push (t2);
947 args->quick_push (t1);
948 args->quick_push (num_teams);
949 args->quick_push (thread_limit);
950 /* For future extensibility. */
951 args->quick_push (build_zero_cst (unsigned_type_node));
952
953 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
954 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
955 args);
956
957 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
958 false, GSI_CONTINUE_LINKING);
959 }
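/* So a host teams construct such as

     #pragma omp teams num_teams (4) thread_limit (8)

   ends up roughly as (child function name illustrative)

     GOMP_teams_reg (foo._omp_fn.3, &.omp_data_o, 4, 8, 0);

   with the trailing zero being the currently unused extensibility
   argument pushed above.  */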
960
961 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
962
963 static tree
964 vec2chain (vec<tree, va_gc> *v)
965 {
966 tree chain = NULL_TREE, t;
967 unsigned ix;
968
969 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
970 {
971 DECL_CHAIN (t) = chain;
972 chain = t;
973 }
974
975 return chain;
976 }
977
978 /* Remove barriers in REGION->EXIT's block. Note that this is only
979 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
980 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
981 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
982 removed. */
983
984 static void
985 remove_exit_barrier (struct omp_region *region)
986 {
987 gimple_stmt_iterator gsi;
988 basic_block exit_bb;
989 edge_iterator ei;
990 edge e;
991 gimple *stmt;
992 int any_addressable_vars = -1;
993
994 exit_bb = region->exit;
995
996 /* If the parallel region doesn't return, we don't have REGION->EXIT
997 block at all. */
998 if (! exit_bb)
999 return;
1000
1001 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1002 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1003 statements that can appear in between are extremely limited -- no
1004 memory operations at all. Here, we allow nothing at all, so the
1005 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1006 gsi = gsi_last_nondebug_bb (exit_bb);
1007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1008 gsi_prev_nondebug (&gsi);
1009 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1010 return;
1011
1012 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1013 {
1014 gsi = gsi_last_nondebug_bb (e->src);
1015 if (gsi_end_p (gsi))
1016 continue;
1017 stmt = gsi_stmt (gsi);
1018 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1019 && !gimple_omp_return_nowait_p (stmt))
1020 {
1021 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1022 in many cases. If there could be tasks queued, the barrier
1023 might be needed to let the tasks run before some local
1024 variable of the parallel that the task uses as shared
1025 runs out of scope. The task can be spawned either
1026 from within the current function (this would be easy to check)
1027 or from some function it calls and gets passed an address
1028 of such a variable. */
1029 if (any_addressable_vars < 0)
1030 {
1031 gomp_parallel *parallel_stmt
1032 = as_a <gomp_parallel *> (last_stmt (region->entry));
1033 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1034 tree local_decls, block, decl;
1035 unsigned ix;
1036
1037 any_addressable_vars = 0;
1038 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1039 if (TREE_ADDRESSABLE (decl))
1040 {
1041 any_addressable_vars = 1;
1042 break;
1043 }
1044 for (block = gimple_block (stmt);
1045 !any_addressable_vars
1046 && block
1047 && TREE_CODE (block) == BLOCK;
1048 block = BLOCK_SUPERCONTEXT (block))
1049 {
1050 for (local_decls = BLOCK_VARS (block);
1051 local_decls;
1052 local_decls = DECL_CHAIN (local_decls))
1053 if (TREE_ADDRESSABLE (local_decls))
1054 {
1055 any_addressable_vars = 1;
1056 break;
1057 }
1058 if (block == gimple_block (parallel_stmt))
1059 break;
1060 }
1061 }
1062 if (!any_addressable_vars)
1063 gimple_omp_return_set_nowait (stmt);
1064 }
1065 }
1066 }
1067
1068 static void
1069 remove_exit_barriers (struct omp_region *region)
1070 {
1071 if (region->type == GIMPLE_OMP_PARALLEL)
1072 remove_exit_barrier (region);
1073
1074 if (region->inner)
1075 {
1076 region = region->inner;
1077 remove_exit_barriers (region);
1078 while (region->next)
1079 {
1080 region = region->next;
1081 remove_exit_barriers (region);
1082 }
1083 }
1084 }
1085
1086 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1087 calls. These can't be declared as const functions, but
1088 within one parallel body they are constant, so they can be
1089 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1090 which are declared const. Similarly for a task body, except
1091 that in an untied task omp_get_thread_num () can change at any task
1092 scheduling point. */
1093
1094 static void
1095 optimize_omp_library_calls (gimple *entry_stmt)
1096 {
1097 basic_block bb;
1098 gimple_stmt_iterator gsi;
1099 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1100 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1101 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1102 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1103 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1104 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1105 OMP_CLAUSE_UNTIED) != NULL);
1106
1107 FOR_EACH_BB_FN (bb, cfun)
1108 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1109 {
1110 gimple *call = gsi_stmt (gsi);
1111 tree decl;
1112
1113 if (is_gimple_call (call)
1114 && (decl = gimple_call_fndecl (call))
1115 && DECL_EXTERNAL (decl)
1116 && TREE_PUBLIC (decl)
1117 && DECL_INITIAL (decl) == NULL)
1118 {
1119 tree built_in;
1120
1121 if (DECL_NAME (decl) == thr_num_id)
1122 {
1123 /* In #pragma omp task untied omp_get_thread_num () can change
1124 during the execution of the task region. */
1125 if (untied_task)
1126 continue;
1127 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1128 }
1129 else if (DECL_NAME (decl) == num_thr_id)
1130 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 else
1132 continue;
1133
1134 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1135 || gimple_call_num_args (call) != 0)
1136 continue;
1137
1138 if (flag_exceptions && !TREE_NOTHROW (decl))
1139 continue;
1140
1141 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1142 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1143 TREE_TYPE (TREE_TYPE (built_in))))
1144 continue;
1145
1146 gimple_call_set_fndecl (call, built_in);
1147 }
1148 }
1149 }
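/* Sketch of the effect on a hypothetical parallel body:

     before:  n = omp_get_num_threads ();		(external call)
     after:   n = __builtin_omp_get_num_threads ();	(const builtin)

   so repeated queries within one parallel body can be commoned up by
   later optimizations.  */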
1150
1151 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1152 regimplified. */
1153
1154 static tree
1155 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1156 {
1157 tree t = *tp;
1158
1159 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1160 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1161 return t;
1162
1163 if (TREE_CODE (t) == ADDR_EXPR)
1164 recompute_tree_invariant_for_addr_expr (t);
1165
1166 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1167 return NULL_TREE;
1168 }
1169
1170 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1171
1172 static void
1173 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1174 bool after)
1175 {
1176 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1177 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1178 !after, after ? GSI_CONTINUE_LINKING
1179 : GSI_SAME_STMT);
1180 gimple *stmt = gimple_build_assign (to, from);
1181 if (after)
1182 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1183 else
1184 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1185 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1186 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1187 {
1188 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1189 gimple_regimplify_operands (stmt, &gsi);
1190 }
1191 }
1192
1193 /* Expand the OpenMP parallel or task directive starting at REGION. */
1194
1195 static void
1196 expand_omp_taskreg (struct omp_region *region)
1197 {
1198 basic_block entry_bb, exit_bb, new_bb;
1199 struct function *child_cfun;
1200 tree child_fn, block, t;
1201 gimple_stmt_iterator gsi;
1202 gimple *entry_stmt, *stmt;
1203 edge e;
1204 vec<tree, va_gc> *ws_args;
1205
1206 entry_stmt = last_stmt (region->entry);
1207 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1208 && gimple_omp_task_taskwait_p (entry_stmt))
1209 {
1210 new_bb = region->entry;
1211 gsi = gsi_last_nondebug_bb (region->entry);
1212 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1213 gsi_remove (&gsi, true);
1214 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1215 return;
1216 }
1217
1218 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1219 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1220
1221 entry_bb = region->entry;
1222 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1223 exit_bb = region->cont;
1224 else
1225 exit_bb = region->exit;
1226
1227 if (is_combined_parallel (region))
1228 ws_args = region->ws_args;
1229 else
1230 ws_args = NULL;
1231
1232 if (child_cfun->cfg)
1233 {
1234 /* Due to inlining, it may happen that we have already outlined
1235 the region, in which case all we need to do is make the
1236 sub-graph unreachable and emit the parallel call. */
1237 edge entry_succ_e, exit_succ_e;
1238
1239 entry_succ_e = single_succ_edge (entry_bb);
1240
1241 gsi = gsi_last_nondebug_bb (entry_bb);
1242 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1243 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1244 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1245 gsi_remove (&gsi, true);
1246
1247 new_bb = entry_bb;
1248 if (exit_bb)
1249 {
1250 exit_succ_e = single_succ_edge (exit_bb);
1251 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1252 }
1253 remove_edge_and_dominated_blocks (entry_succ_e);
1254 }
1255 else
1256 {
1257 unsigned srcidx, dstidx, num;
1258
1259 /* If the parallel region needs data sent from the parent
1260 function, then the very first statement (except possible
1261 tree profile counter updates) of the parallel body
1262 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1263 &.OMP_DATA_O is passed as an argument to the child function,
1264 we need to replace it with the argument as seen by the child
1265 function.
1266
1267 In most cases, this will end up being the identity assignment
1268 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1269 a function call that has been inlined, the original PARM_DECL
1270 .OMP_DATA_I may have been converted into a different local
1271 variable, in which case we need to keep the assignment. */
1272 if (gimple_omp_taskreg_data_arg (entry_stmt))
1273 {
1274 basic_block entry_succ_bb
1275 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1276 : FALLTHRU_EDGE (entry_bb)->dest;
1277 tree arg;
1278 gimple *parcopy_stmt = NULL;
1279
1280 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1281 {
1282 gimple *stmt;
1283
1284 gcc_assert (!gsi_end_p (gsi));
1285 stmt = gsi_stmt (gsi);
1286 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1287 continue;
1288
1289 if (gimple_num_ops (stmt) == 2)
1290 {
1291 tree arg = gimple_assign_rhs1 (stmt);
1292
1293 /* We're ignoring the subcode because we're
1294 effectively doing a STRIP_NOPS. */
1295
1296 if (TREE_CODE (arg) == ADDR_EXPR
1297 && (TREE_OPERAND (arg, 0)
1298 == gimple_omp_taskreg_data_arg (entry_stmt)))
1299 {
1300 parcopy_stmt = stmt;
1301 break;
1302 }
1303 }
1304 }
1305
1306 gcc_assert (parcopy_stmt != NULL);
1307 arg = DECL_ARGUMENTS (child_fn);
1308
1309 if (!gimple_in_ssa_p (cfun))
1310 {
1311 if (gimple_assign_lhs (parcopy_stmt) == arg)
1312 gsi_remove (&gsi, true);
1313 else
1314 {
1315 /* ?? Is setting the subcode really necessary ?? */
1316 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1317 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1318 }
1319 }
1320 else
1321 {
1322 tree lhs = gimple_assign_lhs (parcopy_stmt);
1323 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1324 /* We'd like to set the rhs to the default def in the child_fn,
1325 but it's too early to create ssa names in the child_fn.
1326 Instead, we set the rhs to the parm. In
1327 move_sese_region_to_fn, we introduce a default def for the
1328 parm, map the parm to its default def, and once we encounter
1329 this stmt, replace the parm with the default def. */
1330 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1331 update_stmt (parcopy_stmt);
1332 }
1333 }
1334
1335 /* Declare local variables needed in CHILD_CFUN. */
1336 block = DECL_INITIAL (child_fn);
1337 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1338 /* The gimplifier could record temporaries in parallel/task block
1339 rather than in the containing function's local_decls chain,
1340 which would mean cgraph missed finalizing them. Do it now. */
1341 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1342 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1343 varpool_node::finalize_decl (t);
1344 DECL_SAVED_TREE (child_fn) = NULL;
1345 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1346 gimple_set_body (child_fn, NULL);
1347 TREE_USED (block) = 1;
1348
1349 /* Reset DECL_CONTEXT on function arguments. */
1350 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1351 DECL_CONTEXT (t) = child_fn;
1352
1353 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1354 so that it can be moved to the child function. */
1355 gsi = gsi_last_nondebug_bb (entry_bb);
1356 stmt = gsi_stmt (gsi);
1357 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1358 || gimple_code (stmt) == GIMPLE_OMP_TASK
1359 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1360 e = split_block (entry_bb, stmt);
1361 gsi_remove (&gsi, true);
1362 entry_bb = e->dest;
1363 edge e2 = NULL;
1364 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1365 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1366 else
1367 {
1368 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1369 gcc_assert (e2->dest == region->exit);
1370 remove_edge (BRANCH_EDGE (entry_bb));
1371 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1372 gsi = gsi_last_nondebug_bb (region->exit);
1373 gcc_assert (!gsi_end_p (gsi)
1374 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1375 gsi_remove (&gsi, true);
1376 }
1377
1378 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1379 if (exit_bb)
1380 {
1381 gsi = gsi_last_nondebug_bb (exit_bb);
1382 gcc_assert (!gsi_end_p (gsi)
1383 && (gimple_code (gsi_stmt (gsi))
1384 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1385 stmt = gimple_build_return (NULL);
1386 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1387 gsi_remove (&gsi, true);
1388 }
1389
1390 /* Move the parallel region into CHILD_CFUN. */
1391
1392 if (gimple_in_ssa_p (cfun))
1393 {
1394 init_tree_ssa (child_cfun);
1395 init_ssa_operands (child_cfun);
1396 child_cfun->gimple_df->in_ssa_p = true;
1397 block = NULL_TREE;
1398 }
1399 else
1400 block = gimple_block (entry_stmt);
1401
1402 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1403 if (exit_bb)
1404 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1405 if (e2)
1406 {
1407 basic_block dest_bb = e2->dest;
1408 if (!exit_bb)
1409 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1410 remove_edge (e2);
1411 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1412 }
1413 /* When the OMP expansion process cannot guarantee an up-to-date
1414 loop tree, arrange for the child function to fix up loops. */
1415 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1416 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1417
1418 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1419 num = vec_safe_length (child_cfun->local_decls);
1420 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1421 {
1422 t = (*child_cfun->local_decls)[srcidx];
1423 if (DECL_CONTEXT (t) == cfun->decl)
1424 continue;
1425 if (srcidx != dstidx)
1426 (*child_cfun->local_decls)[dstidx] = t;
1427 dstidx++;
1428 }
1429 if (dstidx != num)
1430 vec_safe_truncate (child_cfun->local_decls, dstidx);
1431
1432 /* Inform the callgraph about the new function. */
1433 child_cfun->curr_properties = cfun->curr_properties;
1434 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1435 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1436 cgraph_node *node = cgraph_node::get_create (child_fn);
1437 node->parallelized_function = 1;
1438 cgraph_node::add_new_function (child_fn, true);
1439
1440 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1441 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1442
1443 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1444 fixed in a following pass. */
1445 push_cfun (child_cfun);
1446 if (need_asm)
1447 assign_assembler_name_if_needed (child_fn);
1448
1449 if (optimize)
1450 optimize_omp_library_calls (entry_stmt);
1451 update_max_bb_count ();
1452 cgraph_edge::rebuild_edges ();
1453
1454 /* Some EH regions might become dead, see PR34608. If
1455 pass_cleanup_cfg isn't the first pass to happen with the
1456 new child, these dead EH edges might cause problems.
1457 Clean them up now. */
1458 if (flag_exceptions)
1459 {
1460 basic_block bb;
1461 bool changed = false;
1462
1463 FOR_EACH_BB_FN (bb, cfun)
1464 changed |= gimple_purge_dead_eh_edges (bb);
1465 if (changed)
1466 cleanup_tree_cfg ();
1467 }
1468 if (gimple_in_ssa_p (cfun))
1469 update_ssa (TODO_update_ssa);
1470 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1471 verify_loop_structure ();
1472 pop_cfun ();
1473
1474 if (dump_file && !gimple_in_ssa_p (cfun))
1475 {
1476 omp_any_child_fn_dumped = true;
1477 dump_function_header (dump_file, child_fn, dump_flags);
1478 dump_function_to_file (child_fn, dump_file, dump_flags);
1479 }
1480 }
1481
1482 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1483
1484 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1485 expand_parallel_call (region, new_bb,
1486 as_a <gomp_parallel *> (entry_stmt), ws_args);
1487 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1488 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1489 else
1490 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1491 if (gimple_in_ssa_p (cfun))
1492 update_ssa (TODO_update_ssa_only_virtuals);
1493 }
1494
1495 /* Information about members of an OpenACC collapsed loop nest. */
1496
1497 struct oacc_collapse
1498 {
1499 tree base; /* Base value. */
1500 tree iters; /* Number of steps. */
1501 tree step; /* Step size. */
1502 tree tile; /* Tile increment (if tiled). */
1503 tree outer; /* Tile iterator var. */
1504 };
1505
1506 /* Helper for expand_oacc_for. Determine collapsed loop information.
1507 Fill in COUNTS array. Emit any initialization code before GSI.
1508 Return the calculated outer loop bound of BOUND_TYPE. */
1509
1510 static tree
1511 expand_oacc_collapse_init (const struct omp_for_data *fd,
1512 gimple_stmt_iterator *gsi,
1513 oacc_collapse *counts, tree bound_type,
1514 location_t loc)
1515 {
1516 tree tiling = fd->tiling;
1517 tree total = build_int_cst (bound_type, 1);
1518 int ix;
1519
1520 gcc_assert (integer_onep (fd->loop.step));
1521 gcc_assert (integer_zerop (fd->loop.n1));
1522
1523 /* When tiling, the first operand of the tile clause applies to the
1524 innermost loop, and we work outwards from there. Seems
1525 backwards, but whatever. */
1526 for (ix = fd->collapse; ix--;)
1527 {
1528 const omp_for_data_loop *loop = &fd->loops[ix];
1529
1530 tree iter_type = TREE_TYPE (loop->v);
1531 tree diff_type = iter_type;
1532 tree plus_type = iter_type;
1533
1534 gcc_assert (loop->cond_code == fd->loop.cond_code);
1535
1536 if (POINTER_TYPE_P (iter_type))
1537 plus_type = sizetype;
1538 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1539 diff_type = signed_type_for (diff_type);
1540 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1541 diff_type = integer_type_node;
1542
1543 if (tiling)
1544 {
1545 tree num = build_int_cst (integer_type_node, fd->collapse);
1546 tree loop_no = build_int_cst (integer_type_node, ix);
1547 tree tile = TREE_VALUE (tiling);
1548 gcall *call
1549 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1550 /* gwv-outer=*/integer_zero_node,
1551 /* gwv-inner=*/integer_zero_node);
1552
1553 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1554 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1555 gimple_call_set_lhs (call, counts[ix].tile);
1556 gimple_set_location (call, loc);
1557 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1558
1559 tiling = TREE_CHAIN (tiling);
1560 }
1561 else
1562 {
1563 counts[ix].tile = NULL;
1564 counts[ix].outer = loop->v;
1565 }
1566
1567 tree b = loop->n1;
1568 tree e = loop->n2;
1569 tree s = loop->step;
1570 bool up = loop->cond_code == LT_EXPR;
1571 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1572 bool negating;
1573 tree expr;
1574
1575 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1576 true, GSI_SAME_STMT);
1577 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1578 true, GSI_SAME_STMT);
1579
1580 /* Convert the step, avoiding possible unsigned->signed overflow. */
1581 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1582 if (negating)
1583 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1584 s = fold_convert (diff_type, s);
1585 if (negating)
1586 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1587 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1588 true, GSI_SAME_STMT);
1589
1590 /* Determine the range, avoiding possible unsigned->signed overflow. */
1591 negating = !up && TYPE_UNSIGNED (iter_type);
1592 expr = fold_build2 (MINUS_EXPR, plus_type,
1593 fold_convert (plus_type, negating ? b : e),
1594 fold_convert (plus_type, negating ? e : b));
1595 expr = fold_convert (diff_type, expr);
1596 if (negating)
1597 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1598 tree range = force_gimple_operand_gsi
1599 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1600
1601 /* Determine number of iterations. */
1602 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1603 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1604 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1605
1606 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1608
1609 counts[ix].base = b;
1610 counts[ix].iters = iters;
1611 counts[ix].step = s;
1612
1613 total = fold_build2 (MULT_EXPR, bound_type, total,
1614 fold_convert (bound_type, iters));
1615 }
1616
1617 return total;
1618 }
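/* Worked example of the iteration count above (hypothetical loop):
   for (i = 0; i < 10; i += 3) gives range = 10, dir = +1 and s = 3, so
   iters = (10 - 1 + 3) / 3 = 4, matching the executed values 0, 3, 6
   and 9.  The per-loop counts are then multiplied into TOTAL to size
   the single collapsed iteration space.  */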
1619
1620 /* Emit initializers for collapsed loop members. INNER is true if
1621 this is for the element loop of a TILE. IVAR is the outer
1622 loop iteration variable, from which collapsed loop iteration values
1623 are calculated. COUNTS array has been initialized by
1624 expand_oacc_collapse_init. */
1625
1626 static void
1627 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1628 gimple_stmt_iterator *gsi,
1629 const oacc_collapse *counts, tree ivar)
1630 {
1631 tree ivar_type = TREE_TYPE (ivar);
1632
1633 /* The most rapidly changing iteration variable is the innermost
1634 one. */
1635 for (int ix = fd->collapse; ix--;)
1636 {
1637 const omp_for_data_loop *loop = &fd->loops[ix];
1638 const oacc_collapse *collapse = &counts[ix];
1639 tree v = inner ? loop->v : collapse->outer;
1640 tree iter_type = TREE_TYPE (v);
1641 tree diff_type = TREE_TYPE (collapse->step);
1642 tree plus_type = iter_type;
1643 enum tree_code plus_code = PLUS_EXPR;
1644 tree expr;
1645
1646 if (POINTER_TYPE_P (iter_type))
1647 {
1648 plus_code = POINTER_PLUS_EXPR;
1649 plus_type = sizetype;
1650 }
1651
1652 expr = ivar;
1653 if (ix)
1654 {
1655 tree mod = fold_convert (ivar_type, collapse->iters);
1656 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1657 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1658 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1659 true, GSI_SAME_STMT);
1660 }
1661
1662 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1663 collapse->step);
1664 expr = fold_build2 (plus_code, iter_type,
1665 inner ? collapse->outer : collapse->base,
1666 fold_convert (plus_type, expr));
1667 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1668 true, GSI_SAME_STMT);
1669 gassign *ass = gimple_build_assign (v, expr);
1670 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1671 }
1672 }
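/* Worked example of the decomposition above (hypothetical counts): with
   collapse(2), counts[1].iters == 5 and collapsed index IVAR == 13, the
   innermost loop gets offset 13 % 5 == 3 and the remaining 13 / 5 == 2
   becomes the offset for the outer loop; each offset is scaled by that
   loop's step and added to its base (or to the tile iterator when INNER
   is true).  */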
1673
1674 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1675 of the combined collapse > 1 loop constructs, generate code like:
1676 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1677 if (cond3 is <)
1678 adj = STEP3 - 1;
1679 else
1680 adj = STEP3 + 1;
1681 count3 = (adj + N32 - N31) / STEP3;
1682 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1683 if (cond2 is <)
1684 adj = STEP2 - 1;
1685 else
1686 adj = STEP2 + 1;
1687 count2 = (adj + N22 - N21) / STEP2;
1688 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1689 if (cond1 is <)
1690 adj = STEP1 - 1;
1691 else
1692 adj = STEP1 + 1;
1693 count1 = (adj + N12 - N11) / STEP1;
1694 count = count1 * count2 * count3;
1695 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1696 count = 0;
1697 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1698 of the combined loop constructs, just initialize COUNTS array
1699 from the _looptemp_ clauses. For loop nests with non-rectangular
1700 loops, do this only for the rectangular loops. Then pick
1701 the loops which reference outer vars in their bound expressions
1702 and the loops which they refer to and for this sub-nest compute
1703 number of iterations. For triangular loops use Faulhaber's formula,
1704 otherwise as a fallback, compute by iterating the loops.
1705 If e.g. the sub-nest is
1706 for (I = N11; I COND1 N12; I += STEP1)
1707 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1708 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1709 do:
1710 COUNT = 0;
1711 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1712 for (tmpj = M21 * tmpi + N21;
1713 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1714 {
1715 int tmpk1 = M31 * tmpj + N31;
1716 int tmpk2 = M32 * tmpj + N32;
1717 if (tmpk1 COND3 tmpk2)
1718 {
1719 if (COND3 is <)
1720 adj = STEP3 - 1;
1721 else
1722 adj = STEP3 + 1;
1723 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1724 }
1725 }
1726 and finally multiply the counts of the rectangular loops not
1727 in the sub-nest with COUNT. Also, in counts[fd->last_nonrect]
1728 store the number of iterations of the loops from fd->first_nonrect
1729 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1730 by the counts of the rectangular loops not referenced in any
1731 non-rectangular loops sandwiched in between those. */
1732
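/* A sketch of the closed form used below for the triangular case (the
   capitalized names correspond to the outer_niters, first_inner_iterations
   and factor temporaries created later): if the outer loop of the sub-nest
   performs OUTER_NITERS iterations, the inner loop performs
   FIRST_INNER_ITERATIONS iterations on the first outer iteration and its
   iteration count changes by a constant FACTOR on each following outer
   iteration, then by Faulhaber's formula (here just an arithmetic series)
     COUNT = OUTER_NITERS * FIRST_INNER_ITERATIONS
	     + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2.  */
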
1733 /* NOTE: It *could* be better to moosh all of the BBs together,
1734 creating one larger BB with all the computation and the unexpected
1735 jump at the end. I.e.
1736
1737 bool zero3, zero2, zero1, zero;
1738
1739 zero3 = N32 c3 N31;
1740 count3 = (N32 - N31) /[cl] STEP3;
1741 zero2 = N22 c2 N21;
1742 count2 = (N22 - N21) /[cl] STEP2;
1743 zero1 = N12 c1 N11;
1744 count1 = (N12 - N11) /[cl] STEP1;
1745 zero = zero3 || zero2 || zero1;
1746 count = count1 * count2 * count3;
1747 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1748
1749 After all, we expect zero to be false, and thus we expect to have to
1750 evaluate all of the comparison expressions, so short-circuiting
1751 oughtn't be a win. Since the condition isn't protecting a
1752 denominator, we're not concerned about divide-by-zero, so we can
1753 fully evaluate count even if a numerator turned out to be wrong.
1754
1755 It seems like putting this all together would create much better
1756 scheduling opportunities, and less pressure on the chip's branch
1757 predictor. */
1758
1759 static void
1760 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1761 basic_block &entry_bb, tree *counts,
1762 basic_block &zero_iter1_bb, int &first_zero_iter1,
1763 basic_block &zero_iter2_bb, int &first_zero_iter2,
1764 basic_block &l2_dom_bb)
1765 {
1766 tree t, type = TREE_TYPE (fd->loop.v);
1767 edge e, ne;
1768 int i;
1769
1770 /* Collapsed loops need work for expansion into SSA form. */
1771 gcc_assert (!gimple_in_ssa_p (cfun));
1772
1773 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1774 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1775 {
1776 gcc_assert (fd->ordered == 0);
1777 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1778 isn't supposed to be handled, as the inner loop doesn't
1779 use it. */
1780 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1781 OMP_CLAUSE__LOOPTEMP_);
1782 gcc_assert (innerc);
1783 for (i = 0; i < fd->collapse; i++)
1784 {
1785 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 if (i)
1789 counts[i] = OMP_CLAUSE_DECL (innerc);
1790 else
1791 counts[0] = NULL_TREE;
1792 }
1793 return;
1794 }
1795
1796 for (i = fd->collapse; i < fd->ordered; i++)
1797 {
1798 tree itype = TREE_TYPE (fd->loops[i].v);
1799 counts[i] = NULL_TREE;
1800 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1801 fold_convert (itype, fd->loops[i].n1),
1802 fold_convert (itype, fd->loops[i].n2));
1803 if (t && integer_zerop (t))
1804 {
1805 for (i = fd->collapse; i < fd->ordered; i++)
1806 counts[i] = build_int_cst (type, 0);
1807 break;
1808 }
1809 }
1810 bool rect_count_seen = false;
1811 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1812 {
1813 tree itype = TREE_TYPE (fd->loops[i].v);
1814
1815 if (i >= fd->collapse && counts[i])
1816 continue;
1817 if (fd->non_rect)
1818 {
1819 /* Skip loops that use outer iterators in their expressions
1820 during this phase. */
1821 if (fd->loops[i].m1 || fd->loops[i].m2)
1822 {
1823 counts[i] = build_zero_cst (type);
1824 continue;
1825 }
1826 }
1827 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1828 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1829 fold_convert (itype, fd->loops[i].n1),
1830 fold_convert (itype, fd->loops[i].n2)))
1831 == NULL_TREE || !integer_onep (t)))
1832 {
1833 gcond *cond_stmt;
1834 tree n1, n2;
1835 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1836 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1837 true, GSI_SAME_STMT);
1838 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1839 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1840 true, GSI_SAME_STMT);
1841 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1842 NULL_TREE, NULL_TREE);
1843 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1844 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1845 expand_omp_regimplify_p, NULL, NULL)
1846 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1847 expand_omp_regimplify_p, NULL, NULL))
1848 {
1849 *gsi = gsi_for_stmt (cond_stmt);
1850 gimple_regimplify_operands (cond_stmt, gsi);
1851 }
1852 e = split_block (entry_bb, cond_stmt);
1853 basic_block &zero_iter_bb
1854 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1855 int &first_zero_iter
1856 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1857 if (zero_iter_bb == NULL)
1858 {
1859 gassign *assign_stmt;
1860 first_zero_iter = i;
1861 zero_iter_bb = create_empty_bb (entry_bb);
1862 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1863 *gsi = gsi_after_labels (zero_iter_bb);
1864 if (i < fd->collapse)
1865 assign_stmt = gimple_build_assign (fd->loop.n2,
1866 build_zero_cst (type));
1867 else
1868 {
1869 counts[i] = create_tmp_reg (type, ".count");
1870 assign_stmt
1871 = gimple_build_assign (counts[i], build_zero_cst (type));
1872 }
1873 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1874 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1875 entry_bb);
1876 }
1877 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1878 ne->probability = profile_probability::very_unlikely ();
1879 e->flags = EDGE_TRUE_VALUE;
1880 e->probability = ne->probability.invert ();
1881 if (l2_dom_bb == NULL)
1882 l2_dom_bb = entry_bb;
1883 entry_bb = e->dest;
1884 *gsi = gsi_last_nondebug_bb (entry_bb);
1885 }
1886
1887 if (POINTER_TYPE_P (itype))
1888 itype = signed_type_for (itype);
1889 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1890 ? -1 : 1));
1891 t = fold_build2 (PLUS_EXPR, itype,
1892 fold_convert (itype, fd->loops[i].step), t);
1893 t = fold_build2 (PLUS_EXPR, itype, t,
1894 fold_convert (itype, fd->loops[i].n2));
1895 t = fold_build2 (MINUS_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].n1));
1897 /* ?? We could probably use CEIL_DIV_EXPR instead of
1898 TRUNC_DIV_EXPR and adjust by hand, unless we can't
1899 generate the same code in the end because generically we
1900 don't know that the values involved must be negative
1901 for GT. ?? */
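/* E.g. (a sketch) for cond_code == LT_EXPR, n1 == 0, n2 == 10 and
   step == 3 the expression built here is (3 - 1 + 10 - 0) / 3 == 4,
   i.e. the iterations at 0, 3, 6 and 9.  */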
1902 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1904 fold_build1 (NEGATE_EXPR, itype, t),
1905 fold_build1 (NEGATE_EXPR, itype,
1906 fold_convert (itype,
1907 fd->loops[i].step)));
1908 else
1909 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1910 fold_convert (itype, fd->loops[i].step));
1911 t = fold_convert (type, t);
1912 if (TREE_CODE (t) == INTEGER_CST)
1913 counts[i] = t;
1914 else
1915 {
1916 if (i < fd->collapse || i != first_zero_iter2)
1917 counts[i] = create_tmp_reg (type, ".count");
1918 expand_omp_build_assign (gsi, counts[i], t);
1919 }
1920 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1921 {
1922 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1923 continue;
1924 if (!rect_count_seen)
1925 {
1926 t = counts[i];
1927 rect_count_seen = true;
1928 }
1929 else
1930 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1931 expand_omp_build_assign (gsi, fd->loop.n2, t);
1932 }
1933 }
1934 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1935 {
1936 gcc_assert (fd->last_nonrect != -1);
1937
1938 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1939 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1940 build_zero_cst (type));
1941 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1942 if (fd->loops[i].m1
1943 || fd->loops[i].m2
1944 || fd->loops[i].non_rect_referenced)
1945 break;
1946 if (i == fd->last_nonrect
1947 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1948 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1949 {
1950 int o = fd->first_nonrect;
1951 tree itype = TREE_TYPE (fd->loops[o].v);
1952 tree n1o = create_tmp_reg (itype, ".n1o");
1953 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1954 expand_omp_build_assign (gsi, n1o, t);
1955 tree n2o = create_tmp_reg (itype, ".n2o");
1956 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1957 expand_omp_build_assign (gsi, n2o, t);
1958 if (fd->loops[i].m1 && fd->loops[i].m2)
1959 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1960 unshare_expr (fd->loops[i].m1));
1961 else if (fd->loops[i].m1)
1962 t = fold_unary (NEGATE_EXPR, itype,
1963 unshare_expr (fd->loops[i].m1));
1964 else
1965 t = unshare_expr (fd->loops[i].m2);
1966 tree m2minusm1
1967 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1968 true, GSI_SAME_STMT);
1969
1970 gimple_stmt_iterator gsi2 = *gsi;
1971 gsi_prev (&gsi2);
1972 e = split_block (entry_bb, gsi_stmt (gsi2));
1973 e = split_block (e->dest, (gimple *) NULL);
1974 basic_block bb1 = e->src;
1975 entry_bb = e->dest;
1976 *gsi = gsi_after_labels (entry_bb);
1977
1978 gsi2 = gsi_after_labels (bb1);
1979 tree ostep = fold_convert (itype, fd->loops[o].step);
1980 t = build_int_cst (itype, (fd->loops[o].cond_code
1981 == LT_EXPR ? -1 : 1));
1982 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
1983 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
1984 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
1985 if (TYPE_UNSIGNED (itype)
1986 && fd->loops[o].cond_code == GT_EXPR)
1987 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1988 fold_build1 (NEGATE_EXPR, itype, t),
1989 fold_build1 (NEGATE_EXPR, itype, ostep));
1990 else
1991 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
1992 tree outer_niters
1993 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
1994 true, GSI_SAME_STMT);
1995 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
1996 build_one_cst (itype));
1997 t = fold_build2 (MULT_EXPR, itype, t, ostep);
1998 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
1999 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2000 true, GSI_SAME_STMT);
2001 tree n1, n2, n1e, n2e;
2002 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2003 if (fd->loops[i].m1)
2004 {
2005 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2006 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2007 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2008 }
2009 else
2010 n1 = t;
2011 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2013 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2014 if (fd->loops[i].m2)
2015 {
2016 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2017 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2018 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2019 }
2020 else
2021 n2 = t;
2022 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2025 if (fd->loops[i].m1)
2026 {
2027 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2028 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2029 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2030 }
2031 else
2032 n1e = t;
2033 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2034 true, GSI_SAME_STMT);
2035 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2036 if (fd->loops[i].m2)
2037 {
2038 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2039 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2040 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2041 }
2042 else
2043 n2e = t;
2044 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2045 true, GSI_SAME_STMT);
2046 gcond *cond_stmt
2047 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2048 NULL_TREE, NULL_TREE);
2049 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2050 e = split_block (bb1, cond_stmt);
2051 e->flags = EDGE_TRUE_VALUE;
2052 e->probability = profile_probability::likely ().guessed ();
2053 basic_block bb2 = e->dest;
2054 gsi2 = gsi_after_labels (bb2);
2055
2056 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2057 NULL_TREE, NULL_TREE);
2058 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2059 e = split_block (bb2, cond_stmt);
2060 e->flags = EDGE_TRUE_VALUE;
2061 e->probability = profile_probability::likely ().guessed ();
2062 gsi2 = gsi_after_labels (e->dest);
2063
2064 tree step = fold_convert (itype, fd->loops[i].step);
2065 t = build_int_cst (itype, (fd->loops[i].cond_code
2066 == LT_EXPR ? -1 : 1));
2067 t = fold_build2 (PLUS_EXPR, itype, step, t);
2068 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2069 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2070 if (TYPE_UNSIGNED (itype)
2071 && fd->loops[i].cond_code == GT_EXPR)
2072 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2073 fold_build1 (NEGATE_EXPR, itype, t),
2074 fold_build1 (NEGATE_EXPR, itype, step));
2075 else
2076 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2077 tree first_inner_iterations
2078 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2081 if (TYPE_UNSIGNED (itype)
2082 && fd->loops[i].cond_code == GT_EXPR)
2083 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2084 fold_build1 (NEGATE_EXPR, itype, t),
2085 fold_build1 (NEGATE_EXPR, itype, step));
2086 else
2087 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2088 tree factor
2089 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2090 true, GSI_SAME_STMT);
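/* Apply the closed form sketched above:
   counts[fd->last_nonrect] = outer_niters * first_inner_iterations
			      + factor * outer_niters * (outer_niters - 1) / 2.  */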
2091 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2092 build_one_cst (itype));
2093 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2094 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2095 t = fold_build2 (MULT_EXPR, itype, factor, t);
2096 t = fold_build2 (PLUS_EXPR, itype,
2097 fold_build2 (MULT_EXPR, itype, outer_niters,
2098 first_inner_iterations), t);
2099 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2100 fold_convert (type, t));
2101
2102 basic_block bb3 = create_empty_bb (bb1);
2103 add_bb_to_loop (bb3, bb1->loop_father);
2104
2105 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2106 e->probability = profile_probability::unlikely ().guessed ();
2107
2108 gsi2 = gsi_after_labels (bb3);
2109 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2110 NULL_TREE, NULL_TREE);
2111 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2112 e = split_block (bb3, cond_stmt);
2113 e->flags = EDGE_TRUE_VALUE;
2114 e->probability = profile_probability::likely ().guessed ();
2115 basic_block bb4 = e->dest;
2116
2117 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2118 ne->probability = e->probability.invert ();
2119
2120 basic_block bb5 = create_empty_bb (bb2);
2121 add_bb_to_loop (bb5, bb2->loop_father);
2122
2123 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2124 ne->probability = profile_probability::unlikely ().guessed ();
2125
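/* A sketch of the intent of the two blocks filled in below (bb4 when
   j == 0, bb5 when j == 1): the inner loop has no iterations at the
   first (resp. last) outer iteration, so estimate the outer iterator
   value at which the inner bounds cross, roughly (N1 - N2) / (M2 - M1),
   snap it to the outer step grid starting at n1o, adjust it by one more
   outer step depending on the inner condition at that point, store the
   result into n1o (j == 0) or n2o (j == 1) and redo the computation in
   bb1 with the narrowed outer range.  */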
2126 for (int j = 0; j < 2; j++)
2127 {
2128 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2129 t = fold_build2 (MINUS_EXPR, itype,
2130 unshare_expr (fd->loops[i].n1),
2131 unshare_expr (fd->loops[i].n2));
2132 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2133 tree tem
2134 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2135 true, GSI_SAME_STMT);
2136 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2137 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2138 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2139 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2140 true, GSI_SAME_STMT);
2141 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2142 if (fd->loops[i].m1)
2143 {
2144 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2145 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2146 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2147 }
2148 else
2149 n1 = t;
2150 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2151 true, GSI_SAME_STMT);
2152 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2153 if (fd->loops[i].m2)
2154 {
2155 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2156 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2157 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2158 }
2159 else
2160 n2 = t;
2161 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2162 true, GSI_SAME_STMT);
2163 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2164
2165 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2166 NULL_TREE, NULL_TREE);
2167 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2168 e = split_block (gsi_bb (gsi2), cond_stmt);
2169 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2170 e->probability = profile_probability::unlikely ().guessed ();
2171 ne = make_edge (e->src, bb1,
2172 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2173 ne->probability = e->probability.invert ();
2174 gsi2 = gsi_after_labels (e->dest);
2175
2176 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2177 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2178
2179 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2180 }
2181
2182 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2183 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2184 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2185
2186 if (fd->first_nonrect + 1 == fd->last_nonrect)
2187 {
2188 fd->first_inner_iterations = first_inner_iterations;
2189 fd->factor = factor;
2190 fd->adjn1 = n1o;
2191 }
2192 }
2193 else
2194 {
2195 /* Fallback implementation. Evaluate the loops with m1/m2
2196 non-NULL as well as their outer loops at runtime using temporaries
2197 instead of the original iteration variables, and in the
2198 body just bump the counter. */
2199 gimple_stmt_iterator gsi2 = *gsi;
2200 gsi_prev (&gsi2);
2201 e = split_block (entry_bb, gsi_stmt (gsi2));
2202 e = split_block (e->dest, (gimple *) NULL);
2203 basic_block cur_bb = e->src;
2204 basic_block next_bb = e->dest;
2205 entry_bb = e->dest;
2206 *gsi = gsi_after_labels (entry_bb);
2207
2208 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2209 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2210
2211 for (i = 0; i <= fd->last_nonrect; i++)
2212 {
2213 if (fd->loops[i].m1 == NULL_TREE
2214 && fd->loops[i].m2 == NULL_TREE
2215 && !fd->loops[i].non_rect_referenced)
2216 continue;
2217
2218 tree itype = TREE_TYPE (fd->loops[i].v);
2219
2220 gsi2 = gsi_after_labels (cur_bb);
2221 tree n1, n2;
2222 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2223 if (fd->loops[i].m1)
2224 {
2225 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2226 n1 = fold_build2 (MULT_EXPR, itype,
2227 vs[i - fd->loops[i].outer], n1);
2228 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2229 }
2230 else
2231 n1 = t;
2232 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2233 true, GSI_SAME_STMT);
2234 if (i < fd->last_nonrect)
2235 {
2236 vs[i] = create_tmp_reg (itype, ".it");
2237 expand_omp_build_assign (&gsi2, vs[i], n1);
2238 }
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2240 if (fd->loops[i].m2)
2241 {
2242 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2243 n2 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n2);
2245 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2246 }
2247 else
2248 n2 = t;
2249 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i == fd->last_nonrect)
2252 {
2253 gcond *cond_stmt
2254 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2255 NULL_TREE, NULL_TREE);
2256 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2257 e = split_block (cur_bb, cond_stmt);
2258 e->flags = EDGE_TRUE_VALUE;
2259 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2260 e->probability = profile_probability::likely ().guessed ();
2261 ne->probability = e->probability.invert ();
2262 gsi2 = gsi_after_labels (e->dest);
2263
2264 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2265 ? -1 : 1));
2266 t = fold_build2 (PLUS_EXPR, itype,
2267 fold_convert (itype, fd->loops[i].step), t);
2268 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2269 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2270 tree step = fold_convert (itype, fd->loops[i].step);
2271 if (TYPE_UNSIGNED (itype)
2272 && fd->loops[i].cond_code == GT_EXPR)
2273 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2274 fold_build1 (NEGATE_EXPR, itype, t),
2275 fold_build1 (NEGATE_EXPR, itype, step));
2276 else
2277 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2278 t = fold_convert (type, t);
2279 t = fold_build2 (PLUS_EXPR, type,
2280 counts[fd->last_nonrect], t);
2281 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2282 true, GSI_SAME_STMT);
2283 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2284 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2285 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2286 break;
2287 }
2288 e = split_block (cur_bb, last_stmt (cur_bb));
2289
2290 basic_block new_cur_bb = create_empty_bb (cur_bb);
2291 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2292
2293 gsi2 = gsi_after_labels (e->dest);
2294 tree step = fold_convert (itype,
2295 unshare_expr (fd->loops[i].step));
2296 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2297 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2298 true, GSI_SAME_STMT);
2299 expand_omp_build_assign (&gsi2, vs[i], t);
2300
2301 ne = split_block (e->dest, last_stmt (e->dest));
2302 gsi2 = gsi_after_labels (ne->dest);
2303
2304 gcond *cond_stmt
2305 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2306 NULL_TREE, NULL_TREE);
2307 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2308 edge e3, e4;
2309 if (next_bb == entry_bb)
2310 {
2311 e3 = find_edge (ne->dest, next_bb);
2312 e3->flags = EDGE_FALSE_VALUE;
2313 }
2314 else
2315 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2316 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2317 e4->probability = profile_probability::likely ().guessed ();
2318 e3->probability = e4->probability.invert ();
2319 basic_block esrc = e->src;
2320 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2321 cur_bb = new_cur_bb;
2322 basic_block latch_bb = next_bb;
2323 next_bb = e->dest;
2324 remove_edge (e);
2325 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2326 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2327 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2328 }
2329 }
2330 t = NULL_TREE;
2331 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2332 if (!fd->loops[i].non_rect_referenced
2333 && fd->loops[i].m1 == NULL_TREE
2334 && fd->loops[i].m2 == NULL_TREE)
2335 {
2336 if (t == NULL_TREE)
2337 t = counts[i];
2338 else
2339 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2340 }
2341 if (t)
2342 {
2343 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2344 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2345 }
2346 if (!rect_count_seen)
2347 t = counts[fd->last_nonrect];
2348 else
2349 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2350 counts[fd->last_nonrect]);
2351 expand_omp_build_assign (gsi, fd->loop.n2, t);
2352 }
2353 else if (fd->non_rect)
2354 {
2355 tree t = fd->loop.n2;
2356 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2357 int non_rect_referenced = 0, non_rect = 0;
2358 for (i = 0; i < fd->collapse; i++)
2359 {
2360 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2361 && !integer_zerop (counts[i]))
2362 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2363 if (fd->loops[i].non_rect_referenced)
2364 non_rect_referenced++;
2365 if (fd->loops[i].m1 || fd->loops[i].m2)
2366 non_rect++;
2367 }
2368 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2369 counts[fd->last_nonrect] = t;
2370 }
2371 }
2372
2373 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2374 T = V;
2375 V3 = N31 + (T % count3) * STEP3;
2376 T = T / count3;
2377 V2 = N21 + (T % count2) * STEP2;
2378 T = T / count2;
2379 V1 = N11 + T * STEP1;
2380 if this loop doesn't have an inner loop construct combined with it.
2381 If it does have an inner loop construct combined with it and the
2382 iteration count isn't known constant, store values from counts array
2383 into its _looptemp_ temporaries instead.
2384 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2385 inclusive), use the count of all those loops together, and either
2386 find quadratic etc. equation roots, or as a fallback, do:
2387 COUNT = 0;
2388 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2389 for (tmpj = M21 * tmpi + N21;
2390 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2391 {
2392 int tmpk1 = M31 * tmpj + N31;
2393 int tmpk2 = M32 * tmpj + N32;
2394 if (tmpk1 COND3 tmpk2)
2395 {
2396 if (COND3 is <)
2397 adj = STEP3 - 1;
2398 else
2399 adj = STEP3 + 1;
2400 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2401 if (COUNT + temp > T)
2402 {
2403 V1 = tmpi;
2404 V2 = tmpj;
2405 V3 = tmpk1 + (T - COUNT) * STEP3;
2406 goto done;
2407 }
2408 else
2409 COUNT += temp;
2410 }
2411 }
2412 done:;
2413 but for optional innermost or outermost rectangular loops that aren't
2414 referenced by other loop expressions keep doing the division/modulo. */
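/* E.g. (a sketch) with count3 == 5, count2 == 4 and T == 13 the pseudo
   code above yields
     V3 = N31 + (13 % 5) * STEP3 = N31 + 3 * STEP3;  T = 13 / 5 = 2;
     V2 = N21 + (2 % 4) * STEP2 = N21 + 2 * STEP2;   T = 2 / 4 = 0;
     V1 = N11 + 0 * STEP1 = N11;
   i.e. logical iteration 13 of the collapsed iteration space is mapped
   back onto the original iteration variables.  */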
2415
2416 static void
2417 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2418 tree *counts, tree *nonrect_bounds,
2419 gimple *inner_stmt, tree startvar)
2420 {
2421 int i;
2422 if (gimple_omp_for_combined_p (fd->for_stmt))
2423 {
2424 /* If fd->loop.n2 is constant, then no propagation of the counts
2425 is needed, they are constant. */
2426 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2427 return;
2428
2429 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2430 ? gimple_omp_taskreg_clauses (inner_stmt)
2431 : gimple_omp_for_clauses (inner_stmt);
2432 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2433 isn't supposed to be handled, as the inner loop doesn't
2434 use it. */
2435 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2436 gcc_assert (innerc);
2437 for (i = 0; i < fd->collapse; i++)
2438 {
2439 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2440 OMP_CLAUSE__LOOPTEMP_);
2441 gcc_assert (innerc);
2442 if (i)
2443 {
2444 tree tem = OMP_CLAUSE_DECL (innerc);
2445 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
2446 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2447 false, GSI_CONTINUE_LINKING);
2448 gassign *stmt = gimple_build_assign (tem, t);
2449 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2450 }
2451 }
2452 return;
2453 }
2454
2455 tree type = TREE_TYPE (fd->loop.v);
2456 tree tem = create_tmp_reg (type, ".tem");
2457 gassign *stmt = gimple_build_assign (tem, startvar);
2458 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2459
2460 for (i = fd->collapse - 1; i >= 0; i--)
2461 {
2462 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2463 itype = vtype;
2464 if (POINTER_TYPE_P (vtype))
2465 itype = signed_type_for (vtype);
2466 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2467 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2468 else
2469 t = tem;
2470 if (i == fd->last_nonrect)
2471 {
2472 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2473 false, GSI_CONTINUE_LINKING);
2474 tree stopval = t;
2475 tree idx = create_tmp_reg (type, ".count");
2476 expand_omp_build_assign (gsi, idx,
2477 build_zero_cst (type), true);
2478 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2479 if (fd->first_nonrect + 1 == fd->last_nonrect
2480 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2481 || (fd->first_inner_iterations
2482 /* For now. Later add clauses to propagate the
2483 values. */
2484 && !gimple_omp_for_combined_into_p (fd->for_stmt)))
2485 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2486 != CODE_FOR_nothing))
2487 {
2488 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2489 tree itype = TREE_TYPE (fd->loops[i].v);
2490 tree first_inner_iterations = fd->first_inner_iterations;
2491 tree factor = fd->factor;
2492 gcond *cond_stmt
2493 = gimple_build_cond (NE_EXPR, factor,
2494 build_zero_cst (TREE_TYPE (factor)),
2495 NULL_TREE, NULL_TREE);
2496 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2497 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2498 basic_block bb0 = e->src;
2499 e->flags = EDGE_TRUE_VALUE;
2500 e->probability = profile_probability::likely ();
2501 bb_triang_dom = bb0;
2502 *gsi = gsi_after_labels (e->dest);
2503 tree slltype = long_long_integer_type_node;
2504 tree ulltype = long_long_unsigned_type_node;
2505 tree stopvalull = fold_convert (ulltype, stopval);
2506 stopvalull
2507 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2508 false, GSI_CONTINUE_LINKING);
2509 first_inner_iterations
2510 = fold_convert (slltype, first_inner_iterations);
2511 first_inner_iterations
2512 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2513 NULL_TREE, false,
2514 GSI_CONTINUE_LINKING);
2515 factor = fold_convert (slltype, factor);
2516 factor
2517 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2518 false, GSI_CONTINUE_LINKING);
2519 tree first_inner_iterationsd
2520 = fold_build1 (FLOAT_EXPR, double_type_node,
2521 first_inner_iterations);
2522 first_inner_iterationsd
2523 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2524 NULL_TREE, false,
2525 GSI_CONTINUE_LINKING);
2526 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2527 factor);
2528 factord = force_gimple_operand_gsi (gsi, factord, true,
2529 NULL_TREE, false,
2530 GSI_CONTINUE_LINKING);
2531 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2532 stopvalull);
2533 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2534 NULL_TREE, false,
2535 GSI_CONTINUE_LINKING);
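/* A sketch of the derivation behind the floating point code below: we
   look for the largest integer c satisfying
     first_inner_iterations * c + factor * c * (c - 1) / 2 <= stopval,
   i.e. the positive root of
     (factor / 2) * c*c + (first_inner_iterations - factor / 2) * c - stopval = 0,
   which with t3 = first_inner_iterations - factor / 2 is
     c = (sqrt (t3 * t3 + 2 * factor * stopval) - t3) / factor.
   The result is computed in double and then verified by the integer
   checks further below.  */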
2536 /* Temporarily disable flag_rounding_math; the values will be
2537 decimal numbers divided by 2 and worst case imprecisions
2538 due to too large values ought to be caught later by the
2539 checks for the fallback. */
2540 int save_flag_rounding_math = flag_rounding_math;
2541 flag_rounding_math = 0;
2542 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2543 build_real (double_type_node, dconst2));
2544 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2545 first_inner_iterationsd, t);
2546 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2547 GSI_CONTINUE_LINKING);
2548 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2549 build_real (double_type_node, dconst2));
2550 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2551 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2552 fold_build2 (MULT_EXPR, double_type_node,
2553 t3, t3));
2554 flag_rounding_math = save_flag_rounding_math;
2555 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2556 GSI_CONTINUE_LINKING);
2557 if (flag_exceptions
2558 && cfun->can_throw_non_call_exceptions
2559 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2560 {
2561 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2562 build_zero_cst (double_type_node));
2563 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2564 false, GSI_CONTINUE_LINKING);
2565 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2566 boolean_false_node,
2567 NULL_TREE, NULL_TREE);
2568 }
2569 else
2570 cond_stmt
2571 = gimple_build_cond (LT_EXPR, t,
2572 build_zero_cst (double_type_node),
2573 NULL_TREE, NULL_TREE);
2574 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2575 e = split_block (gsi_bb (*gsi), cond_stmt);
2576 basic_block bb1 = e->src;
2577 e->flags = EDGE_FALSE_VALUE;
2578 e->probability = profile_probability::very_likely ();
2579 *gsi = gsi_after_labels (e->dest);
2580 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2581 tree sqrtr = create_tmp_var (double_type_node);
2582 gimple_call_set_lhs (call, sqrtr);
2583 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2584 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2585 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2586 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2587 tree c = create_tmp_var (ulltype);
2588 tree d = create_tmp_var (ulltype);
2589 expand_omp_build_assign (gsi, c, t, true);
2590 t = fold_build2 (MINUS_EXPR, ulltype, c,
2591 build_one_cst (ulltype));
2592 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2593 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2594 t = fold_build2 (MULT_EXPR, ulltype,
2595 fold_convert (ulltype, fd->factor), t);
2596 tree t2
2597 = fold_build2 (MULT_EXPR, ulltype, c,
2598 fold_convert (ulltype,
2599 fd->first_inner_iterations));
2600 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2601 expand_omp_build_assign (gsi, d, t, true);
2602 t = fold_build2 (MULT_EXPR, ulltype,
2603 fold_convert (ulltype, fd->factor), c);
2604 t = fold_build2 (PLUS_EXPR, ulltype,
2605 t, fold_convert (ulltype,
2606 fd->first_inner_iterations));
2607 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2608 GSI_CONTINUE_LINKING);
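/* Here d = factor * c * (c - 1) / 2 + c * first_inner_iterations is the
   number of logical iterations executed by the first c outer iterations,
   and t2 = factor * c + first_inner_iterations is the inner iteration
   count of outer iteration c itself.  The two comparisons below verify
   d <= stopval < d + t2, guarding against imprecision in the floating
   point computation of c; if either check fails, the very unlikely edges
   set up afterwards lead to the runtime fallback.  */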
2609 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2610 NULL_TREE, NULL_TREE);
2611 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2612 e = split_block (gsi_bb (*gsi), cond_stmt);
2613 basic_block bb2 = e->src;
2614 e->flags = EDGE_TRUE_VALUE;
2615 e->probability = profile_probability::very_likely ();
2616 *gsi = gsi_after_labels (e->dest);
2617 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2618 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2619 GSI_CONTINUE_LINKING);
2620 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2621 NULL_TREE, NULL_TREE);
2622 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2623 e = split_block (gsi_bb (*gsi), cond_stmt);
2624 basic_block bb3 = e->src;
2625 e->flags = EDGE_FALSE_VALUE;
2626 e->probability = profile_probability::very_likely ();
2627 *gsi = gsi_after_labels (e->dest);
2628 t = fold_convert (itype, c);
2629 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2630 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2631 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2632 GSI_CONTINUE_LINKING);
2633 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2634 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2635 t2 = fold_convert (itype, t2);
2636 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2637 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2638 if (fd->loops[i].m1)
2639 {
2640 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2641 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2642 }
2643 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2644 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2645 bb_triang = e->src;
2646 *gsi = gsi_after_labels (e->dest);
2647 remove_edge (e);
2648 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2649 e->probability = profile_probability::very_unlikely ();
2650 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2651 e->probability = profile_probability::very_unlikely ();
2652 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2653 e->probability = profile_probability::very_unlikely ();
2654
2655 basic_block bb4 = create_empty_bb (bb0);
2656 add_bb_to_loop (bb4, bb0->loop_father);
2657 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2658 e->probability = profile_probability::unlikely ();
2659 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2660 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2661 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2662 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2663 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2664 counts[i], counts[i - 1]);
2665 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2666 GSI_CONTINUE_LINKING);
2667 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2668 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2669 t = fold_convert (itype, t);
2670 t2 = fold_convert (itype, t2);
2671 t = fold_build2 (MULT_EXPR, itype, t,
2672 fold_convert (itype, fd->loops[i].step));
2673 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2,
2675 fold_convert (itype, fd->loops[i - 1].step));
2676 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2677 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2678 false, GSI_CONTINUE_LINKING);
2679 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2680 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2681 if (fd->loops[i].m1)
2682 {
2683 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2684 fd->loops[i - 1].v);
2685 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2686 }
2687 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2688 false, GSI_CONTINUE_LINKING);
2689 stmt = gimple_build_assign (fd->loops[i].v, t);
2690 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2691 }
2692 /* Fallback implementation. Evaluate the loops in between
2693 (inclusive) fd->first_nonrect and fd->last_nonrect at
2694 runtime using temporaries instead of the original iteration
2695 variables; in the body just bump the counter and compare
2696 with the desired value. */
2697 gimple_stmt_iterator gsi2 = *gsi;
2698 basic_block entry_bb = gsi_bb (gsi2);
2699 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2700 e = split_block (e->dest, (gimple *) NULL);
2701 basic_block dom_bb = NULL;
2702 basic_block cur_bb = e->src;
2703 basic_block next_bb = e->dest;
2704 entry_bb = e->dest;
2705 *gsi = gsi_after_labels (entry_bb);
2706
2707 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2708 tree n1 = NULL_TREE, n2 = NULL_TREE;
2709 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2710
2711 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2712 {
2713 tree itype = TREE_TYPE (fd->loops[j].v);
2714 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2715 && fd->loops[j].m2 == NULL_TREE
2716 && !fd->loops[j].non_rect_referenced);
2717 gsi2 = gsi_after_labels (cur_bb);
2718 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2719 if (fd->loops[j].m1)
2720 {
2721 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2722 n1 = fold_build2 (MULT_EXPR, itype,
2723 vs[j - fd->loops[j].outer], n1);
2724 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2725 }
2726 else if (rect_p)
2727 n1 = build_zero_cst (type);
2728 else
2729 n1 = t;
2730 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2731 true, GSI_SAME_STMT);
2732 if (j < fd->last_nonrect)
2733 {
2734 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2735 expand_omp_build_assign (&gsi2, vs[j], n1);
2736 }
2737 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2738 if (fd->loops[j].m2)
2739 {
2740 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2741 n2 = fold_build2 (MULT_EXPR, itype,
2742 vs[j - fd->loops[j].outer], n2);
2743 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2744 }
2745 else if (rect_p)
2746 n2 = counts[j];
2747 else
2748 n2 = t;
2749 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2750 true, GSI_SAME_STMT);
2751 if (j == fd->last_nonrect)
2752 {
2753 gcond *cond_stmt
2754 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2755 NULL_TREE, NULL_TREE);
2756 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2757 e = split_block (cur_bb, cond_stmt);
2758 e->flags = EDGE_TRUE_VALUE;
2759 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2760 e->probability = profile_probability::likely ().guessed ();
2761 ne->probability = e->probability.invert ();
2762 gsi2 = gsi_after_labels (e->dest);
2763
2764 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2765 ? -1 : 1));
2766 t = fold_build2 (PLUS_EXPR, itype,
2767 fold_convert (itype, fd->loops[j].step), t);
2768 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2769 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2770 tree step = fold_convert (itype, fd->loops[j].step);
2771 if (TYPE_UNSIGNED (itype)
2772 && fd->loops[j].cond_code == GT_EXPR)
2773 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2774 fold_build1 (NEGATE_EXPR, itype, t),
2775 fold_build1 (NEGATE_EXPR, itype, step));
2776 else
2777 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2778 t = fold_convert (type, t);
2779 t = fold_build2 (PLUS_EXPR, type, idx, t);
2780 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2781 true, GSI_SAME_STMT);
2782 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2783 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2784 cond_stmt
2785 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2786 NULL_TREE);
2787 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2788 e = split_block (gsi_bb (gsi2), cond_stmt);
2789 e->flags = EDGE_TRUE_VALUE;
2790 e->probability = profile_probability::likely ().guessed ();
2791 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2792 ne->probability = e->probability.invert ();
2793 gsi2 = gsi_after_labels (e->dest);
2794 expand_omp_build_assign (&gsi2, idx, t);
2795 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2796 break;
2797 }
2798 e = split_block (cur_bb, last_stmt (cur_bb));
2799
2800 basic_block new_cur_bb = create_empty_bb (cur_bb);
2801 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2802
2803 gsi2 = gsi_after_labels (e->dest);
2804 if (rect_p)
2805 t = fold_build2 (PLUS_EXPR, type, vs[j],
2806 build_one_cst (type));
2807 else
2808 {
2809 tree step
2810 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2811 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2812 }
2813 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2814 true, GSI_SAME_STMT);
2815 expand_omp_build_assign (&gsi2, vs[j], t);
2816
2817 edge ne = split_block (e->dest, last_stmt (e->dest));
2818 gsi2 = gsi_after_labels (ne->dest);
2819
2820 gcond *cond_stmt;
2821 if (next_bb == entry_bb)
2822 /* No need to actually check the outermost condition. */
2823 cond_stmt
2824 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2825 boolean_true_node,
2826 NULL_TREE, NULL_TREE);
2827 else
2828 cond_stmt
2829 = gimple_build_cond (rect_p ? LT_EXPR
2830 : fd->loops[j].cond_code,
2831 vs[j], n2, NULL_TREE, NULL_TREE);
2832 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2833 edge e3, e4;
2834 if (next_bb == entry_bb)
2835 {
2836 e3 = find_edge (ne->dest, next_bb);
2837 e3->flags = EDGE_FALSE_VALUE;
2838 dom_bb = ne->dest;
2839 }
2840 else
2841 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2842 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2843 e4->probability = profile_probability::likely ().guessed ();
2844 e3->probability = e4->probability.invert ();
2845 basic_block esrc = e->src;
2846 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2847 cur_bb = new_cur_bb;
2848 basic_block latch_bb = next_bb;
2849 next_bb = e->dest;
2850 remove_edge (e);
2851 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2852 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2853 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2854 }
2855 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2856 {
2857 tree itype = TREE_TYPE (fd->loops[j].v);
2858 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2859 && fd->loops[j].m2 == NULL_TREE
2860 && !fd->loops[j].non_rect_referenced);
2861 if (j == fd->last_nonrect)
2862 {
2863 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2864 t = fold_convert (itype, t);
2865 tree t2
2866 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2867 t = fold_build2 (MULT_EXPR, itype, t, t2);
2868 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2869 }
2870 else if (rect_p)
2871 {
2872 t = fold_convert (itype, vs[j]);
2873 t = fold_build2 (MULT_EXPR, itype, t,
2874 fold_convert (itype, fd->loops[j].step));
2875 if (POINTER_TYPE_P (vtype))
2876 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2877 else
2878 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2879 }
2880 else
2881 t = vs[j];
2882 t = force_gimple_operand_gsi (gsi, t, false,
2883 NULL_TREE, true,
2884 GSI_SAME_STMT);
2885 stmt = gimple_build_assign (fd->loops[j].v, t);
2886 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2887 }
2888 if (gsi_end_p (*gsi))
2889 *gsi = gsi_last_bb (gsi_bb (*gsi));
2890 else
2891 gsi_prev (gsi);
2892 if (bb_triang)
2893 {
2894 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2895 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2896 *gsi = gsi_after_labels (e->dest);
2897 if (!gsi_end_p (*gsi))
2898 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2899 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2900 }
2901 }
2902 else
2903 {
2904 t = fold_convert (itype, t);
2905 t = fold_build2 (MULT_EXPR, itype, t,
2906 fold_convert (itype, fd->loops[i].step));
2907 if (POINTER_TYPE_P (vtype))
2908 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2909 else
2910 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2911 t = force_gimple_operand_gsi (gsi, t,
2912 DECL_P (fd->loops[i].v)
2913 && TREE_ADDRESSABLE (fd->loops[i].v),
2914 NULL_TREE, false,
2915 GSI_CONTINUE_LINKING);
2916 stmt = gimple_build_assign (fd->loops[i].v, t);
2917 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2918 }
2919 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2920 {
2921 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2922 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2923 false, GSI_CONTINUE_LINKING);
2924 stmt = gimple_build_assign (tem, t);
2925 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2926 }
2927 if (i == fd->last_nonrect)
2928 i = fd->first_nonrect;
2929 }
2930 if (fd->non_rect)
2931 for (i = 0; i <= fd->last_nonrect; i++)
2932 if (fd->loops[i].m2)
2933 {
2934 tree itype = TREE_TYPE (fd->loops[i].v);
2935
2936 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2937 t = fold_build2 (MULT_EXPR, itype,
2938 fd->loops[i - fd->loops[i].outer].v, t);
2939 t = fold_build2 (PLUS_EXPR, itype, t,
2940 fold_convert (itype,
2941 unshare_expr (fd->loops[i].n2)));
2942 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2943 t = force_gimple_operand_gsi (gsi, t, false,
2944 NULL_TREE, false,
2945 GSI_CONTINUE_LINKING);
2946 stmt = gimple_build_assign (nonrect_bounds[i], t);
2947 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2948 }
2949 }
2950
2951 /* Helper function for expand_omp_for_*. Generate code like:
2952 L10:
2953 V3 += STEP3;
2954 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2955 L11:
2956 V3 = N31;
2957 V2 += STEP2;
2958 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2959 L12:
2960 V2 = N21;
2961 V1 += STEP1;
2962 goto BODY_BB;
2963 For non-rectangular loops, use temporaries stored in nonrect_bounds
2964 for the upper bounds if M?2 multiplier is present. Given e.g.
2965 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2966 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2967 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2968 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2969 do:
2970 L10:
2971 V4 += STEP4;
2972 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2973 L11:
2974 V4 = N41 + M41 * V2; // This can be left out if the loop
2975 // refers to the immediate parent loop
2976 V3 += STEP3;
2977 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2978 L12:
2979 V3 = N31;
2980 V2 += STEP2;
2981 if (V2 cond2 N22) goto L120; else goto L13;
2982 L120:
2983 V4 = N41 + M41 * V2;
2984 NONRECT_BOUND4 = N42 + M42 * V2;
2985 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2986 L13:
2987 V2 = N21;
2988 V1 += STEP1;
2989 goto L120; */
2990
2991 static basic_block
2992 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
2993 basic_block cont_bb, basic_block body_bb)
2994 {
2995 basic_block last_bb, bb, collapse_bb = NULL;
2996 int i;
2997 gimple_stmt_iterator gsi;
2998 edge e;
2999 tree t;
3000 gimple *stmt;
3001
3002 last_bb = cont_bb;
3003 for (i = fd->collapse - 1; i >= 0; i--)
3004 {
3005 tree vtype = TREE_TYPE (fd->loops[i].v);
3006
3007 bb = create_empty_bb (last_bb);
3008 add_bb_to_loop (bb, last_bb->loop_father);
3009 gsi = gsi_start_bb (bb);
3010
3011 if (i < fd->collapse - 1)
3012 {
3013 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3014 e->probability
3015 = profile_probability::guessed_always ().apply_scale (1, 8);
3016
3017 struct omp_for_data_loop *l = &fd->loops[i + 1];
3018 if (l->m1 == NULL_TREE || l->outer != 1)
3019 {
3020 t = l->n1;
3021 if (l->m1)
3022 {
3023 tree t2
3024 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3025 fd->loops[i + 1 - l->outer].v, l->m1);
3026 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3027 }
3028 t = force_gimple_operand_gsi (&gsi, t,
3029 DECL_P (l->v)
3030 && TREE_ADDRESSABLE (l->v),
3031 NULL_TREE, false,
3032 GSI_CONTINUE_LINKING);
3033 stmt = gimple_build_assign (l->v, t);
3034 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3035 }
3036 }
3037 else
3038 collapse_bb = bb;
3039
3040 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3041
3042 if (POINTER_TYPE_P (vtype))
3043 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3044 else
3045 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3046 t = force_gimple_operand_gsi (&gsi, t,
3047 DECL_P (fd->loops[i].v)
3048 && TREE_ADDRESSABLE (fd->loops[i].v),
3049 NULL_TREE, false, GSI_CONTINUE_LINKING);
3050 stmt = gimple_build_assign (fd->loops[i].v, t);
3051 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3052
3053 if (fd->loops[i].non_rect_referenced)
3054 {
3055 basic_block update_bb = NULL, prev_bb = NULL;
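/* This loop I is referenced in the bounds of some inner non-rectangular
   loop(s).  For every such loop J build a block (the L120 block in the
   comment above) that reinitializes J's iteration variable from M?1 and,
   when an M?2 multiplier is present, the nonrect_bounds[J] temporary,
   and then retests J's condition.  */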
3056 for (int j = i + 1; j <= fd->last_nonrect; j++)
3057 if (j - fd->loops[j].outer == i)
3058 {
3059 tree n1, n2;
3060 struct omp_for_data_loop *l = &fd->loops[j];
3061 basic_block this_bb = create_empty_bb (last_bb);
3062 add_bb_to_loop (this_bb, last_bb->loop_father);
3063 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3064 if (prev_bb)
3065 {
3066 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3067 e->probability
3068 = profile_probability::guessed_always ().apply_scale (7,
3069 8);
3070 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3071 }
3072 if (l->m1)
3073 {
3074 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3075 fd->loops[i].v);
3076 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3077 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3078 false,
3079 GSI_CONTINUE_LINKING);
3080 stmt = gimple_build_assign (l->v, n1);
3081 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3082 n1 = l->v;
3083 }
3084 else
3085 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3086 NULL_TREE, false,
3087 GSI_CONTINUE_LINKING);
3088 if (l->m2)
3089 {
3090 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3091 fd->loops[i].v);
3092 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3093 t, unshare_expr (l->n2));
3094 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3095 false,
3096 GSI_CONTINUE_LINKING);
3097 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3098 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3099 n2 = nonrect_bounds[j];
3100 }
3101 else
3102 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3103 true, NULL_TREE, false,
3104 GSI_CONTINUE_LINKING);
3105 gcond *cond_stmt
3106 = gimple_build_cond (l->cond_code, n1, n2,
3107 NULL_TREE, NULL_TREE);
3108 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3109 if (update_bb == NULL)
3110 update_bb = this_bb;
3111 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3112 e->probability
3113 = profile_probability::guessed_always ().apply_scale (1, 8);
3114 if (prev_bb == NULL)
3115 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3116 prev_bb = this_bb;
3117 }
3118 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3119 e->probability
3120 = profile_probability::guessed_always ().apply_scale (7, 8);
3121 body_bb = update_bb;
3122 }
3123
3124 if (i > 0)
3125 {
3126 if (fd->loops[i].m2)
3127 t = nonrect_bounds[i];
3128 else
3129 t = unshare_expr (fd->loops[i].n2);
3130 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3131 false, GSI_CONTINUE_LINKING);
3132 tree v = fd->loops[i].v;
3133 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3134 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3135 false, GSI_CONTINUE_LINKING);
3136 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3137 stmt = gimple_build_cond_empty (t);
3138 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3139 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3140 expand_omp_regimplify_p, NULL, NULL)
3141 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3142 expand_omp_regimplify_p, NULL, NULL))
3143 gimple_regimplify_operands (stmt, &gsi);
3144 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3145 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3146 }
3147 else
3148 make_edge (bb, body_bb, EDGE_FALLTHRU);
3149 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3150 last_bb = bb;
3151 }
3152
3153 return collapse_bb;
3154 }
3155
3156 /* Expand #pragma omp ordered depend(source). */
3157
3158 static void
3159 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3160 tree *counts, location_t loc)
3161 {
3162 enum built_in_function source_ix
3163 = fd->iter_type == long_integer_type_node
3164 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3165 gimple *g
3166 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3167 build_fold_addr_expr (counts[fd->ordered]));
3168 gimple_set_location (g, loc);
3169 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3170 }
3171
3172 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
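/* Roughly: each offset in depend(sink: V1 OFFSET1, V2 OFFSET2, ...) is
   checked against the iteration space (the checks are accumulated into
   COND below), normalized by the loop step into an iteration number and
   passed to GOMP_doacross_wait (GOMP_doacross_ull_wait for long long
   iteration types); offsets that provably wait on a lexically later
   iteration or refer to an iteration never in the iteration space are
   diagnosed with a warning.  */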
3173
3174 static void
3175 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3176 tree *counts, tree c, location_t loc)
3177 {
3178 auto_vec<tree, 10> args;
3179 enum built_in_function sink_ix
3180 = fd->iter_type == long_integer_type_node
3181 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3182 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3183 int i;
3184 gimple_stmt_iterator gsi2 = *gsi;
3185 bool warned_step = false;
3186
3187 for (i = 0; i < fd->ordered; i++)
3188 {
3189 tree step = NULL_TREE;
3190 off = TREE_PURPOSE (deps);
3191 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3192 {
3193 step = TREE_OPERAND (off, 1);
3194 off = TREE_OPERAND (off, 0);
3195 }
3196 if (!integer_zerop (off))
3197 {
3198 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3199 || fd->loops[i].cond_code == GT_EXPR);
3200 bool forward = fd->loops[i].cond_code == LT_EXPR;
3201 if (step)
3202 {
3203 /* Non-simple Fortran DO loops. If step is variable,
3204 we don't know even the direction at compile time,
3205 so we can't warn. */
3206 if (TREE_CODE (step) != INTEGER_CST)
3207 break;
3208 forward = tree_int_cst_sgn (step) != -1;
3209 }
3210 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3211 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3212 "waiting for lexically later iteration");
3213 break;
3214 }
3215 deps = TREE_CHAIN (deps);
3216 }
3217 /* If all offsets corresponding to the collapsed loops are zero,
3218 this depend clause can be ignored. FIXME: but there is still a
3219 flush needed. We need to emit one __sync_synchronize () for it
3220 though (perhaps conditionally)? Solve this together with the
3221 conservative dependence folding optimization.
3222 if (i >= fd->collapse)
3223 return; */
3224
3225 deps = OMP_CLAUSE_DECL (c);
3226 gsi_prev (&gsi2);
3227 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3228 edge e2 = split_block_after_labels (e1->dest);
3229
3230 gsi2 = gsi_after_labels (e1->dest);
3231 *gsi = gsi_last_bb (e1->src);
3232 for (i = 0; i < fd->ordered; i++)
3233 {
3234 tree itype = TREE_TYPE (fd->loops[i].v);
3235 tree step = NULL_TREE;
3236 tree orig_off = NULL_TREE;
3237 if (POINTER_TYPE_P (itype))
3238 itype = sizetype;
3239 if (i)
3240 deps = TREE_CHAIN (deps);
3241 off = TREE_PURPOSE (deps);
3242 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3243 {
3244 step = TREE_OPERAND (off, 1);
3245 off = TREE_OPERAND (off, 0);
3246 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3247 && integer_onep (fd->loops[i].step)
3248 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3249 }
3250 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3251 if (step)
3252 {
3253 off = fold_convert_loc (loc, itype, off);
3254 orig_off = off;
3255 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3256 }
3257
3258 if (integer_zerop (off))
3259 t = boolean_true_node;
3260 else
3261 {
3262 tree a;
3263 tree co = fold_convert_loc (loc, itype, off);
3264 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3265 {
3266 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3267 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3268 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3269 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3270 co);
3271 }
3272 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3273 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3274 fd->loops[i].v, co);
3275 else
3276 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3277 fd->loops[i].v, co);
3278 if (step)
3279 {
3280 tree t1, t2;
3281 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3282 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3283 fd->loops[i].n1);
3284 else
3285 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3286 fd->loops[i].n2);
3287 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3288 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3289 fd->loops[i].n2);
3290 else
3291 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3292 fd->loops[i].n1);
3293 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3294 step, build_int_cst (TREE_TYPE (step), 0));
3295 if (TREE_CODE (step) != INTEGER_CST)
3296 {
3297 t1 = unshare_expr (t1);
3298 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3299 false, GSI_CONTINUE_LINKING);
3300 t2 = unshare_expr (t2);
3301 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3302 false, GSI_CONTINUE_LINKING);
3303 }
3304 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3305 t, t2, t1);
3306 }
3307 else if (fd->loops[i].cond_code == LT_EXPR)
3308 {
3309 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3310 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3311 fd->loops[i].n1);
3312 else
3313 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3314 fd->loops[i].n2);
3315 }
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3317 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3318 fd->loops[i].n2);
3319 else
3320 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3322 }
3323 if (cond)
3324 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3325 else
3326 cond = t;
3327
3328 off = fold_convert_loc (loc, itype, off);
3329
3330 if (step
3331 || (fd->loops[i].cond_code == LT_EXPR
3332 ? !integer_onep (fd->loops[i].step)
3333 : !integer_minus_onep (fd->loops[i].step)))
3334 {
3335 if (step == NULL_TREE
3336 && TYPE_UNSIGNED (itype)
3337 && fd->loops[i].cond_code == GT_EXPR)
3338 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3339 fold_build1_loc (loc, NEGATE_EXPR, itype,
3340 s));
3341 else
3342 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3343 orig_off ? orig_off : off, s);
3344 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3345 build_int_cst (itype, 0));
3346 if (integer_zerop (t) && !warned_step)
3347 {
3348 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3349 "refers to iteration never in the iteration "
3350 "space");
3351 warned_step = true;
3352 }
3353 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3354 cond, t);
3355 }
3356
3357 if (i <= fd->collapse - 1 && fd->collapse > 1)
3358 t = fd->loop.v;
3359 else if (counts[i])
3360 t = counts[i];
3361 else
3362 {
3363 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3364 fd->loops[i].v, fd->loops[i].n1);
3365 t = fold_convert_loc (loc, fd->iter_type, t);
3366 }
3367 if (step)
3368 /* We have divided off by step already earlier. */;
3369 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3370 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3371 fold_build1_loc (loc, NEGATE_EXPR, itype,
3372 s));
3373 else
3374 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3375 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3376 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3377 off = fold_convert_loc (loc, fd->iter_type, off);
3378 if (i <= fd->collapse - 1 && fd->collapse > 1)
3379 {
3380 if (i)
3381 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3382 off);
3383 if (i < fd->collapse - 1)
3384 {
3385 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3386 counts[i]);
3387 continue;
3388 }
3389 }
3390 off = unshare_expr (off);
3391 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3392 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3393 true, GSI_SAME_STMT);
3394 args.safe_push (t);
3395 }
3396 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3397 gimple_set_location (g, loc);
3398 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3399
3400 cond = unshare_expr (cond);
3401 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3402 GSI_CONTINUE_LINKING);
3403 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3404 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3405 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3406 e1->probability = e3->probability.invert ();
3407 e1->flags = EDGE_TRUE_VALUE;
3408 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3409
3410 *gsi = gsi_after_labels (e2->dest);
3411 }
3412
3413 /* Expand all #pragma omp ordered depend(source) and
3414 #pragma omp ordered depend(sink:...) constructs in the current
3415 #pragma omp for ordered(n) region. */
3416
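/* An informal sketch of the bookkeeping set up below (using the names this
   function emits): for each ordered dimension beyond the collapsed ones a
   ".orditer" counter variable may be created (it is omitted when the loop
   variable itself can serve as the counter), and ".orditera" is an array
   holding the current iteration vector, which depend(source) posts and
   depend(sink:...) waits on via the GOMP_doacross_* entry points.  */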
3417 static void
3418 expand_omp_ordered_source_sink (struct omp_region *region,
3419 struct omp_for_data *fd, tree *counts,
3420 basic_block cont_bb)
3421 {
3422 struct omp_region *inner;
3423 int i;
3424 for (i = fd->collapse - 1; i < fd->ordered; i++)
3425 if (i == fd->collapse - 1 && fd->collapse > 1)
3426 counts[i] = NULL_TREE;
3427 else if (i >= fd->collapse && !cont_bb)
3428 counts[i] = build_zero_cst (fd->iter_type);
3429 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3430 && integer_onep (fd->loops[i].step))
3431 counts[i] = NULL_TREE;
3432 else
3433 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3434 tree atype
3435 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3436 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3437 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3438
3439 for (inner = region->inner; inner; inner = inner->next)
3440 if (inner->type == GIMPLE_OMP_ORDERED)
3441 {
3442 gomp_ordered *ord_stmt = inner->ord_stmt;
3443 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3444 location_t loc = gimple_location (ord_stmt);
3445 tree c;
3446 for (c = gimple_omp_ordered_clauses (ord_stmt);
3447 c; c = OMP_CLAUSE_CHAIN (c))
3448 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3449 break;
3450 if (c)
3451 expand_omp_ordered_source (&gsi, fd, counts, loc);
3452 for (c = gimple_omp_ordered_clauses (ord_stmt);
3453 c; c = OMP_CLAUSE_CHAIN (c))
3454 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3455 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3456 gsi_remove (&gsi, true);
3457 }
3458 }
3459
3460 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3461 collapsed. */
3462
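/* Roughly (an informal sketch, ignoring the zero-iteration and
   lastprivate details handled below), each non-collapsed dimension I
   in [fd->collapse, fd->ordered) is wrapped as

     V_I = N1_I;  counter_I = 0;  .orditera[I - collapse + 1] = 0;
   header_I:
     if (V_I cond_I N2_I) goto body_I; else goto after_I;
   body_I:
     ... next dimension or the ordered loop body ...
     V_I += STEP_I;  counter_I++;  .orditera[I - collapse + 1] = counter_I;
     goto header_I;
   after_I:  */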
3463 static basic_block
3464 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3465 basic_block cont_bb, basic_block body_bb,
3466 bool ordered_lastprivate)
3467 {
3468 if (fd->ordered == fd->collapse)
3469 return cont_bb;
3470
3471 if (!cont_bb)
3472 {
3473 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3474 for (int i = fd->collapse; i < fd->ordered; i++)
3475 {
3476 tree type = TREE_TYPE (fd->loops[i].v);
3477 tree n1 = fold_convert (type, fd->loops[i].n1);
3478 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3479 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3480 size_int (i - fd->collapse + 1),
3481 NULL_TREE, NULL_TREE);
3482 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3483 }
3484 return NULL;
3485 }
3486
3487 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3488 {
3489 tree t, type = TREE_TYPE (fd->loops[i].v);
3490 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3491 expand_omp_build_assign (&gsi, fd->loops[i].v,
3492 fold_convert (type, fd->loops[i].n1));
3493 if (counts[i])
3494 expand_omp_build_assign (&gsi, counts[i],
3495 build_zero_cst (fd->iter_type));
3496 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3497 size_int (i - fd->collapse + 1),
3498 NULL_TREE, NULL_TREE);
3499 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3500 if (!gsi_end_p (gsi))
3501 gsi_prev (&gsi);
3502 else
3503 gsi = gsi_last_bb (body_bb);
3504 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3505 basic_block new_body = e1->dest;
3506 if (body_bb == cont_bb)
3507 cont_bb = new_body;
3508 edge e2 = NULL;
3509 basic_block new_header;
3510 if (EDGE_COUNT (cont_bb->preds) > 0)
3511 {
3512 gsi = gsi_last_bb (cont_bb);
3513 if (POINTER_TYPE_P (type))
3514 t = fold_build_pointer_plus (fd->loops[i].v,
3515 fold_convert (sizetype,
3516 fd->loops[i].step));
3517 else
3518 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3519 fold_convert (type, fd->loops[i].step));
3520 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3521 if (counts[i])
3522 {
3523 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3524 build_int_cst (fd->iter_type, 1));
3525 expand_omp_build_assign (&gsi, counts[i], t);
3526 t = counts[i];
3527 }
3528 else
3529 {
3530 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3531 fd->loops[i].v, fd->loops[i].n1);
3532 t = fold_convert (fd->iter_type, t);
3533 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 true, GSI_SAME_STMT);
3535 }
3536 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3537 size_int (i - fd->collapse + 1),
3538 NULL_TREE, NULL_TREE);
3539 expand_omp_build_assign (&gsi, aref, t);
3540 gsi_prev (&gsi);
3541 e2 = split_block (cont_bb, gsi_stmt (gsi));
3542 new_header = e2->dest;
3543 }
3544 else
3545 new_header = cont_bb;
3546 gsi = gsi_after_labels (new_header);
3547 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3548 true, GSI_SAME_STMT);
3549 tree n2
3550 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3551 true, NULL_TREE, true, GSI_SAME_STMT);
3552 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3553 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3554 edge e3 = split_block (new_header, gsi_stmt (gsi));
3555 cont_bb = e3->dest;
3556 remove_edge (e1);
3557 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3558 e3->flags = EDGE_FALSE_VALUE;
3559 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3560 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3561 e1->probability = e3->probability.invert ();
3562
3563 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3564 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3565
3566 if (e2)
3567 {
3568 class loop *loop = alloc_loop ();
3569 loop->header = new_header;
3570 loop->latch = e2->src;
3571 add_loop (loop, body_bb->loop_father);
3572 }
3573 }
3574
3575 /* If there are any lastprivate clauses and it is possible some loops
3576 might have zero iterations, ensure all the decls are initialized;
3577 otherwise we could crash evaluating C++ class iterators with lastprivate
3578 clauses. */
3579 bool need_inits = false;
3580 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3581 if (need_inits)
3582 {
3583 tree type = TREE_TYPE (fd->loops[i].v);
3584 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3585 expand_omp_build_assign (&gsi, fd->loops[i].v,
3586 fold_convert (type, fd->loops[i].n1));
3587 }
3588 else
3589 {
3590 tree type = TREE_TYPE (fd->loops[i].v);
3591 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3592 boolean_type_node,
3593 fold_convert (type, fd->loops[i].n1),
3594 fold_convert (type, fd->loops[i].n2));
3595 if (!integer_onep (this_cond))
3596 need_inits = true;
3597 }
3598
3599 return cont_bb;
3600 }
3601
3602 /* A subroutine of expand_omp_for. Generate code for a parallel
3603 loop with any schedule. Given parameters:
3604
3605 for (V = N1; V cond N2; V += STEP) BODY;
3606
3607 where COND is "<" or ">", we generate pseudocode
3608
3609 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3610 if (more) goto L0; else goto L3;
3611 L0:
3612 V = istart0;
3613 iend = iend0;
3614 L1:
3615 BODY;
3616 V += STEP;
3617 if (V cond iend) goto L1; else goto L2;
3618 L2:
3619 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3620 L3:
3621
3622 If this is a combined omp parallel loop, instead of the call to
3623 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3624 If this is gimple_omp_for_combined_p loop, then instead of assigning
3625 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3626 inner GIMPLE_OMP_FOR and V += STEP; and
3627 if (V cond iend) goto L1; else goto L2; are removed.
3628
3629 For collapsed loops, given parameters:
3630 collapse(3)
3631 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3632 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3633 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3634 BODY;
3635
3636 we generate pseudocode
3637
3638 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3639 if (cond3 is <)
3640 adj = STEP3 - 1;
3641 else
3642 adj = STEP3 + 1;
3643 count3 = (adj + N32 - N31) / STEP3;
3644 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3645 if (cond2 is <)
3646 adj = STEP2 - 1;
3647 else
3648 adj = STEP2 + 1;
3649 count2 = (adj + N22 - N21) / STEP2;
3650 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3651 if (cond1 is <)
3652 adj = STEP1 - 1;
3653 else
3654 adj = STEP1 + 1;
3655 count1 = (adj + N12 - N11) / STEP1;
3656 count = count1 * count2 * count3;
3657 goto Z1;
3658 Z0:
3659 count = 0;
3660 Z1:
3661 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3662 if (more) goto L0; else goto L3;
3663 L0:
3664 V = istart0;
3665 T = V;
3666 V3 = N31 + (T % count3) * STEP3;
3667 T = T / count3;
3668 V2 = N21 + (T % count2) * STEP2;
3669 T = T / count2;
3670 V1 = N11 + T * STEP1;
3671 iend = iend0;
3672 L1:
3673 BODY;
3674 V += 1;
3675 if (V < iend) goto L10; else goto L2;
3676 L10:
3677 V3 += STEP3;
3678 if (V3 cond3 N32) goto L1; else goto L11;
3679 L11:
3680 V3 = N31;
3681 V2 += STEP2;
3682 if (V2 cond2 N22) goto L1; else goto L12;
3683 L12:
3684 V2 = N21;
3685 V1 += STEP1;
3686 goto L1;
3687 L2:
3688 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3689 L3:
3690
3691 */
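/* As a concrete (informal) instance of the above, for

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   START_FN is GOMP_loop_dynamic_start and NEXT_FN is GOMP_loop_dynamic_next,
   and the generated code behaves like

     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 {
	   for (i = istart0; i < iend0; i++)
	     body (i);
	 }
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();  */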
3692
3693 static void
3694 expand_omp_for_generic (struct omp_region *region,
3695 struct omp_for_data *fd,
3696 enum built_in_function start_fn,
3697 enum built_in_function next_fn,
3698 tree sched_arg,
3699 gimple *inner_stmt)
3700 {
3701 tree type, istart0, iend0, iend;
3702 tree t, vmain, vback, bias = NULL_TREE;
3703 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3704 basic_block l2_bb = NULL, l3_bb = NULL;
3705 gimple_stmt_iterator gsi;
3706 gassign *assign_stmt;
3707 bool in_combined_parallel = is_combined_parallel (region);
3708 bool broken_loop = region->cont == NULL;
3709 edge e, ne;
3710 tree *counts = NULL;
3711 int i;
3712 bool ordered_lastprivate = false;
3713
3714 gcc_assert (!broken_loop || !in_combined_parallel);
3715 gcc_assert (fd->iter_type == long_integer_type_node
3716 || !in_combined_parallel);
3717
3718 entry_bb = region->entry;
3719 cont_bb = region->cont;
3720 collapse_bb = NULL;
3721 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3722 gcc_assert (broken_loop
3723 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3724 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3725 l1_bb = single_succ (l0_bb);
3726 if (!broken_loop)
3727 {
3728 l2_bb = create_empty_bb (cont_bb);
3729 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3730 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3731 == l1_bb));
3732 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3733 }
3734 else
3735 l2_bb = NULL;
3736 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3737 exit_bb = region->exit;
3738
3739 gsi = gsi_last_nondebug_bb (entry_bb);
3740
3741 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3742 if (fd->ordered
3743 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3744 OMP_CLAUSE_LASTPRIVATE))
3745 ordered_lastprivate = true;
3746 tree reductions = NULL_TREE;
3747 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3748 tree memv = NULL_TREE;
3749 if (fd->lastprivate_conditional)
3750 {
3751 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3752 OMP_CLAUSE__CONDTEMP_);
3753 if (fd->have_pointer_condtemp)
3754 condtemp = OMP_CLAUSE_DECL (c);
3755 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3756 cond_var = OMP_CLAUSE_DECL (c);
3757 }
3758 if (sched_arg)
3759 {
3760 if (fd->have_reductemp)
3761 {
3762 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3763 OMP_CLAUSE__REDUCTEMP_);
3764 reductions = OMP_CLAUSE_DECL (c);
3765 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3766 gimple *g = SSA_NAME_DEF_STMT (reductions);
3767 reductions = gimple_assign_rhs1 (g);
3768 OMP_CLAUSE_DECL (c) = reductions;
3769 entry_bb = gimple_bb (g);
3770 edge e = split_block (entry_bb, g);
3771 if (region->entry == entry_bb)
3772 region->entry = e->dest;
3773 gsi = gsi_last_bb (entry_bb);
3774 }
3775 else
3776 reductions = null_pointer_node;
3777 if (fd->have_pointer_condtemp)
3778 {
3779 tree type = TREE_TYPE (condtemp);
3780 memv = create_tmp_var (type);
3781 TREE_ADDRESSABLE (memv) = 1;
3782 unsigned HOST_WIDE_INT sz
3783 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3784 sz *= fd->lastprivate_conditional;
3785 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3786 false);
3787 mem = build_fold_addr_expr (memv);
3788 }
3789 else
3790 mem = null_pointer_node;
3791 }
3792 if (fd->collapse > 1 || fd->ordered)
3793 {
3794 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3795 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3796
3797 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3798 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3799 zero_iter1_bb, first_zero_iter1,
3800 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3801
3802 if (zero_iter1_bb)
3803 {
3804 /* Some counts[i] vars might be uninitialized if
3805 some loop has zero iterations. But the body shouldn't
3806 be executed in that case, so just avoid uninit warnings. */
3807 for (i = first_zero_iter1;
3808 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3809 if (SSA_VAR_P (counts[i]))
3810 TREE_NO_WARNING (counts[i]) = 1;
3811 gsi_prev (&gsi);
3812 e = split_block (entry_bb, gsi_stmt (gsi));
3813 entry_bb = e->dest;
3814 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3815 gsi = gsi_last_nondebug_bb (entry_bb);
3816 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3817 get_immediate_dominator (CDI_DOMINATORS,
3818 zero_iter1_bb));
3819 }
3820 if (zero_iter2_bb)
3821 {
3822 /* Some counts[i] vars might be uninitialized if
3823 some loop has zero iterations. But the body shouldn't
3824 be executed in that case, so just avoid uninit warnings. */
3825 for (i = first_zero_iter2; i < fd->ordered; i++)
3826 if (SSA_VAR_P (counts[i]))
3827 TREE_NO_WARNING (counts[i]) = 1;
3828 if (zero_iter1_bb)
3829 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3830 else
3831 {
3832 gsi_prev (&gsi);
3833 e = split_block (entry_bb, gsi_stmt (gsi));
3834 entry_bb = e->dest;
3835 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3836 gsi = gsi_last_nondebug_bb (entry_bb);
3837 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3838 get_immediate_dominator
3839 (CDI_DOMINATORS, zero_iter2_bb));
3840 }
3841 }
3842 if (fd->collapse == 1)
3843 {
3844 counts[0] = fd->loop.n2;
3845 fd->loop = fd->loops[0];
3846 }
3847 }
3848
3849 type = TREE_TYPE (fd->loop.v);
3850 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3851 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3852 TREE_ADDRESSABLE (istart0) = 1;
3853 TREE_ADDRESSABLE (iend0) = 1;
3854
3855 /* See if we need to bias by LLONG_MIN. */
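/* An informal illustration of why: with a signed 64-bit iteration variable
   and e.g. N1 = -10, N2 = 10, the bounds could not both be represented in
   order in the unsigned long long iteration space used by the
   GOMP_loop_ull_* functions.  Adding bias = 0x8000000000000000 maps
   -10 to 0x7ffffffffffffff6 and 10 to 0x800000000000000a, which compare
   correctly as unsigned; the bias is subtracted again when assigning the
   returned bounds back to the iteration variable.  */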
3856 if (fd->iter_type == long_long_unsigned_type_node
3857 && TREE_CODE (type) == INTEGER_TYPE
3858 && !TYPE_UNSIGNED (type)
3859 && fd->ordered == 0)
3860 {
3861 tree n1, n2;
3862
3863 if (fd->loop.cond_code == LT_EXPR)
3864 {
3865 n1 = fd->loop.n1;
3866 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3867 }
3868 else
3869 {
3870 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3871 n2 = fd->loop.n1;
3872 }
3873 if (TREE_CODE (n1) != INTEGER_CST
3874 || TREE_CODE (n2) != INTEGER_CST
3875 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3876 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3877 }
3878
3879 gimple_stmt_iterator gsif = gsi;
3880 gsi_prev (&gsif);
3881
3882 tree arr = NULL_TREE;
3883 if (in_combined_parallel)
3884 {
3885 gcc_assert (fd->ordered == 0);
3886 /* In a combined parallel loop, emit a call to
3887 GOMP_loop_foo_next. */
3888 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3889 build_fold_addr_expr (istart0),
3890 build_fold_addr_expr (iend0));
3891 }
3892 else
3893 {
3894 tree t0, t1, t2, t3, t4;
3895 /* If this is not a combined parallel loop, emit a call to
3896 GOMP_loop_foo_start in ENTRY_BB. */
3897 t4 = build_fold_addr_expr (iend0);
3898 t3 = build_fold_addr_expr (istart0);
3899 if (fd->ordered)
3900 {
3901 t0 = build_int_cst (unsigned_type_node,
3902 fd->ordered - fd->collapse + 1);
3903 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3904 fd->ordered
3905 - fd->collapse + 1),
3906 ".omp_counts");
3907 DECL_NAMELESS (arr) = 1;
3908 TREE_ADDRESSABLE (arr) = 1;
3909 TREE_STATIC (arr) = 1;
3910 vec<constructor_elt, va_gc> *v;
3911 vec_alloc (v, fd->ordered - fd->collapse + 1);
3912 int idx;
3913
3914 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3915 {
3916 tree c;
3917 if (idx == 0 && fd->collapse > 1)
3918 c = fd->loop.n2;
3919 else
3920 c = counts[idx + fd->collapse - 1];
3921 tree purpose = size_int (idx);
3922 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3923 if (TREE_CODE (c) != INTEGER_CST)
3924 TREE_STATIC (arr) = 0;
3925 }
3926
3927 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3928 if (!TREE_STATIC (arr))
3929 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3930 void_type_node, arr),
3931 true, NULL_TREE, true, GSI_SAME_STMT);
3932 t1 = build_fold_addr_expr (arr);
3933 t2 = NULL_TREE;
3934 }
3935 else
3936 {
3937 t2 = fold_convert (fd->iter_type, fd->loop.step);
3938 t1 = fd->loop.n2;
3939 t0 = fd->loop.n1;
3940 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3941 {
3942 tree innerc
3943 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3944 OMP_CLAUSE__LOOPTEMP_);
3945 gcc_assert (innerc);
3946 t0 = OMP_CLAUSE_DECL (innerc);
3947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3948 OMP_CLAUSE__LOOPTEMP_);
3949 gcc_assert (innerc);
3950 t1 = OMP_CLAUSE_DECL (innerc);
3951 }
3952 if (POINTER_TYPE_P (TREE_TYPE (t0))
3953 && TYPE_PRECISION (TREE_TYPE (t0))
3954 != TYPE_PRECISION (fd->iter_type))
3955 {
3956 /* Avoid casting pointers to integer of a different size. */
3957 tree itype = signed_type_for (type);
3958 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3959 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3960 }
3961 else
3962 {
3963 t1 = fold_convert (fd->iter_type, t1);
3964 t0 = fold_convert (fd->iter_type, t0);
3965 }
3966 if (bias)
3967 {
3968 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3969 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3970 }
3971 }
3972 if (fd->iter_type == long_integer_type_node || fd->ordered)
3973 {
3974 if (fd->chunk_size)
3975 {
3976 t = fold_convert (fd->iter_type, fd->chunk_size);
3977 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3978 if (sched_arg)
3979 {
3980 if (fd->ordered)
3981 t = build_call_expr (builtin_decl_explicit (start_fn),
3982 8, t0, t1, sched_arg, t, t3, t4,
3983 reductions, mem);
3984 else
3985 t = build_call_expr (builtin_decl_explicit (start_fn),
3986 9, t0, t1, t2, sched_arg, t, t3, t4,
3987 reductions, mem);
3988 }
3989 else if (fd->ordered)
3990 t = build_call_expr (builtin_decl_explicit (start_fn),
3991 5, t0, t1, t, t3, t4);
3992 else
3993 t = build_call_expr (builtin_decl_explicit (start_fn),
3994 6, t0, t1, t2, t, t3, t4);
3995 }
3996 else if (fd->ordered)
3997 t = build_call_expr (builtin_decl_explicit (start_fn),
3998 4, t0, t1, t3, t4);
3999 else
4000 t = build_call_expr (builtin_decl_explicit (start_fn),
4001 5, t0, t1, t2, t3, t4);
4002 }
4003 else
4004 {
4005 tree t5;
4006 tree c_bool_type;
4007 tree bfn_decl;
4008
4009 /* The GOMP_loop_ull_*start functions have an additional boolean
4010 argument, true for < loops and false for > loops.
4011 In Fortran, the C bool type can be different from
4012 boolean_type_node. */
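/* For example (informally; from libgomp's loop_ull.c), the chunked
   variant called below with 7 arguments has the shape

     bool GOMP_loop_ull_dynamic_start (bool up,
				       unsigned long long start,
				       unsigned long long end,
				       unsigned long long incr,
				       unsigned long long chunk_size,
				       unsigned long long *istart,
				       unsigned long long *iend);

   and T5 below is that leading UP argument.  */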
4013 bfn_decl = builtin_decl_explicit (start_fn);
4014 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4015 t5 = build_int_cst (c_bool_type,
4016 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4017 if (fd->chunk_size)
4018 {
4019 tree bfn_decl = builtin_decl_explicit (start_fn);
4020 t = fold_convert (fd->iter_type, fd->chunk_size);
4021 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4022 if (sched_arg)
4023 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4024 t, t3, t4, reductions, mem);
4025 else
4026 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4027 }
4028 else
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 6, t5, t0, t1, t2, t3, t4);
4031 }
4032 }
4033 if (TREE_TYPE (t) != boolean_type_node)
4034 t = fold_build2 (NE_EXPR, boolean_type_node,
4035 t, build_int_cst (TREE_TYPE (t), 0));
4036 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4037 true, GSI_SAME_STMT);
4038 if (arr && !TREE_STATIC (arr))
4039 {
4040 tree clobber = build_clobber (TREE_TYPE (arr));
4041 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4042 GSI_SAME_STMT);
4043 }
4044 if (fd->have_pointer_condtemp)
4045 expand_omp_build_assign (&gsi, condtemp, memv, false);
4046 if (fd->have_reductemp)
4047 {
4048 gimple *g = gsi_stmt (gsi);
4049 gsi_remove (&gsi, true);
4050 release_ssa_name (gimple_assign_lhs (g));
4051
4052 entry_bb = region->entry;
4053 gsi = gsi_last_nondebug_bb (entry_bb);
4054
4055 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4056 }
4057 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4058
4059 /* Remove the GIMPLE_OMP_FOR statement. */
4060 gsi_remove (&gsi, true);
4061
4062 if (gsi_end_p (gsif))
4063 gsif = gsi_after_labels (gsi_bb (gsif));
4064 gsi_next (&gsif);
4065
4066 /* Iteration setup for sequential loop goes in L0_BB. */
4067 tree startvar = fd->loop.v;
4068 tree endvar = NULL_TREE;
4069
4070 if (gimple_omp_for_combined_p (fd->for_stmt))
4071 {
4072 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4073 && gimple_omp_for_kind (inner_stmt)
4074 == GF_OMP_FOR_KIND_SIMD);
4075 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4076 OMP_CLAUSE__LOOPTEMP_);
4077 gcc_assert (innerc);
4078 startvar = OMP_CLAUSE_DECL (innerc);
4079 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4080 OMP_CLAUSE__LOOPTEMP_);
4081 gcc_assert (innerc);
4082 endvar = OMP_CLAUSE_DECL (innerc);
4083 }
4084
4085 gsi = gsi_start_bb (l0_bb);
4086 t = istart0;
4087 if (fd->ordered && fd->collapse == 1)
4088 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4089 fold_convert (fd->iter_type, fd->loop.step));
4090 else if (bias)
4091 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4092 if (fd->ordered && fd->collapse == 1)
4093 {
4094 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4095 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4096 fd->loop.n1, fold_convert (sizetype, t));
4097 else
4098 {
4099 t = fold_convert (TREE_TYPE (startvar), t);
4100 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4101 fd->loop.n1, t);
4102 }
4103 }
4104 else
4105 {
4106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4107 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4108 t = fold_convert (TREE_TYPE (startvar), t);
4109 }
4110 t = force_gimple_operand_gsi (&gsi, t,
4111 DECL_P (startvar)
4112 && TREE_ADDRESSABLE (startvar),
4113 NULL_TREE, false, GSI_CONTINUE_LINKING);
4114 assign_stmt = gimple_build_assign (startvar, t);
4115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4116 if (cond_var)
4117 {
4118 tree itype = TREE_TYPE (cond_var);
4119 /* For lastprivate(conditional:) itervar, we need some iteration
4120 counter that starts at a non-zero unsigned value and increases.
4121 Prefer as few IVs as possible, so if possible use startvar
4122 itself or startvar + constant (those would be incremented
4123 with step), and as a last resort use s0 + 1 incremented
4124 by 1. */
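/* E.g. (informally): for a constant positive N1 the start value itself is
   already a usable non-zero counter; for a constant N1 <= 0 the counter is
   the start value plus (1 - N1), which equals 1 in the first iteration; in
   the remaining cases istart0 + 1 is used.  */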
4125 if ((fd->ordered && fd->collapse == 1)
4126 || bias
4127 || POINTER_TYPE_P (type)
4128 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4129 || fd->loop.cond_code != LT_EXPR)
4130 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4131 build_int_cst (itype, 1));
4132 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4133 t = fold_convert (itype, t);
4134 else
4135 {
4136 tree c = fold_convert (itype, fd->loop.n1);
4137 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4138 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4139 }
4140 t = force_gimple_operand_gsi (&gsi, t, false,
4141 NULL_TREE, false, GSI_CONTINUE_LINKING);
4142 assign_stmt = gimple_build_assign (cond_var, t);
4143 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4144 }
4145
4146 t = iend0;
4147 if (fd->ordered && fd->collapse == 1)
4148 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4149 fold_convert (fd->iter_type, fd->loop.step));
4150 else if (bias)
4151 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4152 if (fd->ordered && fd->collapse == 1)
4153 {
4154 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4155 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4156 fd->loop.n1, fold_convert (sizetype, t));
4157 else
4158 {
4159 t = fold_convert (TREE_TYPE (startvar), t);
4160 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4161 fd->loop.n1, t);
4162 }
4163 }
4164 else
4165 {
4166 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4167 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4168 t = fold_convert (TREE_TYPE (startvar), t);
4169 }
4170 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4171 false, GSI_CONTINUE_LINKING);
4172 if (endvar)
4173 {
4174 assign_stmt = gimple_build_assign (endvar, iend);
4175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4176 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4177 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4178 else
4179 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4181 }
4182 /* Handle linear clause adjustments. */
4183 tree itercnt = NULL_TREE;
4184 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4185 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4186 c; c = OMP_CLAUSE_CHAIN (c))
4187 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4188 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4189 {
4190 tree d = OMP_CLAUSE_DECL (c);
4191 bool is_ref = omp_is_reference (d);
4192 tree t = d, a, dest;
4193 if (is_ref)
4194 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4195 tree type = TREE_TYPE (t);
4196 if (POINTER_TYPE_P (type))
4197 type = sizetype;
4198 dest = unshare_expr (t);
4199 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4200 expand_omp_build_assign (&gsif, v, t);
4201 if (itercnt == NULL_TREE)
4202 {
4203 itercnt = startvar;
4204 tree n1 = fd->loop.n1;
4205 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4206 {
4207 itercnt
4208 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4209 itercnt);
4210 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4211 }
4212 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4213 itercnt, n1);
4214 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4215 itercnt, fd->loop.step);
4216 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4217 NULL_TREE, false,
4218 GSI_CONTINUE_LINKING);
4219 }
4220 a = fold_build2 (MULT_EXPR, type,
4221 fold_convert (type, itercnt),
4222 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4223 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4224 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4225 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4226 false, GSI_CONTINUE_LINKING);
4227 assign_stmt = gimple_build_assign (dest, t);
4228 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4229 }
4230 if (fd->collapse > 1)
4231 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4232
4233 if (fd->ordered)
4234 {
4235 /* Until now, the counts array contained the number of iterations
4236 (or a variable containing it) for the ith loop. From now on, we
4237 need those counts only for the collapsed loops, and only for the
4238 2nd through the last collapsed one. Move those one element
4239 earlier; we'll use counts[fd->collapse - 1] for the first
4240 source/sink iteration counter and so on, and counts[fd->ordered]
4241 as the array holding the current counter values for
4242 depend(source). */
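/* An informal example of the shuffle: with fd->collapse == 3 and
   fd->ordered == 5, after the memmove counts[0] and counts[1] hold the
   iteration counts of the 2nd and 3rd collapsed loops, counts[2]
   (counts[fd->collapse - 1]) is the first source/sink iteration counter,
   counts[3] and counts[4] are the counters of the remaining ordered
   dimensions, and counts[5] (counts[fd->ordered]) is the array of current
   counter values for depend(source).  */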
4243 if (fd->collapse > 1)
4244 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4245 if (broken_loop)
4246 {
4247 int i;
4248 for (i = fd->collapse; i < fd->ordered; i++)
4249 {
4250 tree type = TREE_TYPE (fd->loops[i].v);
4251 tree this_cond
4252 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4253 fold_convert (type, fd->loops[i].n1),
4254 fold_convert (type, fd->loops[i].n2));
4255 if (!integer_onep (this_cond))
4256 break;
4257 }
4258 if (i < fd->ordered)
4259 {
4260 cont_bb
4261 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4262 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4263 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4264 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4265 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4266 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4267 make_edge (cont_bb, l1_bb, 0);
4268 l2_bb = create_empty_bb (cont_bb);
4269 broken_loop = false;
4270 }
4271 }
4272 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4273 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4274 ordered_lastprivate);
4275 if (counts[fd->collapse - 1])
4276 {
4277 gcc_assert (fd->collapse == 1);
4278 gsi = gsi_last_bb (l0_bb);
4279 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4280 istart0, true);
4281 gsi = gsi_last_bb (cont_bb);
4282 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4283 build_int_cst (fd->iter_type, 1));
4284 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4285 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4286 size_zero_node, NULL_TREE, NULL_TREE);
4287 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4288 t = counts[fd->collapse - 1];
4289 }
4290 else if (fd->collapse > 1)
4291 t = fd->loop.v;
4292 else
4293 {
4294 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4295 fd->loops[0].v, fd->loops[0].n1);
4296 t = fold_convert (fd->iter_type, t);
4297 }
4298 gsi = gsi_last_bb (l0_bb);
4299 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4300 size_zero_node, NULL_TREE, NULL_TREE);
4301 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 expand_omp_build_assign (&gsi, aref, t, true);
4304 }
4305
4306 if (!broken_loop)
4307 {
4308 /* Code to control the increment and predicate for the sequential
4309 loop goes in the CONT_BB. */
4310 gsi = gsi_last_nondebug_bb (cont_bb);
4311 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4312 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4313 vmain = gimple_omp_continue_control_use (cont_stmt);
4314 vback = gimple_omp_continue_control_def (cont_stmt);
4315
4316 if (cond_var)
4317 {
4318 tree itype = TREE_TYPE (cond_var);
4319 tree t2;
4320 if ((fd->ordered && fd->collapse == 1)
4321 || bias
4322 || POINTER_TYPE_P (type)
4323 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4324 || fd->loop.cond_code != LT_EXPR)
4325 t2 = build_int_cst (itype, 1);
4326 else
4327 t2 = fold_convert (itype, fd->loop.step);
4328 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4329 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4330 NULL_TREE, true, GSI_SAME_STMT);
4331 assign_stmt = gimple_build_assign (cond_var, t2);
4332 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4333 }
4334
4335 if (!gimple_omp_for_combined_p (fd->for_stmt))
4336 {
4337 if (POINTER_TYPE_P (type))
4338 t = fold_build_pointer_plus (vmain, fd->loop.step);
4339 else
4340 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4341 t = force_gimple_operand_gsi (&gsi, t,
4342 DECL_P (vback)
4343 && TREE_ADDRESSABLE (vback),
4344 NULL_TREE, true, GSI_SAME_STMT);
4345 assign_stmt = gimple_build_assign (vback, t);
4346 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4347
4348 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4349 {
4350 tree tem;
4351 if (fd->collapse > 1)
4352 tem = fd->loop.v;
4353 else
4354 {
4355 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4356 fd->loops[0].v, fd->loops[0].n1);
4357 tem = fold_convert (fd->iter_type, tem);
4358 }
4359 tree aref = build4 (ARRAY_REF, fd->iter_type,
4360 counts[fd->ordered], size_zero_node,
4361 NULL_TREE, NULL_TREE);
4362 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4363 true, GSI_SAME_STMT);
4364 expand_omp_build_assign (&gsi, aref, tem);
4365 }
4366
4367 t = build2 (fd->loop.cond_code, boolean_type_node,
4368 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4369 iend);
4370 gcond *cond_stmt = gimple_build_cond_empty (t);
4371 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4372 }
4373
4374 /* Remove GIMPLE_OMP_CONTINUE. */
4375 gsi_remove (&gsi, true);
4376
4377 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4378 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4379
4380 /* Emit code to get the next parallel iteration in L2_BB. */
4381 gsi = gsi_start_bb (l2_bb);
4382
4383 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4384 build_fold_addr_expr (istart0),
4385 build_fold_addr_expr (iend0));
4386 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4387 false, GSI_CONTINUE_LINKING);
4388 if (TREE_TYPE (t) != boolean_type_node)
4389 t = fold_build2 (NE_EXPR, boolean_type_node,
4390 t, build_int_cst (TREE_TYPE (t), 0));
4391 gcond *cond_stmt = gimple_build_cond_empty (t);
4392 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4393 }
4394
4395 /* Add the loop cleanup function. */
4396 gsi = gsi_last_nondebug_bb (exit_bb);
4397 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4398 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4399 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4400 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4401 else
4402 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4403 gcall *call_stmt = gimple_build_call (t, 0);
4404 if (fd->ordered)
4405 {
4406 tree arr = counts[fd->ordered];
4407 tree clobber = build_clobber (TREE_TYPE (arr));
4408 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4409 GSI_SAME_STMT);
4410 }
4411 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4412 {
4413 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4414 if (fd->have_reductemp)
4415 {
4416 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4417 gimple_call_lhs (call_stmt));
4418 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4419 }
4420 }
4421 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4422 gsi_remove (&gsi, true);
4423
4424 /* Connect the new blocks. */
4425 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4426 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4427
4428 if (!broken_loop)
4429 {
4430 gimple_seq phis;
4431
4432 e = find_edge (cont_bb, l3_bb);
4433 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4434
4435 phis = phi_nodes (l3_bb);
4436 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4437 {
4438 gimple *phi = gsi_stmt (gsi);
4439 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4440 PHI_ARG_DEF_FROM_EDGE (phi, e));
4441 }
4442 remove_edge (e);
4443
4444 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4445 e = find_edge (cont_bb, l1_bb);
4446 if (e == NULL)
4447 {
4448 e = BRANCH_EDGE (cont_bb);
4449 gcc_assert (single_succ (e->dest) == l1_bb);
4450 }
4451 if (gimple_omp_for_combined_p (fd->for_stmt))
4452 {
4453 remove_edge (e);
4454 e = NULL;
4455 }
4456 else if (fd->collapse > 1)
4457 {
4458 remove_edge (e);
4459 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4460 }
4461 else
4462 e->flags = EDGE_TRUE_VALUE;
4463 if (e)
4464 {
4465 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4466 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4467 }
4468 else
4469 {
4470 e = find_edge (cont_bb, l2_bb);
4471 e->flags = EDGE_FALLTHRU;
4472 }
4473 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4474
4475 if (gimple_in_ssa_p (cfun))
4476 {
4477 /* Add phis to the outer loop that connect to the phis in the inner,
4478 original loop, and move the loop entry value of the inner phi to
4479 the loop entry value of the outer phi. */
4480 gphi_iterator psi;
4481 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4482 {
4483 location_t locus;
4484 gphi *nphi;
4485 gphi *exit_phi = psi.phi ();
4486
4487 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4488 continue;
4489
4490 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4491 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4492
4493 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4494 edge latch_to_l1 = find_edge (latch, l1_bb);
4495 gphi *inner_phi
4496 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4497
4498 tree t = gimple_phi_result (exit_phi);
4499 tree new_res = copy_ssa_name (t, NULL);
4500 nphi = create_phi_node (new_res, l0_bb);
4501
4502 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4503 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4504 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4505 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4506 add_phi_arg (nphi, t, entry_to_l0, locus);
4507
4508 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4509 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4510
4511 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4512 }
4513 }
4514
4515 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4516 recompute_dominator (CDI_DOMINATORS, l2_bb));
4517 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4518 recompute_dominator (CDI_DOMINATORS, l3_bb));
4519 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4520 recompute_dominator (CDI_DOMINATORS, l0_bb));
4521 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4522 recompute_dominator (CDI_DOMINATORS, l1_bb));
4523
4524 /* We enter expand_omp_for_generic with a loop. This original loop may
4525 have its own loop struct, or it may be part of an outer loop struct
4526 (which may be the fake loop). */
4527 class loop *outer_loop = entry_bb->loop_father;
4528 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4529
4530 add_bb_to_loop (l2_bb, outer_loop);
4531
4532 /* We've added a new loop around the original loop. Allocate the
4533 corresponding loop struct. */
4534 class loop *new_loop = alloc_loop ();
4535 new_loop->header = l0_bb;
4536 new_loop->latch = l2_bb;
4537 add_loop (new_loop, outer_loop);
4538
4539 /* Allocate a loop structure for the original loop unless we already
4540 had one. */
4541 if (!orig_loop_has_loop_struct
4542 && !gimple_omp_for_combined_p (fd->for_stmt))
4543 {
4544 class loop *orig_loop = alloc_loop ();
4545 orig_loop->header = l1_bb;
4546 /* The loop may have multiple latches. */
4547 add_loop (orig_loop, new_loop);
4548 }
4549 }
4550 }
4551
4552 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4553 compute the needed allocation size; if !ALLOC this is for team
4554 allocations, if ALLOC for thread allocations. SZ is the initial size
4555 needed for other purposes, ALLOC_ALIGN the guaranteed alignment of the
4556 allocation in bytes and CNT the number of elements of each array; for
4557 !ALLOC CNT is omp_get_num_threads (), for ALLOC the number of iterations
4558 handled by the current thread. If PTR is non-NULL, it is the start of
4559 the allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of
4560 those _scantemp_ clauses pointers to the corresponding arrays. */
4561
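/* An informal sketch of how this helper is used (see the calls further
   down in this file): it is invoked once with PTR == NULL to compute the
   total size to allocate, e.g.

     size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, alloc_align,
				       cnt, NULL, alloc);

   and then, once the allocation exists, a second time with PTR pointing at
   it, so that each _scantemp_ clause decl is assigned its slice of the
   block:

     expand_omp_scantemp_alloc (clauses, ptr, sz, alloc_align, cnt,
				&gsi, alloc);  */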
4562 static tree
4563 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4564 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4565 gimple_stmt_iterator *gsi, bool alloc)
4566 {
4567 tree eltsz = NULL_TREE;
4568 unsigned HOST_WIDE_INT preval = 0;
4569 if (ptr && sz)
4570 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4571 ptr, size_int (sz));
4572 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4573 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4574 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4575 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4576 {
4577 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4578 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4579 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4580 {
4581 unsigned HOST_WIDE_INT szl
4582 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4583 szl = least_bit_hwi (szl);
4584 if (szl)
4585 al = MIN (al, szl);
4586 }
4587 if (ptr == NULL_TREE)
4588 {
4589 if (eltsz == NULL_TREE)
4590 eltsz = TYPE_SIZE_UNIT (pointee_type);
4591 else
4592 eltsz = size_binop (PLUS_EXPR, eltsz,
4593 TYPE_SIZE_UNIT (pointee_type));
4594 }
4595 if (preval == 0 && al <= alloc_align)
4596 {
4597 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4598 sz += diff;
4599 if (diff && ptr)
4600 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4601 ptr, size_int (diff));
4602 }
4603 else if (al > preval)
4604 {
4605 if (ptr)
4606 {
4607 ptr = fold_convert (pointer_sized_int_node, ptr);
4608 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4609 build_int_cst (pointer_sized_int_node,
4610 al - 1));
4611 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4612 build_int_cst (pointer_sized_int_node,
4613 -(HOST_WIDE_INT) al));
4614 ptr = fold_convert (ptr_type_node, ptr);
4615 }
4616 else
4617 sz += al - 1;
4618 }
4619 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4620 preval = al;
4621 else
4622 preval = 1;
4623 if (ptr)
4624 {
4625 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4626 ptr = OMP_CLAUSE_DECL (c);
4627 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4628 size_binop (MULT_EXPR, cnt,
4629 TYPE_SIZE_UNIT (pointee_type)));
4630 }
4631 }
4632
4633 if (ptr == NULL_TREE)
4634 {
4635 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4636 if (sz)
4637 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4638 return eltsz;
4639 }
4640 else
4641 return ptr;
4642 }
4643
4644 /* A subroutine of expand_omp_for. Generate code for a parallel
4645 loop with static schedule and no specified chunk size. Given
4646 parameters:
4647
4648 for (V = N1; V cond N2; V += STEP) BODY;
4649
4650 where COND is "<" or ">", we generate pseudocode
4651
4652 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4653 if (cond is <)
4654 adj = STEP - 1;
4655 else
4656 adj = STEP + 1;
4657 if ((__typeof (V)) -1 > 0 && cond is >)
4658 n = -(adj + N2 - N1) / -STEP;
4659 else
4660 n = (adj + N2 - N1) / STEP;
4661 q = n / nthreads;
4662 tt = n % nthreads;
4663 if (threadid < tt) goto L3; else goto L4;
4664 L3:
4665 tt = 0;
4666 q = q + 1;
4667 L4:
4668 s0 = q * threadid + tt;
4669 e0 = s0 + q;
4670 V = s0 * STEP + N1;
4671 if (s0 >= e0) goto L2; else goto L0;
4672 L0:
4673 e = e0 * STEP + N1;
4674 L1:
4675 BODY;
4676 V += STEP;
4677 if (V cond e) goto L1;
4678 L2:
4679 */
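/* A worked (informal) example of the partitioning above: for n = 10
   iterations and nthreads = 4 we get q = 2 and tt = 2, so threads 0 and 1
   take q + 1 = 3 iterations each ([0,3) and [3,6)) while threads 2 and 3
   take 2 each ([6,8) and [8,10)), covering the whole iteration space with
   at most one extra iteration per thread.  */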
4680
4681 static void
4682 expand_omp_for_static_nochunk (struct omp_region *region,
4683 struct omp_for_data *fd,
4684 gimple *inner_stmt)
4685 {
4686 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4687 tree type, itype, vmain, vback;
4688 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4689 basic_block body_bb, cont_bb, collapse_bb = NULL;
4690 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4691 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4692 gimple_stmt_iterator gsi, gsip;
4693 edge ep;
4694 bool broken_loop = region->cont == NULL;
4695 tree *counts = NULL;
4696 tree n1, n2, step;
4697 tree reductions = NULL_TREE;
4698 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4699
4700 itype = type = TREE_TYPE (fd->loop.v);
4701 if (POINTER_TYPE_P (type))
4702 itype = signed_type_for (type);
4703
4704 entry_bb = region->entry;
4705 cont_bb = region->cont;
4706 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4707 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4708 gcc_assert (broken_loop
4709 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4710 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4711 body_bb = single_succ (seq_start_bb);
4712 if (!broken_loop)
4713 {
4714 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4715 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4716 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4717 }
4718 exit_bb = region->exit;
4719
4720 /* Iteration space partitioning goes in ENTRY_BB. */
4721 gsi = gsi_last_nondebug_bb (entry_bb);
4722 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4723 gsip = gsi;
4724 gsi_prev (&gsip);
4725
4726 if (fd->collapse > 1)
4727 {
4728 int first_zero_iter = -1, dummy = -1;
4729 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4730
4731 counts = XALLOCAVEC (tree, fd->collapse);
4732 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4733 fin_bb, first_zero_iter,
4734 dummy_bb, dummy, l2_dom_bb);
4735 t = NULL_TREE;
4736 }
4737 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4738 t = integer_one_node;
4739 else
4740 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4741 fold_convert (type, fd->loop.n1),
4742 fold_convert (type, fd->loop.n2));
4743 if (fd->collapse == 1
4744 && TYPE_UNSIGNED (type)
4745 && (t == NULL_TREE || !integer_onep (t)))
4746 {
4747 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4748 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4749 true, GSI_SAME_STMT);
4750 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4751 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4752 true, GSI_SAME_STMT);
4753 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4754 NULL_TREE, NULL_TREE);
4755 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4756 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4757 expand_omp_regimplify_p, NULL, NULL)
4758 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4759 expand_omp_regimplify_p, NULL, NULL))
4760 {
4761 gsi = gsi_for_stmt (cond_stmt);
4762 gimple_regimplify_operands (cond_stmt, &gsi);
4763 }
4764 ep = split_block (entry_bb, cond_stmt);
4765 ep->flags = EDGE_TRUE_VALUE;
4766 entry_bb = ep->dest;
4767 ep->probability = profile_probability::very_likely ();
4768 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4769 ep->probability = profile_probability::very_unlikely ();
4770 if (gimple_in_ssa_p (cfun))
4771 {
4772 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4773 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4774 !gsi_end_p (gpi); gsi_next (&gpi))
4775 {
4776 gphi *phi = gpi.phi ();
4777 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4778 ep, UNKNOWN_LOCATION);
4779 }
4780 }
4781 gsi = gsi_last_bb (entry_bb);
4782 }
4783
4784 if (fd->lastprivate_conditional)
4785 {
4786 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4787 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4788 if (fd->have_pointer_condtemp)
4789 condtemp = OMP_CLAUSE_DECL (c);
4790 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4791 cond_var = OMP_CLAUSE_DECL (c);
4792 }
4793 if (fd->have_reductemp
4794 /* For scan, we don't want to reinitialize condtemp before the
4795 second loop. */
4796 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4797 || fd->have_nonctrl_scantemp)
4798 {
4799 tree t1 = build_int_cst (long_integer_type_node, 0);
4800 tree t2 = build_int_cst (long_integer_type_node, 1);
4801 tree t3 = build_int_cstu (long_integer_type_node,
4802 (HOST_WIDE_INT_1U << 31) + 1);
4803 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4804 gimple_stmt_iterator gsi2 = gsi_none ();
4805 gimple *g = NULL;
4806 tree mem = null_pointer_node, memv = NULL_TREE;
4807 unsigned HOST_WIDE_INT condtemp_sz = 0;
4808 unsigned HOST_WIDE_INT alloc_align = 0;
4809 if (fd->have_reductemp)
4810 {
4811 gcc_assert (!fd->have_nonctrl_scantemp);
4812 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4813 reductions = OMP_CLAUSE_DECL (c);
4814 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4815 g = SSA_NAME_DEF_STMT (reductions);
4816 reductions = gimple_assign_rhs1 (g);
4817 OMP_CLAUSE_DECL (c) = reductions;
4818 gsi2 = gsi_for_stmt (g);
4819 }
4820 else
4821 {
4822 if (gsi_end_p (gsip))
4823 gsi2 = gsi_after_labels (region->entry);
4824 else
4825 gsi2 = gsip;
4826 reductions = null_pointer_node;
4827 }
4828 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4829 {
4830 tree type;
4831 if (fd->have_pointer_condtemp)
4832 type = TREE_TYPE (condtemp);
4833 else
4834 type = ptr_type_node;
4835 memv = create_tmp_var (type);
4836 TREE_ADDRESSABLE (memv) = 1;
4837 unsigned HOST_WIDE_INT sz = 0;
4838 tree size = NULL_TREE;
4839 if (fd->have_pointer_condtemp)
4840 {
4841 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4842 sz *= fd->lastprivate_conditional;
4843 condtemp_sz = sz;
4844 }
4845 if (fd->have_nonctrl_scantemp)
4846 {
4847 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4848 gimple *g = gimple_build_call (nthreads, 0);
4849 nthreads = create_tmp_var (integer_type_node);
4850 gimple_call_set_lhs (g, nthreads);
4851 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4852 nthreads = fold_convert (sizetype, nthreads);
4853 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4854 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4855 alloc_align, nthreads, NULL,
4856 false);
4857 size = fold_convert (type, size);
4858 }
4859 else
4860 size = build_int_cst (type, sz);
4861 expand_omp_build_assign (&gsi2, memv, size, false);
4862 mem = build_fold_addr_expr (memv);
4863 }
4864 tree t
4865 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4866 9, t1, t2, t2, t3, t1, null_pointer_node,
4867 null_pointer_node, reductions, mem);
4868 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4869 true, GSI_SAME_STMT);
4870 if (fd->have_pointer_condtemp)
4871 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4872 if (fd->have_nonctrl_scantemp)
4873 {
4874 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4875 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4876 alloc_align, nthreads, &gsi2, false);
4877 }
4878 if (fd->have_reductemp)
4879 {
4880 gsi_remove (&gsi2, true);
4881 release_ssa_name (gimple_assign_lhs (g));
4882 }
4883 }
4884 switch (gimple_omp_for_kind (fd->for_stmt))
4885 {
4886 case GF_OMP_FOR_KIND_FOR:
4887 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4888 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4889 break;
4890 case GF_OMP_FOR_KIND_DISTRIBUTE:
4891 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4892 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4893 break;
4894 default:
4895 gcc_unreachable ();
4896 }
4897 nthreads = build_call_expr (nthreads, 0);
4898 nthreads = fold_convert (itype, nthreads);
4899 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4900 true, GSI_SAME_STMT);
4901 threadid = build_call_expr (threadid, 0);
4902 threadid = fold_convert (itype, threadid);
4903 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4904 true, GSI_SAME_STMT);
4905
4906 n1 = fd->loop.n1;
4907 n2 = fd->loop.n2;
4908 step = fd->loop.step;
4909 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4910 {
4911 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4912 OMP_CLAUSE__LOOPTEMP_);
4913 gcc_assert (innerc);
4914 n1 = OMP_CLAUSE_DECL (innerc);
4915 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4916 OMP_CLAUSE__LOOPTEMP_);
4917 gcc_assert (innerc);
4918 n2 = OMP_CLAUSE_DECL (innerc);
4919 }
4920 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4921 true, NULL_TREE, true, GSI_SAME_STMT);
4922 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4923 true, NULL_TREE, true, GSI_SAME_STMT);
4924 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4925 true, NULL_TREE, true, GSI_SAME_STMT);
4926
4927 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4928 t = fold_build2 (PLUS_EXPR, itype, step, t);
4929 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4930 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4931 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4932 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4933 fold_build1 (NEGATE_EXPR, itype, t),
4934 fold_build1 (NEGATE_EXPR, itype, step));
4935 else
4936 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4937 t = fold_convert (itype, t);
4938 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4939
4940 q = create_tmp_reg (itype, "q");
4941 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4942 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4943 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4944
4945 tt = create_tmp_reg (itype, "tt");
4946 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
4947 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4948 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
4949
4950 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
4951 gcond *cond_stmt = gimple_build_cond_empty (t);
4952 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4953
4954 second_bb = split_block (entry_bb, cond_stmt)->dest;
4955 gsi = gsi_last_nondebug_bb (second_bb);
4956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4957
4958 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
4959 GSI_SAME_STMT);
4960 gassign *assign_stmt
4961 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
4962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4963
4964 third_bb = split_block (second_bb, assign_stmt)->dest;
4965 gsi = gsi_last_nondebug_bb (third_bb);
4966 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4967
4968 if (fd->have_nonctrl_scantemp)
4969 {
4970 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4971 tree controlp = NULL_TREE, controlb = NULL_TREE;
4972 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4973 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4974 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4975 {
4976 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4977 controlb = OMP_CLAUSE_DECL (c);
4978 else
4979 controlp = OMP_CLAUSE_DECL (c);
4980 if (controlb && controlp)
4981 break;
4982 }
4983 gcc_assert (controlp && controlb);
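/* Descriptive note (not in upstream): the code below computes the scan
   temporary storage needed for this thread's Q iterations and records in
   CONTROLB whether it exceeds 16 KiB; large buffers are malloc'ed (and
   freed at region exit), small ones are alloca'ed after saving the stack
   pointer into CONTROLP.  */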
4984 tree cnt = create_tmp_var (sizetype);
4985 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
4986 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4987 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
4988 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
4989 alloc_align, cnt, NULL, true);
4990 tree size = create_tmp_var (sizetype);
4991 expand_omp_build_assign (&gsi, size, sz, false);
4992 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
4993 size, size_int (16384));
4994 expand_omp_build_assign (&gsi, controlb, cmp);
4995 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4996 NULL_TREE, NULL_TREE);
4997 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4998 fourth_bb = split_block (third_bb, g)->dest;
4999 gsi = gsi_last_nondebug_bb (fourth_bb);
5000 /* FIXME: Once we have allocators, this should use the allocator. */
5001 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5002 gimple_call_set_lhs (g, controlp);
5003 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5004 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5005 &gsi, true);
5006 gsi_prev (&gsi);
5007 g = gsi_stmt (gsi);
5008 fifth_bb = split_block (fourth_bb, g)->dest;
5009 gsi = gsi_last_nondebug_bb (fifth_bb);
5010
5011 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5012 gimple_call_set_lhs (g, controlp);
5013 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5014 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5015 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5016 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5017 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5018 {
5019 tree tmp = create_tmp_var (sizetype);
5020 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5021 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5022 TYPE_SIZE_UNIT (pointee_type));
5023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5024 g = gimple_build_call (alloca_decl, 2, tmp,
5025 size_int (TYPE_ALIGN (pointee_type)));
5026 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5028 }
5029
5030 sixth_bb = split_block (fifth_bb, g)->dest;
5031 gsi = gsi_last_nondebug_bb (sixth_bb);
5032 }
5033
5034 t = build2 (MULT_EXPR, itype, q, threadid);
5035 t = build2 (PLUS_EXPR, itype, t, tt);
5036 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5037
5038 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5039 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5040
5041 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5042 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5043
5044 /* Remove the GIMPLE_OMP_FOR statement. */
5045 gsi_remove (&gsi, true);
5046
5047 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5048 gsi = gsi_start_bb (seq_start_bb);
5049
5050 tree startvar = fd->loop.v;
5051 tree endvar = NULL_TREE;
5052
5053 if (gimple_omp_for_combined_p (fd->for_stmt))
5054 {
5055 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5056 ? gimple_omp_parallel_clauses (inner_stmt)
5057 : gimple_omp_for_clauses (inner_stmt);
5058 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5059 gcc_assert (innerc);
5060 startvar = OMP_CLAUSE_DECL (innerc);
5061 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5062 OMP_CLAUSE__LOOPTEMP_);
5063 gcc_assert (innerc);
5064 endvar = OMP_CLAUSE_DECL (innerc);
5065 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5066 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5067 {
5068 int i;
5069 for (i = 1; i < fd->collapse; i++)
5070 {
5071 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5072 OMP_CLAUSE__LOOPTEMP_);
5073 gcc_assert (innerc);
5074 }
5075 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5076 OMP_CLAUSE__LOOPTEMP_);
5077 if (innerc)
5078 {
5079 /* If needed (distribute parallel for with lastprivate),
5080 propagate down the total number of iterations. */
5081 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5082 fd->loop.n2);
5083 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5084 GSI_CONTINUE_LINKING);
5085 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5086 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5087 }
5088 }
5089 }
5090 t = fold_convert (itype, s0);
5091 t = fold_build2 (MULT_EXPR, itype, t, step);
5092 if (POINTER_TYPE_P (type))
5093 {
5094 t = fold_build_pointer_plus (n1, t);
5095 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5096 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5097 t = fold_convert (signed_type_for (type), t);
5098 }
5099 else
5100 t = fold_build2 (PLUS_EXPR, type, t, n1);
5101 t = fold_convert (TREE_TYPE (startvar), t);
5102 t = force_gimple_operand_gsi (&gsi, t,
5103 DECL_P (startvar)
5104 && TREE_ADDRESSABLE (startvar),
5105 NULL_TREE, false, GSI_CONTINUE_LINKING);
5106 assign_stmt = gimple_build_assign (startvar, t);
5107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5108 if (cond_var)
5109 {
5110 tree itype = TREE_TYPE (cond_var);
5111 /* For lastprivate(conditional:) itervar, we need some iteration
5112 counter that starts at unsigned non-zero and increases.
5113 Prefer as few IVs as possible, so if we can use startvar
5114 itself, use that, or startvar + constant (those would be
5115 incremented with step), and as a last resort use s0 + 1
5116 incremented by 1. */
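/* Illustrative only: for e.g. "for (i = 5; i < n; i++)" the counter can be
   i itself (it starts above zero); for "for (i = -3; i < n; i++)" use
   i + 4, which is always at least 1; pointer iterators, non-constant lower
   bounds or a non-"<" condition fall back to s0 + 1, incremented by 1.  */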
5117 if (POINTER_TYPE_P (type)
5118 || TREE_CODE (n1) != INTEGER_CST
5119 || fd->loop.cond_code != LT_EXPR)
5120 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5121 build_int_cst (itype, 1));
5122 else if (tree_int_cst_sgn (n1) == 1)
5123 t = fold_convert (itype, t);
5124 else
5125 {
5126 tree c = fold_convert (itype, n1);
5127 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5128 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5129 }
5130 t = force_gimple_operand_gsi (&gsi, t, false,
5131 NULL_TREE, false, GSI_CONTINUE_LINKING);
5132 assign_stmt = gimple_build_assign (cond_var, t);
5133 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5134 }
5135
5136 t = fold_convert (itype, e0);
5137 t = fold_build2 (MULT_EXPR, itype, t, step);
5138 if (POINTER_TYPE_P (type))
5139 {
5140 t = fold_build_pointer_plus (n1, t);
5141 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5142 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5143 t = fold_convert (signed_type_for (type), t);
5144 }
5145 else
5146 t = fold_build2 (PLUS_EXPR, type, t, n1);
5147 t = fold_convert (TREE_TYPE (startvar), t);
5148 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5149 false, GSI_CONTINUE_LINKING);
5150 if (endvar)
5151 {
5152 assign_stmt = gimple_build_assign (endvar, e);
5153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5154 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5155 assign_stmt = gimple_build_assign (fd->loop.v, e);
5156 else
5157 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5158 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5159 }
5160 /* Handle linear clause adjustments. */
5161 tree itercnt = NULL_TREE;
5162 tree *nonrect_bounds = NULL;
5163 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5164 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5165 c; c = OMP_CLAUSE_CHAIN (c))
5166 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5167 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5168 {
5169 tree d = OMP_CLAUSE_DECL (c);
5170 bool is_ref = omp_is_reference (d);
5171 tree t = d, a, dest;
5172 if (is_ref)
5173 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5174 if (itercnt == NULL_TREE)
5175 {
5176 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5177 {
5178 itercnt = fold_build2 (MINUS_EXPR, itype,
5179 fold_convert (itype, n1),
5180 fold_convert (itype, fd->loop.n1));
5181 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5182 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5183 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5184 NULL_TREE, false,
5185 GSI_CONTINUE_LINKING);
5186 }
5187 else
5188 itercnt = s0;
5189 }
5190 tree type = TREE_TYPE (t);
5191 if (POINTER_TYPE_P (type))
5192 type = sizetype;
5193 a = fold_build2 (MULT_EXPR, type,
5194 fold_convert (type, itercnt),
5195 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5196 dest = unshare_expr (t);
5197 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5198 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5199 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5200 false, GSI_CONTINUE_LINKING);
5201 assign_stmt = gimple_build_assign (dest, t);
5202 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5203 }
5204 if (fd->collapse > 1)
5205 {
5206 if (fd->non_rect)
5207 {
5208 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5209 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5210 }
5211 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5212 startvar);
5213 }
5214
5215 if (!broken_loop)
5216 {
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi = gsi_last_nondebug_bb (cont_bb);
5220 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5221 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5222 vmain = gimple_omp_continue_control_use (cont_stmt);
5223 vback = gimple_omp_continue_control_def (cont_stmt);
5224
5225 if (cond_var)
5226 {
5227 tree itype = TREE_TYPE (cond_var);
5228 tree t2;
5229 if (POINTER_TYPE_P (type)
5230 || TREE_CODE (n1) != INTEGER_CST
5231 || fd->loop.cond_code != LT_EXPR)
5232 t2 = build_int_cst (itype, 1);
5233 else
5234 t2 = fold_convert (itype, step);
5235 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5236 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5237 NULL_TREE, true, GSI_SAME_STMT);
5238 assign_stmt = gimple_build_assign (cond_var, t2);
5239 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5240 }
5241
5242 if (!gimple_omp_for_combined_p (fd->for_stmt))
5243 {
5244 if (POINTER_TYPE_P (type))
5245 t = fold_build_pointer_plus (vmain, step);
5246 else
5247 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5248 t = force_gimple_operand_gsi (&gsi, t,
5249 DECL_P (vback)
5250 && TREE_ADDRESSABLE (vback),
5251 NULL_TREE, true, GSI_SAME_STMT);
5252 assign_stmt = gimple_build_assign (vback, t);
5253 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5254
5255 t = build2 (fd->loop.cond_code, boolean_type_node,
5256 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5257 ? t : vback, e);
5258 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5259 }
5260
5261 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5262 gsi_remove (&gsi, true);
5263
5264 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5265 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5266 cont_bb, body_bb);
5267 }
5268
5269 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5270 gsi = gsi_last_nondebug_bb (exit_bb);
5271 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5272 {
5273 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5274 if (fd->have_reductemp
5275 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5276 && !fd->have_nonctrl_scantemp))
5277 {
5278 tree fn;
5279 if (t)
5280 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5281 else
5282 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5283 gcall *g = gimple_build_call (fn, 0);
5284 if (t)
5285 {
5286 gimple_call_set_lhs (g, t);
5287 if (fd->have_reductemp)
5288 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5289 NOP_EXPR, t),
5290 GSI_SAME_STMT);
5291 }
5292 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5293 }
5294 else
5295 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5296 }
5297 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5298 && !fd->have_nonctrl_scantemp)
5299 {
5300 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5301 gcall *g = gimple_build_call (fn, 0);
5302 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5303 }
5304 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5305 {
5306 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5307 tree controlp = NULL_TREE, controlb = NULL_TREE;
5308 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5309 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5310 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5311 {
5312 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5313 controlb = OMP_CLAUSE_DECL (c);
5314 else
5315 controlp = OMP_CLAUSE_DECL (c);
5316 if (controlb && controlp)
5317 break;
5318 }
5319 gcc_assert (controlp && controlb);
5320 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5321 NULL_TREE, NULL_TREE);
5322 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5323 exit1_bb = split_block (exit_bb, g)->dest;
5324 gsi = gsi_after_labels (exit1_bb);
5325 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5326 controlp);
5327 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5328 exit2_bb = split_block (exit1_bb, g)->dest;
5329 gsi = gsi_after_labels (exit2_bb);
5330 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5331 controlp);
5332 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5333 exit3_bb = split_block (exit2_bb, g)->dest;
5334 gsi = gsi_after_labels (exit3_bb);
5335 }
5336 gsi_remove (&gsi, true);
5337
5338 /* Connect all the blocks. */
5339 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5340 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5341 ep = find_edge (entry_bb, second_bb);
5342 ep->flags = EDGE_TRUE_VALUE;
5343 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5344 if (fourth_bb)
5345 {
5346 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5347 ep->probability
5348 = profile_probability::guessed_always ().apply_scale (1, 2);
5349 ep = find_edge (third_bb, fourth_bb);
5350 ep->flags = EDGE_TRUE_VALUE;
5351 ep->probability
5352 = profile_probability::guessed_always ().apply_scale (1, 2);
5353 ep = find_edge (fourth_bb, fifth_bb);
5354 redirect_edge_and_branch (ep, sixth_bb);
5355 }
5356 else
5357 sixth_bb = third_bb;
5358 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5359 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5360 if (exit1_bb)
5361 {
5362 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5363 ep->probability
5364 = profile_probability::guessed_always ().apply_scale (1, 2);
5365 ep = find_edge (exit_bb, exit1_bb);
5366 ep->flags = EDGE_TRUE_VALUE;
5367 ep->probability
5368 = profile_probability::guessed_always ().apply_scale (1, 2);
5369 ep = find_edge (exit1_bb, exit2_bb);
5370 redirect_edge_and_branch (ep, exit3_bb);
5371 }
5372
5373 if (!broken_loop)
5374 {
5375 ep = find_edge (cont_bb, body_bb);
5376 if (ep == NULL)
5377 {
5378 ep = BRANCH_EDGE (cont_bb);
5379 gcc_assert (single_succ (ep->dest) == body_bb);
5380 }
5381 if (gimple_omp_for_combined_p (fd->for_stmt))
5382 {
5383 remove_edge (ep);
5384 ep = NULL;
5385 }
5386 else if (fd->collapse > 1)
5387 {
5388 remove_edge (ep);
5389 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5390 }
5391 else
5392 ep->flags = EDGE_TRUE_VALUE;
5393 find_edge (cont_bb, fin_bb)->flags
5394 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5395 }
5396
5397 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5398 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5399 if (fourth_bb)
5400 {
5401 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5402 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5403 }
5404 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5405
5406 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5407 recompute_dominator (CDI_DOMINATORS, body_bb));
5408 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5409 recompute_dominator (CDI_DOMINATORS, fin_bb));
5410 if (exit1_bb)
5411 {
5412 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5413 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5414 }
5415
5416 class loop *loop = body_bb->loop_father;
5417 if (loop != entry_bb->loop_father)
5418 {
5419 gcc_assert (broken_loop || loop->header == body_bb);
5420 gcc_assert (broken_loop
5421 || loop->latch == region->cont
5422 || single_pred (loop->latch) == region->cont);
5423 return;
5424 }
5425
5426 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5427 {
5428 loop = alloc_loop ();
5429 loop->header = body_bb;
5430 if (collapse_bb == NULL)
5431 loop->latch = cont_bb;
5432 add_loop (loop, body_bb->loop_father);
5433 }
5434 }
5435
5436 /* Return phi in E->DEST with ARG on edge E. */
5437
5438 static gphi *
5439 find_phi_with_arg_on_edge (tree arg, edge e)
5440 {
5441 basic_block bb = e->dest;
5442
5443 for (gphi_iterator gpi = gsi_start_phis (bb);
5444 !gsi_end_p (gpi);
5445 gsi_next (&gpi))
5446 {
5447 gphi *phi = gpi.phi ();
5448 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5449 return phi;
5450 }
5451
5452 return NULL;
5453 }
5454
5455 /* A subroutine of expand_omp_for. Generate code for a parallel
5456 loop with static schedule and a specified chunk size. Given
5457 parameters:
5458
5459 for (V = N1; V cond N2; V += STEP) BODY;
5460
5461 where COND is "<" or ">", we generate pseudocode
5462
5463 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5464 if (cond is <)
5465 adj = STEP - 1;
5466 else
5467 adj = STEP + 1;
5468 if ((__typeof (V)) -1 > 0 && cond is >)
5469 n = -(adj + N2 - N1) / -STEP;
5470 else
5471 n = (adj + N2 - N1) / STEP;
5472 trip = 0;
5473 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5474 here so that V is defined
5475 if the loop is not entered
5476 L0:
5477 s0 = (trip * nthreads + threadid) * CHUNK;
5478 e0 = min (s0 + CHUNK, n);
5479 if (s0 < n) goto L1; else goto L4;
5480 L1:
5481 V = s0 * STEP + N1;
5482 e = e0 * STEP + N1;
5483 L2:
5484 BODY;
5485 V += STEP;
5486 if (V cond e) goto L2; else goto L3;
5487 L3:
5488 trip += 1;
5489 goto L0;
5490 L4:
5491 */
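/* For instance (illustrative only), a loop such as

     #pragma omp for schedule(static, 4)
     for (V = 0; V < N; V++)
       BODY;

   is expanded with CHUNK == 4, each thread taking successive 4-iteration
   chunks of the iteration space in round-robin order.  */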
5492
5493 static void
5494 expand_omp_for_static_chunk (struct omp_region *region,
5495 struct omp_for_data *fd, gimple *inner_stmt)
5496 {
5497 tree n, s0, e0, e, t;
5498 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5499 tree type, itype, vmain, vback, vextra;
5500 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5501 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5502 gimple_stmt_iterator gsi, gsip;
5503 edge se;
5504 bool broken_loop = region->cont == NULL;
5505 tree *counts = NULL;
5506 tree n1, n2, step;
5507 tree reductions = NULL_TREE;
5508 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5509
5510 itype = type = TREE_TYPE (fd->loop.v);
5511 if (POINTER_TYPE_P (type))
5512 itype = signed_type_for (type);
5513
5514 entry_bb = region->entry;
5515 se = split_block (entry_bb, last_stmt (entry_bb));
5516 entry_bb = se->src;
5517 iter_part_bb = se->dest;
5518 cont_bb = region->cont;
5519 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5520 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5521 gcc_assert (broken_loop
5522 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5523 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5524 body_bb = single_succ (seq_start_bb);
5525 if (!broken_loop)
5526 {
5527 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5528 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5529 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5530 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5531 }
5532 exit_bb = region->exit;
5533
5534 /* Trip and adjustment setup goes in ENTRY_BB. */
5535 gsi = gsi_last_nondebug_bb (entry_bb);
5536 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5537 gsip = gsi;
5538 gsi_prev (&gsip);
5539
5540 if (fd->collapse > 1)
5541 {
5542 int first_zero_iter = -1, dummy = -1;
5543 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5544
5545 counts = XALLOCAVEC (tree, fd->collapse);
5546 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5547 fin_bb, first_zero_iter,
5548 dummy_bb, dummy, l2_dom_bb);
5549 t = NULL_TREE;
5550 }
5551 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5552 t = integer_one_node;
5553 else
5554 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5555 fold_convert (type, fd->loop.n1),
5556 fold_convert (type, fd->loop.n2));
5557 if (fd->collapse == 1
5558 && TYPE_UNSIGNED (type)
5559 && (t == NULL_TREE || !integer_onep (t)))
5560 {
5561 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5562 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5563 true, GSI_SAME_STMT);
5564 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5565 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5566 true, GSI_SAME_STMT);
5567 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5568 NULL_TREE, NULL_TREE);
5569 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5570 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5571 expand_omp_regimplify_p, NULL, NULL)
5572 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5573 expand_omp_regimplify_p, NULL, NULL))
5574 {
5575 gsi = gsi_for_stmt (cond_stmt);
5576 gimple_regimplify_operands (cond_stmt, &gsi);
5577 }
5578 se = split_block (entry_bb, cond_stmt);
5579 se->flags = EDGE_TRUE_VALUE;
5580 entry_bb = se->dest;
5581 se->probability = profile_probability::very_likely ();
5582 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5583 se->probability = profile_probability::very_unlikely ();
5584 if (gimple_in_ssa_p (cfun))
5585 {
5586 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5587 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5588 !gsi_end_p (gpi); gsi_next (&gpi))
5589 {
5590 gphi *phi = gpi.phi ();
5591 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5592 se, UNKNOWN_LOCATION);
5593 }
5594 }
5595 gsi = gsi_last_bb (entry_bb);
5596 }
5597
5598 if (fd->lastprivate_conditional)
5599 {
5600 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5601 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5602 if (fd->have_pointer_condtemp)
5603 condtemp = OMP_CLAUSE_DECL (c);
5604 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5605 cond_var = OMP_CLAUSE_DECL (c);
5606 }
5607 if (fd->have_reductemp || fd->have_pointer_condtemp)
5608 {
5609 tree t1 = build_int_cst (long_integer_type_node, 0);
5610 tree t2 = build_int_cst (long_integer_type_node, 1);
5611 tree t3 = build_int_cstu (long_integer_type_node,
5612 (HOST_WIDE_INT_1U << 31) + 1);
5613 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5614 gimple_stmt_iterator gsi2 = gsi_none ();
5615 gimple *g = NULL;
5616 tree mem = null_pointer_node, memv = NULL_TREE;
5617 if (fd->have_reductemp)
5618 {
5619 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5620 reductions = OMP_CLAUSE_DECL (c);
5621 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5622 g = SSA_NAME_DEF_STMT (reductions);
5623 reductions = gimple_assign_rhs1 (g);
5624 OMP_CLAUSE_DECL (c) = reductions;
5625 gsi2 = gsi_for_stmt (g);
5626 }
5627 else
5628 {
5629 if (gsi_end_p (gsip))
5630 gsi2 = gsi_after_labels (region->entry);
5631 else
5632 gsi2 = gsip;
5633 reductions = null_pointer_node;
5634 }
5635 if (fd->have_pointer_condtemp)
5636 {
5637 tree type = TREE_TYPE (condtemp);
5638 memv = create_tmp_var (type);
5639 TREE_ADDRESSABLE (memv) = 1;
5640 unsigned HOST_WIDE_INT sz
5641 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5642 sz *= fd->lastprivate_conditional;
5643 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5644 false);
5645 mem = build_fold_addr_expr (memv);
5646 }
5647 tree t
5648 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5649 9, t1, t2, t2, t3, t1, null_pointer_node,
5650 null_pointer_node, reductions, mem);
5651 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5652 true, GSI_SAME_STMT);
5653 if (fd->have_pointer_condtemp)
5654 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5655 if (fd->have_reductemp)
5656 {
5657 gsi_remove (&gsi2, true);
5658 release_ssa_name (gimple_assign_lhs (g));
5659 }
5660 }
5661 switch (gimple_omp_for_kind (fd->for_stmt))
5662 {
5663 case GF_OMP_FOR_KIND_FOR:
5664 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5665 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5666 break;
5667 case GF_OMP_FOR_KIND_DISTRIBUTE:
5668 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5669 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5670 break;
5671 default:
5672 gcc_unreachable ();
5673 }
5674 nthreads = build_call_expr (nthreads, 0);
5675 nthreads = fold_convert (itype, nthreads);
5676 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5677 true, GSI_SAME_STMT);
5678 threadid = build_call_expr (threadid, 0);
5679 threadid = fold_convert (itype, threadid);
5680 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5681 true, GSI_SAME_STMT);
5682
5683 n1 = fd->loop.n1;
5684 n2 = fd->loop.n2;
5685 step = fd->loop.step;
5686 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5687 {
5688 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5689 OMP_CLAUSE__LOOPTEMP_);
5690 gcc_assert (innerc);
5691 n1 = OMP_CLAUSE_DECL (innerc);
5692 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5693 OMP_CLAUSE__LOOPTEMP_);
5694 gcc_assert (innerc);
5695 n2 = OMP_CLAUSE_DECL (innerc);
5696 }
5697 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5698 true, NULL_TREE, true, GSI_SAME_STMT);
5699 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5700 true, NULL_TREE, true, GSI_SAME_STMT);
5701 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5702 true, NULL_TREE, true, GSI_SAME_STMT);
5703 tree chunk_size = fold_convert (itype, fd->chunk_size);
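/* Descriptive note (not in upstream): for schedule (simd:static, N) the
   chunk size is rounded up to a multiple of the simd vectorization factor;
   for plain static schedules it is left unchanged.  */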
5704 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5705 chunk_size
5706 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5707 GSI_SAME_STMT);
5708
5709 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5710 t = fold_build2 (PLUS_EXPR, itype, step, t);
5711 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5712 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5713 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5714 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5715 fold_build1 (NEGATE_EXPR, itype, t),
5716 fold_build1 (NEGATE_EXPR, itype, step));
5717 else
5718 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5719 t = fold_convert (itype, t);
5720 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5721 true, GSI_SAME_STMT);
5722
5723 trip_var = create_tmp_reg (itype, ".trip");
5724 if (gimple_in_ssa_p (cfun))
5725 {
5726 trip_init = make_ssa_name (trip_var);
5727 trip_main = make_ssa_name (trip_var);
5728 trip_back = make_ssa_name (trip_var);
5729 }
5730 else
5731 {
5732 trip_init = trip_var;
5733 trip_main = trip_var;
5734 trip_back = trip_var;
5735 }
5736
5737 gassign *assign_stmt
5738 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5739 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5740
5741 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5742 t = fold_build2 (MULT_EXPR, itype, t, step);
5743 if (POINTER_TYPE_P (type))
5744 t = fold_build_pointer_plus (n1, t);
5745 else
5746 t = fold_build2 (PLUS_EXPR, type, t, n1);
5747 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5748 true, GSI_SAME_STMT);
5749
5750 /* Remove the GIMPLE_OMP_FOR. */
5751 gsi_remove (&gsi, true);
5752
5753 gimple_stmt_iterator gsif = gsi;
5754
5755 /* Iteration space partitioning goes in ITER_PART_BB. */
5756 gsi = gsi_last_bb (iter_part_bb);
5757
5758 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5759 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5760 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5761 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5762 false, GSI_CONTINUE_LINKING);
5763
5764 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5765 t = fold_build2 (MIN_EXPR, itype, t, n);
5766 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5767 false, GSI_CONTINUE_LINKING);
5768
5769 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5770 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5771
5772 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5773 gsi = gsi_start_bb (seq_start_bb);
5774
5775 tree startvar = fd->loop.v;
5776 tree endvar = NULL_TREE;
5777
5778 if (gimple_omp_for_combined_p (fd->for_stmt))
5779 {
5780 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5781 ? gimple_omp_parallel_clauses (inner_stmt)
5782 : gimple_omp_for_clauses (inner_stmt);
5783 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5784 gcc_assert (innerc);
5785 startvar = OMP_CLAUSE_DECL (innerc);
5786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5787 OMP_CLAUSE__LOOPTEMP_);
5788 gcc_assert (innerc);
5789 endvar = OMP_CLAUSE_DECL (innerc);
5790 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5791 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5792 {
5793 int i;
5794 for (i = 1; i < fd->collapse; i++)
5795 {
5796 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5797 OMP_CLAUSE__LOOPTEMP_);
5798 gcc_assert (innerc);
5799 }
5800 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5801 OMP_CLAUSE__LOOPTEMP_);
5802 if (innerc)
5803 {
5804 /* If needed (distribute parallel for with lastprivate),
5805 propagate down the total number of iterations. */
5806 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5807 fd->loop.n2);
5808 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5809 GSI_CONTINUE_LINKING);
5810 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5811 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5812 }
5813 }
5814 }
5815
5816 t = fold_convert (itype, s0);
5817 t = fold_build2 (MULT_EXPR, itype, t, step);
5818 if (POINTER_TYPE_P (type))
5819 {
5820 t = fold_build_pointer_plus (n1, t);
5821 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5822 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5823 t = fold_convert (signed_type_for (type), t);
5824 }
5825 else
5826 t = fold_build2 (PLUS_EXPR, type, t, n1);
5827 t = fold_convert (TREE_TYPE (startvar), t);
5828 t = force_gimple_operand_gsi (&gsi, t,
5829 DECL_P (startvar)
5830 && TREE_ADDRESSABLE (startvar),
5831 NULL_TREE, false, GSI_CONTINUE_LINKING);
5832 assign_stmt = gimple_build_assign (startvar, t);
5833 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5834 if (cond_var)
5835 {
5836 tree itype = TREE_TYPE (cond_var);
5837 /* For lastprivate(conditional:) itervar, we need some iteration
5838 counter that starts at unsigned non-zero and increases.
5839 Prefer as few IVs as possible, so if we can use startvar
5840 itself, use that, or startvar + constant (those would be
5841 incremented with step), and as a last resort use s0 + 1
5842 incremented by 1. */
5843 if (POINTER_TYPE_P (type)
5844 || TREE_CODE (n1) != INTEGER_CST
5845 || fd->loop.cond_code != LT_EXPR)
5846 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5847 build_int_cst (itype, 1));
5848 else if (tree_int_cst_sgn (n1) == 1)
5849 t = fold_convert (itype, t);
5850 else
5851 {
5852 tree c = fold_convert (itype, n1);
5853 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5854 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5855 }
5856 t = force_gimple_operand_gsi (&gsi, t, false,
5857 NULL_TREE, false, GSI_CONTINUE_LINKING);
5858 assign_stmt = gimple_build_assign (cond_var, t);
5859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5860 }
5861
5862 t = fold_convert (itype, e0);
5863 t = fold_build2 (MULT_EXPR, itype, t, step);
5864 if (POINTER_TYPE_P (type))
5865 {
5866 t = fold_build_pointer_plus (n1, t);
5867 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5868 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5869 t = fold_convert (signed_type_for (type), t);
5870 }
5871 else
5872 t = fold_build2 (PLUS_EXPR, type, t, n1);
5873 t = fold_convert (TREE_TYPE (startvar), t);
5874 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5875 false, GSI_CONTINUE_LINKING);
5876 if (endvar)
5877 {
5878 assign_stmt = gimple_build_assign (endvar, e);
5879 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5880 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5881 assign_stmt = gimple_build_assign (fd->loop.v, e);
5882 else
5883 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5884 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5885 }
5886 /* Handle linear clause adjustments. */
5887 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5888 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5889 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5890 c; c = OMP_CLAUSE_CHAIN (c))
5891 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5892 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5893 {
5894 tree d = OMP_CLAUSE_DECL (c);
5895 bool is_ref = omp_is_reference (d);
5896 tree t = d, a, dest;
5897 if (is_ref)
5898 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5899 tree type = TREE_TYPE (t);
5900 if (POINTER_TYPE_P (type))
5901 type = sizetype;
5902 dest = unshare_expr (t);
5903 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5904 expand_omp_build_assign (&gsif, v, t);
5905 if (itercnt == NULL_TREE)
5906 {
5907 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5908 {
5909 itercntbias
5910 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5911 fold_convert (itype, fd->loop.n1));
5912 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5913 itercntbias, step);
5914 itercntbias
5915 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5916 NULL_TREE, true,
5917 GSI_SAME_STMT);
5918 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5919 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5920 NULL_TREE, false,
5921 GSI_CONTINUE_LINKING);
5922 }
5923 else
5924 itercnt = s0;
5925 }
5926 a = fold_build2 (MULT_EXPR, type,
5927 fold_convert (type, itercnt),
5928 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5929 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5930 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5931 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5932 false, GSI_CONTINUE_LINKING);
5933 assign_stmt = gimple_build_assign (dest, t);
5934 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5935 }
5936 if (fd->collapse > 1)
5937 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5938
5939 if (!broken_loop)
5940 {
5941 /* The code controlling the sequential loop goes in CONT_BB,
5942 replacing the GIMPLE_OMP_CONTINUE. */
5943 gsi = gsi_last_nondebug_bb (cont_bb);
5944 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5945 vmain = gimple_omp_continue_control_use (cont_stmt);
5946 vback = gimple_omp_continue_control_def (cont_stmt);
5947
5948 if (cond_var)
5949 {
5950 tree itype = TREE_TYPE (cond_var);
5951 tree t2;
5952 if (POINTER_TYPE_P (type)
5953 || TREE_CODE (n1) != INTEGER_CST
5954 || fd->loop.cond_code != LT_EXPR)
5955 t2 = build_int_cst (itype, 1);
5956 else
5957 t2 = fold_convert (itype, step);
5958 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5959 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5960 NULL_TREE, true, GSI_SAME_STMT);
5961 assign_stmt = gimple_build_assign (cond_var, t2);
5962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5963 }
5964
5965 if (!gimple_omp_for_combined_p (fd->for_stmt))
5966 {
5967 if (POINTER_TYPE_P (type))
5968 t = fold_build_pointer_plus (vmain, step);
5969 else
5970 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5971 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
5972 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5973 true, GSI_SAME_STMT);
5974 assign_stmt = gimple_build_assign (vback, t);
5975 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5976
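/* Descriptive note (not in upstream): with a chunk size of 1 each chunk
   contains a single iteration, so instead of the usual V cond E test emit
   a condition that is always false (0 == 1) and fall through to the trip
   update after one pass.  */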
5977 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
5978 t = build2 (EQ_EXPR, boolean_type_node,
5979 build_int_cst (itype, 0),
5980 build_int_cst (itype, 1));
5981 else
5982 t = build2 (fd->loop.cond_code, boolean_type_node,
5983 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5984 ? t : vback, e);
5985 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5986 }
5987
5988 /* Remove GIMPLE_OMP_CONTINUE. */
5989 gsi_remove (&gsi, true);
5990
5991 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5992 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
5993
5994 /* Trip update code goes into TRIP_UPDATE_BB. */
5995 gsi = gsi_start_bb (trip_update_bb);
5996
5997 t = build_int_cst (itype, 1);
5998 t = build2 (PLUS_EXPR, itype, trip_main, t);
5999 assign_stmt = gimple_build_assign (trip_back, t);
6000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6001 }
6002
6003 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6004 gsi = gsi_last_nondebug_bb (exit_bb);
6005 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6006 {
6007 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6008 if (fd->have_reductemp || fd->have_pointer_condtemp)
6009 {
6010 tree fn;
6011 if (t)
6012 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6013 else
6014 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6015 gcall *g = gimple_build_call (fn, 0);
6016 if (t)
6017 {
6018 gimple_call_set_lhs (g, t);
6019 if (fd->have_reductemp)
6020 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6021 NOP_EXPR, t),
6022 GSI_SAME_STMT);
6023 }
6024 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6025 }
6026 else
6027 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6028 }
6029 else if (fd->have_pointer_condtemp)
6030 {
6031 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6032 gcall *g = gimple_build_call (fn, 0);
6033 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6034 }
6035 gsi_remove (&gsi, true);
6036
6037 /* Connect the new blocks. */
6038 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6039 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6040
6041 if (!broken_loop)
6042 {
6043 se = find_edge (cont_bb, body_bb);
6044 if (se == NULL)
6045 {
6046 se = BRANCH_EDGE (cont_bb);
6047 gcc_assert (single_succ (se->dest) == body_bb);
6048 }
6049 if (gimple_omp_for_combined_p (fd->for_stmt))
6050 {
6051 remove_edge (se);
6052 se = NULL;
6053 }
6054 else if (fd->collapse > 1)
6055 {
6056 remove_edge (se);
6057 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6058 }
6059 else
6060 se->flags = EDGE_TRUE_VALUE;
6061 find_edge (cont_bb, trip_update_bb)->flags
6062 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6063
6064 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6065 iter_part_bb);
6066 }
6067
6068 if (gimple_in_ssa_p (cfun))
6069 {
6070 gphi_iterator psi;
6071 gphi *phi;
6072 edge re, ene;
6073 edge_var_map *vm;
6074 size_t i;
6075
6076 gcc_assert (fd->collapse == 1 && !broken_loop);
6077
6078 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6079 remove arguments of the phi nodes in fin_bb. We need to create
6080 appropriate phi nodes in iter_part_bb instead. */
6081 se = find_edge (iter_part_bb, fin_bb);
6082 re = single_succ_edge (trip_update_bb);
6083 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6084 ene = single_succ_edge (entry_bb);
6085
6086 psi = gsi_start_phis (fin_bb);
6087 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6088 gsi_next (&psi), ++i)
6089 {
6090 gphi *nphi;
6091 location_t locus;
6092
6093 phi = psi.phi ();
6094 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6095 redirect_edge_var_map_def (vm), 0))
6096 continue;
6097
6098 t = gimple_phi_result (phi);
6099 gcc_assert (t == redirect_edge_var_map_result (vm));
6100
6101 if (!single_pred_p (fin_bb))
6102 t = copy_ssa_name (t, phi);
6103
6104 nphi = create_phi_node (t, iter_part_bb);
6105
6106 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6107 locus = gimple_phi_arg_location_from_edge (phi, se);
6108
6109 /* A special case -- fd->loop.v is not yet computed in
6110 iter_part_bb, so we need to use vextra instead. */
6111 if (t == fd->loop.v)
6112 t = vextra;
6113 add_phi_arg (nphi, t, ene, locus);
6114 locus = redirect_edge_var_map_location (vm);
6115 tree back_arg = redirect_edge_var_map_def (vm);
6116 add_phi_arg (nphi, back_arg, re, locus);
6117 edge ce = find_edge (cont_bb, body_bb);
6118 if (ce == NULL)
6119 {
6120 ce = BRANCH_EDGE (cont_bb);
6121 gcc_assert (single_succ (ce->dest) == body_bb);
6122 ce = single_succ_edge (ce->dest);
6123 }
6124 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6125 gcc_assert (inner_loop_phi != NULL);
6126 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6127 find_edge (seq_start_bb, body_bb), locus);
6128
6129 if (!single_pred_p (fin_bb))
6130 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6131 }
6132 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6133 redirect_edge_var_map_clear (re);
6134 if (single_pred_p (fin_bb))
6135 while (1)
6136 {
6137 psi = gsi_start_phis (fin_bb);
6138 if (gsi_end_p (psi))
6139 break;
6140 remove_phi_node (&psi, false);
6141 }
6142
6143 /* Make phi node for trip. */
6144 phi = create_phi_node (trip_main, iter_part_bb);
6145 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6146 UNKNOWN_LOCATION);
6147 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6148 UNKNOWN_LOCATION);
6149 }
6150
6151 if (!broken_loop)
6152 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6153 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6154 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6155 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6156 recompute_dominator (CDI_DOMINATORS, fin_bb));
6157 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6158 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6159 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6160 recompute_dominator (CDI_DOMINATORS, body_bb));
6161
6162 if (!broken_loop)
6163 {
6164 class loop *loop = body_bb->loop_father;
6165 class loop *trip_loop = alloc_loop ();
6166 trip_loop->header = iter_part_bb;
6167 trip_loop->latch = trip_update_bb;
6168 add_loop (trip_loop, iter_part_bb->loop_father);
6169
6170 if (loop != entry_bb->loop_father)
6171 {
6172 gcc_assert (loop->header == body_bb);
6173 gcc_assert (loop->latch == region->cont
6174 || single_pred (loop->latch) == region->cont);
6175 trip_loop->inner = loop;
6176 return;
6177 }
6178
6179 if (!gimple_omp_for_combined_p (fd->for_stmt))
6180 {
6181 loop = alloc_loop ();
6182 loop->header = body_bb;
6183 if (collapse_bb == NULL)
6184 loop->latch = cont_bb;
6185 add_loop (loop, trip_loop);
6186 }
6187 }
6188 }
6189
6190 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6191 loop. Given parameters:
6192
6193 for (V = N1; V cond N2; V += STEP) BODY;
6194
6195 where COND is "<" or ">", we generate pseudocode
6196
6197 V = N1;
6198 goto L1;
6199 L0:
6200 BODY;
6201 V += STEP;
6202 L1:
6203 if (V cond N2) goto L0; else goto L2;
6204 L2:
6205
6206 For collapsed loops, emit the outer loops as scalar
6207 and only try to vectorize the innermost loop. */
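/* For instance (illustrative only), this expansion handles

     #pragma omp simd
     for (V = 0; V < N; V++)
       BODY;

   leaving the actual vectorization to the loop vectorizer, guided by the
   safelen/simdlen clauses and the _simduid_ annotation.  */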
6208
6209 static void
6210 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6211 {
6212 tree type, t;
6213 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6214 gimple_stmt_iterator gsi;
6215 gimple *stmt;
6216 gcond *cond_stmt;
6217 bool broken_loop = region->cont == NULL;
6218 edge e, ne;
6219 tree *counts = NULL;
6220 int i;
6221 int safelen_int = INT_MAX;
6222 bool dont_vectorize = false;
6223 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6224 OMP_CLAUSE_SAFELEN);
6225 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6226 OMP_CLAUSE__SIMDUID_);
6227 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6228 OMP_CLAUSE_IF);
6229 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6230 OMP_CLAUSE_SIMDLEN);
6231 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6232 OMP_CLAUSE__CONDTEMP_);
6233 tree n1, n2;
6234 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6235
6236 if (safelen)
6237 {
6238 poly_uint64 val;
6239 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6240 if (!poly_int_tree_p (safelen, &val))
6241 safelen_int = 0;
6242 else
6243 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6244 if (safelen_int == 1)
6245 safelen_int = 0;
6246 }
6247 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6248 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6249 {
6250 safelen_int = 0;
6251 dont_vectorize = true;
6252 }
6253 type = TREE_TYPE (fd->loop.v);
6254 entry_bb = region->entry;
6255 cont_bb = region->cont;
6256 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6257 gcc_assert (broken_loop
6258 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6259 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6260 if (!broken_loop)
6261 {
6262 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6263 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6264 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6265 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6266 }
6267 else
6268 {
6269 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6270 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6271 l2_bb = single_succ (l1_bb);
6272 }
6273 exit_bb = region->exit;
6274 l2_dom_bb = NULL;
6275
6276 gsi = gsi_last_nondebug_bb (entry_bb);
6277
6278 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6279 /* Not needed in SSA form right now. */
6280 gcc_assert (!gimple_in_ssa_p (cfun));
6281 if (fd->collapse > 1
6282 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6283 || broken_loop))
6284 {
6285 int first_zero_iter = -1, dummy = -1;
6286 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6287
6288 counts = XALLOCAVEC (tree, fd->collapse);
6289 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6290 zero_iter_bb, first_zero_iter,
6291 dummy_bb, dummy, l2_dom_bb);
6292 }
6293 if (l2_dom_bb == NULL)
6294 l2_dom_bb = l1_bb;
6295
6296 n1 = fd->loop.n1;
6297 n2 = fd->loop.n2;
6298 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6299 {
6300 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6301 OMP_CLAUSE__LOOPTEMP_);
6302 gcc_assert (innerc);
6303 n1 = OMP_CLAUSE_DECL (innerc);
6304 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6305 OMP_CLAUSE__LOOPTEMP_);
6306 gcc_assert (innerc);
6307 n2 = OMP_CLAUSE_DECL (innerc);
6308 }
6309 tree step = fd->loop.step;
6310
6311 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6312 OMP_CLAUSE__SIMT_);
6313 if (is_simt)
6314 {
6315 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6316 is_simt = safelen_int > 1;
6317 }
6318 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
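/* Descriptive note (not in upstream): under SIMT each lane starts at
   N1 + LANE * STEP and advances by VF * STEP, so the lanes interleave
   the iteration space.  */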
6319 if (is_simt)
6320 {
6321 simt_lane = create_tmp_var (unsigned_type_node);
6322 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6323 gimple_call_set_lhs (g, simt_lane);
6324 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6325 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6326 fold_convert (TREE_TYPE (step), simt_lane));
6327 n1 = fold_convert (type, n1);
6328 if (POINTER_TYPE_P (type))
6329 n1 = fold_build_pointer_plus (n1, offset);
6330 else
6331 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6332
6333 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6334 if (fd->collapse > 1)
6335 simt_maxlane = build_one_cst (unsigned_type_node);
6336 else if (safelen_int < omp_max_simt_vf ())
6337 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6338 tree vf
6339 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6340 unsigned_type_node, 0);
6341 if (simt_maxlane)
6342 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6343 vf = fold_convert (TREE_TYPE (step), vf);
6344 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6345 }
6346
6347 tree n2var = NULL_TREE;
6348 tree n2v = NULL_TREE;
6349 tree *nonrect_bounds = NULL;
6350 if (fd->collapse > 1)
6351 {
6352 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6353 {
6354 if (fd->non_rect)
6355 {
6356 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6357 memset (nonrect_bounds, 0,
6358 sizeof (tree) * (fd->last_nonrect + 1));
6359 }
6360 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6361 gcc_assert (entry_bb == gsi_bb (gsi));
6362 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6363 gsi_prev (&gsi);
6364 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6365 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6366 NULL, n1);
6367 gsi = gsi_for_stmt (fd->for_stmt);
6368 }
6369 if (broken_loop)
6370 ;
6371 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6372 {
6373 /* Compute in n2var the limit for the first innermost loop,
6374 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6375 where cnt is how many iterations the loop would have if
6376 all further iterations were assigned to the current task. */
6377 n2var = create_tmp_var (type);
6378 i = fd->collapse - 1;
6379 tree itype = TREE_TYPE (fd->loops[i].v);
6380 if (POINTER_TYPE_P (itype))
6381 itype = signed_type_for (itype);
6382 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6383 ? -1 : 1));
6384 t = fold_build2 (PLUS_EXPR, itype,
6385 fold_convert (itype, fd->loops[i].step), t);
6386 t = fold_build2 (PLUS_EXPR, itype, t,
6387 fold_convert (itype, fd->loops[i].n2));
6388 if (fd->loops[i].m2)
6389 {
6390 tree t2 = fold_convert (itype,
6391 fd->loops[i - fd->loops[i].outer].v);
6392 tree t3 = fold_convert (itype, fd->loops[i].m2);
6393 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6394 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6395 }
6396 t = fold_build2 (MINUS_EXPR, itype, t,
6397 fold_convert (itype, fd->loops[i].v));
6398 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6399 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6400 fold_build1 (NEGATE_EXPR, itype, t),
6401 fold_build1 (NEGATE_EXPR, itype,
6402 fold_convert (itype,
6403 fd->loops[i].step)));
6404 else
6405 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6406 fold_convert (itype, fd->loops[i].step));
6407 t = fold_convert (type, t);
6408 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6409 t = fold_build2 (MIN_EXPR, type, t2, t);
6410 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6411 expand_omp_build_assign (&gsi, n2var, t);
6412 }
6413 else
6414 {
6415 if (TREE_CODE (n2) == INTEGER_CST)
6416 {
6417 /* Indicate for lastprivate handling that at least one iteration
6418 has been performed, without wasting runtime. */
6419 if (integer_nonzerop (n2))
6420 expand_omp_build_assign (&gsi, fd->loop.v,
6421 fold_convert (type, n2));
6422 else
6423 /* Indicate that no iteration has been performed. */
6424 expand_omp_build_assign (&gsi, fd->loop.v,
6425 build_one_cst (type));
6426 }
6427 else
6428 {
6429 expand_omp_build_assign (&gsi, fd->loop.v,
6430 build_zero_cst (type));
6431 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6432 }
6433 for (i = 0; i < fd->collapse; i++)
6434 {
6435 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6436 if (fd->loops[i].m1)
6437 {
6438 tree t2
6439 = fold_convert (TREE_TYPE (t),
6440 fd->loops[i - fd->loops[i].outer].v);
6441 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6442 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6443 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6444 }
6445 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6446 /* For normal non-combined collapsed loops just initialize
6447 the outermost iterator in the entry_bb. */
6448 if (!broken_loop)
6449 break;
6450 }
6451 }
6452 }
6453 else
6454 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6455 tree altv = NULL_TREE, altn2 = NULL_TREE;
6456 if (fd->collapse == 1
6457 && !broken_loop
6458 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6459 {
6460 /* The vectorizer currently punts on loops with non-constant steps
6461 for the main IV (it can't compute the number of iterations and gives up
6462 because of that). Since for OpenMP loops it is always possible to
6463 compute the number of iterations upfront, use an alternate IV
6464 as the loop iterator:
6465 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6466 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
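/* Illustrative only: for e.g. "for (i = 0; i < n; i += incr)" where INCR is
   not known at compile time, ALTV counts 0, 1, 2, ... up to ALTN2 while I is
   advanced by INCR alongside it, so the vectorizer sees a unit-step IV.  */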
6467 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6468 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6469 tree itype = TREE_TYPE (fd->loop.v);
6470 if (POINTER_TYPE_P (itype))
6471 itype = signed_type_for (itype);
6472 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6473 t = fold_build2 (PLUS_EXPR, itype,
6474 fold_convert (itype, fd->loop.step), t);
6475 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6476 t = fold_build2 (MINUS_EXPR, itype, t,
6477 fold_convert (itype, fd->loop.v));
6478 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6479 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6480 fold_build1 (NEGATE_EXPR, itype, t),
6481 fold_build1 (NEGATE_EXPR, itype,
6482 fold_convert (itype, fd->loop.step)));
6483 else
6484 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6485 fold_convert (itype, fd->loop.step));
6486 t = fold_convert (TREE_TYPE (altv), t);
6487 altn2 = create_tmp_var (TREE_TYPE (altv));
6488 expand_omp_build_assign (&gsi, altn2, t);
6489 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6490 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6491 true, GSI_SAME_STMT);
6492 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6493 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6494 build_zero_cst (TREE_TYPE (altv)));
6495 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6496 }
6497 else if (fd->collapse > 1
6498 && !broken_loop
6499 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6500 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6501 {
6502 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6503 altn2 = create_tmp_var (TREE_TYPE (altv));
6504 }
6505 if (cond_var)
6506 {
6507 if (POINTER_TYPE_P (type)
6508 || TREE_CODE (n1) != INTEGER_CST
6509 || fd->loop.cond_code != LT_EXPR
6510 || tree_int_cst_sgn (n1) != 1)
6511 expand_omp_build_assign (&gsi, cond_var,
6512 build_one_cst (TREE_TYPE (cond_var)));
6513 else
6514 expand_omp_build_assign (&gsi, cond_var,
6515 fold_convert (TREE_TYPE (cond_var), n1));
6516 }
6517
6518 /* Remove the GIMPLE_OMP_FOR statement. */
6519 gsi_remove (&gsi, true);
6520
6521 if (!broken_loop)
6522 {
6523 /* Code to control the increment goes in the CONT_BB. */
6524 gsi = gsi_last_nondebug_bb (cont_bb);
6525 stmt = gsi_stmt (gsi);
6526 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6527
6528 if (fd->collapse == 1
6529 || gimple_omp_for_combined_into_p (fd->for_stmt))
6530 {
6531 if (POINTER_TYPE_P (type))
6532 t = fold_build_pointer_plus (fd->loop.v, step);
6533 else
6534 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6535 expand_omp_build_assign (&gsi, fd->loop.v, t);
6536 }
6537 else if (TREE_CODE (n2) != INTEGER_CST)
6538 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6539 if (altv)
6540 {
6541 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6542 build_one_cst (TREE_TYPE (altv)));
6543 expand_omp_build_assign (&gsi, altv, t);
6544 }
6545
6546 if (fd->collapse > 1)
6547 {
6548 i = fd->collapse - 1;
6549 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6550 {
6551 t = fold_convert (sizetype, fd->loops[i].step);
6552 t = fold_build_pointer_plus (fd->loops[i].v, t);
6553 }
6554 else
6555 {
6556 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6557 fd->loops[i].step);
6558 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6559 fd->loops[i].v, t);
6560 }
6561 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6562 }
6563 if (cond_var)
6564 {
6565 if (POINTER_TYPE_P (type)
6566 || TREE_CODE (n1) != INTEGER_CST
6567 || fd->loop.cond_code != LT_EXPR
6568 || tree_int_cst_sgn (n1) != 1)
6569 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6570 build_one_cst (TREE_TYPE (cond_var)));
6571 else
6572 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6573 fold_convert (TREE_TYPE (cond_var), step));
6574 expand_omp_build_assign (&gsi, cond_var, t);
6575 }
6576
6577 /* Remove GIMPLE_OMP_CONTINUE. */
6578 gsi_remove (&gsi, true);
6579 }
6580
6581 /* Emit the condition in L1_BB. */
6582 gsi = gsi_start_bb (l1_bb);
6583
6584 if (altv)
6585 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6586 else if (fd->collapse > 1
6587 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6588 && !broken_loop)
6589 {
6590 i = fd->collapse - 1;
6591 tree itype = TREE_TYPE (fd->loops[i].v);
6592 if (fd->loops[i].m2)
6593 t = n2v = create_tmp_var (itype);
6594 else
6595 t = fold_convert (itype, fd->loops[i].n2);
6596 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6597 false, GSI_CONTINUE_LINKING);
6598 tree v = fd->loops[i].v;
6599 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6600 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6601 false, GSI_CONTINUE_LINKING);
6602 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6603 }
6604 else
6605 {
6606 if (fd->collapse > 1 && !broken_loop)
6607 t = n2var;
6608 else
6609 t = fold_convert (type, n2);
6610 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6611 false, GSI_CONTINUE_LINKING);
6612 tree v = fd->loop.v;
6613 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6614 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6615 false, GSI_CONTINUE_LINKING);
6616 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6617 }
6618 cond_stmt = gimple_build_cond_empty (t);
6619 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6620 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6621 NULL, NULL)
6622 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6623 NULL, NULL))
6624 {
6625 gsi = gsi_for_stmt (cond_stmt);
6626 gimple_regimplify_operands (cond_stmt, &gsi);
6627 }
6628
6629 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6630 if (is_simt)
6631 {
6632 gsi = gsi_start_bb (l2_bb);
6633 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6634 if (POINTER_TYPE_P (type))
6635 t = fold_build_pointer_plus (fd->loop.v, step);
6636 else
6637 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6638 expand_omp_build_assign (&gsi, fd->loop.v, t);
6639 }
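/* Arithmetic note on the assignment above, assuming STEP was scaled to
   STEP * SIMT_VF when the SIMT variant was set up earlier in this
   function: fd->loop.step - step == STEP - STEP * SIMT_VF
   == -STEP * (SIMT_VF - 1), so the addition performed here is exactly
   the subtraction announced in the comment.  */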
6640
6641 /* Remove GIMPLE_OMP_RETURN. */
6642 gsi = gsi_last_nondebug_bb (exit_bb);
6643 gsi_remove (&gsi, true);
6644
6645 /* Connect the new blocks. */
6646 remove_edge (FALLTHRU_EDGE (entry_bb));
6647
6648 if (!broken_loop)
6649 {
6650 remove_edge (BRANCH_EDGE (entry_bb));
6651 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6652
6653 e = BRANCH_EDGE (l1_bb);
6654 ne = FALLTHRU_EDGE (l1_bb);
6655 e->flags = EDGE_TRUE_VALUE;
6656 }
6657 else
6658 {
6659 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6660
6661 ne = single_succ_edge (l1_bb);
6662 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6663
6664 }
6665 ne->flags = EDGE_FALSE_VALUE;
6666 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6667 ne->probability = e->probability.invert ();
6668
6669 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6670 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6671
6672 if (simt_maxlane)
6673 {
6674 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6675 NULL_TREE, NULL_TREE);
6676 gsi = gsi_last_bb (entry_bb);
6677 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6678 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6679 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6680 FALLTHRU_EDGE (entry_bb)->probability
6681 = profile_probability::guessed_always ().apply_scale (7, 8);
6682 BRANCH_EDGE (entry_bb)->probability
6683 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6684 l2_dom_bb = entry_bb;
6685 }
6686 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6687
6688 if (!broken_loop && fd->collapse > 1)
6689 {
6690 basic_block last_bb = l1_bb;
6691 basic_block init_bb = NULL;
6692 for (i = fd->collapse - 2; i >= 0; i--)
6693 {
6694 tree nextn2v = NULL_TREE;
6695 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6696 e = EDGE_SUCC (last_bb, 0);
6697 else
6698 e = EDGE_SUCC (last_bb, 1);
6699 basic_block bb = split_edge (e);
6700 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6701 {
6702 t = fold_convert (sizetype, fd->loops[i].step);
6703 t = fold_build_pointer_plus (fd->loops[i].v, t);
6704 }
6705 else
6706 {
6707 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6708 fd->loops[i].step);
6709 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6710 fd->loops[i].v, t);
6711 }
6712 gsi = gsi_after_labels (bb);
6713 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6714
6715 bb = split_block (bb, last_stmt (bb))->dest;
6716 gsi = gsi_start_bb (bb);
6717 tree itype = TREE_TYPE (fd->loops[i].v);
6718 if (fd->loops[i].m2)
6719 t = nextn2v = create_tmp_var (itype);
6720 else
6721 t = fold_convert (itype, fd->loops[i].n2);
6722 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6723 false, GSI_CONTINUE_LINKING);
6724 tree v = fd->loops[i].v;
6725 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6726 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6727 false, GSI_CONTINUE_LINKING);
6728 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6729 cond_stmt = gimple_build_cond_empty (t);
6730 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6731 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6732 expand_omp_regimplify_p, NULL, NULL)
6733 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6734 expand_omp_regimplify_p, NULL, NULL))
6735 {
6736 gsi = gsi_for_stmt (cond_stmt);
6737 gimple_regimplify_operands (cond_stmt, &gsi);
6738 }
6739 ne = single_succ_edge (bb);
6740 ne->flags = EDGE_FALSE_VALUE;
6741
6742 init_bb = create_empty_bb (bb);
6743 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6744 add_bb_to_loop (init_bb, bb->loop_father);
6745 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6746 e->probability
6747 = profile_probability::guessed_always ().apply_scale (7, 8);
6748 ne->probability = e->probability.invert ();
6749
6750 gsi = gsi_after_labels (init_bb);
6751 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6752 fd->loops[i + 1].n1);
6753 if (fd->loops[i + 1].m1)
6754 {
6755 tree t2 = fold_convert (TREE_TYPE (t),
6756 fd->loops[i + 1
6757 - fd->loops[i + 1].outer].v);
6758 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6759 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6760 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6761 }
6762 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6763 if (fd->loops[i + 1].m2)
6764 {
6765 if (i + 2 == fd->collapse && (n2var || altv))
6766 {
6767 gcc_assert (n2v == NULL_TREE);
6768 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6769 }
6770 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6771 fd->loops[i + 1].n2);
6772 tree t2 = fold_convert (TREE_TYPE (t),
6773 fd->loops[i + 1
6774 - fd->loops[i + 1].outer].v);
6775 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6776 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6777 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6778 expand_omp_build_assign (&gsi, n2v, t);
6779 }
6780 if (i + 2 == fd->collapse && n2var)
6781 {
6782 /* For composite simd, n2 is the first iteration the current
6783 task should not handle, so we effectively want to use
6784 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6785 as the vectorized loop. Except the vectorizer will not
6786 vectorize that, so instead compute N2VAR as
6787 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6788 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6789 as the loop to vectorize. */
6790 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6791 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6792 {
6793 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6794 == LT_EXPR ? -1 : 1));
6795 t = fold_build2 (PLUS_EXPR, itype,
6796 fold_convert (itype,
6797 fd->loops[i + 1].step), t);
6798 if (fd->loops[i + 1].m2)
6799 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6800 else
6801 t = fold_build2 (PLUS_EXPR, itype, t,
6802 fold_convert (itype,
6803 fd->loops[i + 1].n2));
6804 t = fold_build2 (MINUS_EXPR, itype, t,
6805 fold_convert (itype, fd->loops[i + 1].v));
6806 tree step = fold_convert (itype, fd->loops[i + 1].step);
6807 if (TYPE_UNSIGNED (itype)
6808 && fd->loops[i + 1].cond_code == GT_EXPR)
6809 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6810 fold_build1 (NEGATE_EXPR, itype, t),
6811 fold_build1 (NEGATE_EXPR, itype, step));
6812 else
6813 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6814 t = fold_convert (type, t);
6815 }
6816 else
6817 t = counts[i + 1];
6818 t = fold_build2 (MIN_EXPR, type, t2, t);
6819 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6820 expand_omp_build_assign (&gsi, n2var, t);
6821 }
6822 if (i + 2 == fd->collapse && altv)
6823 {
6824 /* The vectorizer currently punts on loops with non-constant
6825 steps for the main IV (can't compute number of iterations
6826 and gives up because of that). Since for OpenMP loops the
6827 number of iterations can always be computed upfront,
6828 use an alternate IV as the loop iterator. */
6829 expand_omp_build_assign (&gsi, altv,
6830 build_zero_cst (TREE_TYPE (altv)));
6831 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6832 if (POINTER_TYPE_P (itype))
6833 itype = signed_type_for (itype);
6834 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6835 ? -1 : 1));
6836 t = fold_build2 (PLUS_EXPR, itype,
6837 fold_convert (itype, fd->loops[i + 1].step), t);
6838 t = fold_build2 (PLUS_EXPR, itype, t,
6839 fold_convert (itype,
6840 fd->loops[i + 1].m2
6841 ? n2v : fd->loops[i + 1].n2));
6842 t = fold_build2 (MINUS_EXPR, itype, t,
6843 fold_convert (itype, fd->loops[i + 1].v));
6844 tree step = fold_convert (itype, fd->loops[i + 1].step);
6845 if (TYPE_UNSIGNED (itype)
6846 && fd->loops[i + 1].cond_code == GT_EXPR)
6847 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6848 fold_build1 (NEGATE_EXPR, itype, t),
6849 fold_build1 (NEGATE_EXPR, itype, step));
6850 else
6851 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6852 t = fold_convert (TREE_TYPE (altv), t);
6853 expand_omp_build_assign (&gsi, altn2, t);
6854 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6855 fd->loops[i + 1].m2
6856 ? n2v : fd->loops[i + 1].n2);
6857 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6858 true, GSI_SAME_STMT);
6859 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6860 fd->loops[i + 1].v, t2);
6861 gassign *g
6862 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6863 build_zero_cst (TREE_TYPE (altv)));
6864 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6865 }
6866 n2v = nextn2v;
6867
6868 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6869 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6870 {
6871 e = find_edge (entry_bb, last_bb);
6872 redirect_edge_succ (e, bb);
6873 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6874 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6875 }
6876
6877 last_bb = bb;
6878 }
6879 }
6880 if (!broken_loop)
6881 {
6882 class loop *loop = alloc_loop ();
6883 loop->header = l1_bb;
6884 loop->latch = cont_bb;
6885 add_loop (loop, l1_bb->loop_father);
6886 loop->safelen = safelen_int;
6887 if (simduid)
6888 {
6889 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6890 cfun->has_simduid_loops = true;
6891 }
6892 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6893 the loop. */
6894 if ((flag_tree_loop_vectorize
6895 || !global_options_set.x_flag_tree_loop_vectorize)
6896 && flag_tree_loop_optimize
6897 && loop->safelen > 1)
6898 {
6899 loop->force_vectorize = true;
6900 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6901 {
6902 unsigned HOST_WIDE_INT v
6903 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6904 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6905 loop->simdlen = v;
6906 }
6907 cfun->has_force_vectorize_loops = true;
6908 }
6909 else if (dont_vectorize)
6910 loop->dont_vectorize = true;
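/* Illustration of the flags set above (a sketch, not tied to a
   particular testcase): for "#pragma omp simd safelen(16) simdlen(8)"
   the expanded loop ends up with loop->safelen == 16,
   loop->simdlen == 8 and loop->force_vectorize set (when tree loop
   vectorization is enabled), which the vectorizer consults when
   choosing the vectorization factor.  */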
6911 }
6912 else if (simduid)
6913 cfun->has_simduid_loops = true;
6914 }
6915
6916 /* A taskloop construct is represented after gimplification with
6917 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6918 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6919 which should just compute all the needed loop temporaries
6920 for GIMPLE_OMP_TASK. */
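/* A rough sketch of that representation (not the exact GIMPLE):

     #pragma omp taskloop
     for (i = 0; i < n; i++) body;

   becomes

     GIMPLE_OMP_FOR (outer)      <-- expanded by this routine; only fills
       GIMPLE_OMP_TASK               the _looptemp_ start/end values
         GIMPLE_OMP_FOR (inner)  <-- expanded by
           body                      expand_omp_taskloop_for_inner

   with GOMP_taskloop later handing each created task one subrange.  */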
6921
6922 static void
6923 expand_omp_taskloop_for_outer (struct omp_region *region,
6924 struct omp_for_data *fd,
6925 gimple *inner_stmt)
6926 {
6927 tree type, bias = NULL_TREE;
6928 basic_block entry_bb, cont_bb, exit_bb;
6929 gimple_stmt_iterator gsi;
6930 gassign *assign_stmt;
6931 tree *counts = NULL;
6932 int i;
6933
6934 gcc_assert (inner_stmt);
6935 gcc_assert (region->cont);
6936 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6937 && gimple_omp_task_taskloop_p (inner_stmt));
6938 type = TREE_TYPE (fd->loop.v);
6939
6940 /* See if we need to bias by LLONG_MIN. */
6941 if (fd->iter_type == long_long_unsigned_type_node
6942 && TREE_CODE (type) == INTEGER_TYPE
6943 && !TYPE_UNSIGNED (type))
6944 {
6945 tree n1, n2;
6946
6947 if (fd->loop.cond_code == LT_EXPR)
6948 {
6949 n1 = fd->loop.n1;
6950 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6951 }
6952 else
6953 {
6954 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6955 n2 = fd->loop.n1;
6956 }
6957 if (TREE_CODE (n1) != INTEGER_CST
6958 || TREE_CODE (n2) != INTEGER_CST
6959 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6960 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6961 }
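/* Informal sketch of why the bias helps: the *_ull libgomp entry points
   compare bounds as unsigned long long, so mixed-sign signed bounds
   would order incorrectly.  Adding bias = LLONG_MIN with unsigned
   wrap-around maps a signed 64-bit range monotonically onto the
   unsigned one, e.g. -10 -> 2^63 - 10 and 10 -> 2^63 + 10; adding the
   same bias once more and truncating back to the original type (as
   expand_omp_taskloop_for_inner does) recovers the original values.  */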
6962
6963 entry_bb = region->entry;
6964 cont_bb = region->cont;
6965 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6966 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6967 exit_bb = region->exit;
6968
6969 gsi = gsi_last_nondebug_bb (entry_bb);
6970 gimple *for_stmt = gsi_stmt (gsi);
6971 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
6972 if (fd->collapse > 1)
6973 {
6974 int first_zero_iter = -1, dummy = -1;
6975 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
6976
6977 counts = XALLOCAVEC (tree, fd->collapse);
6978 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6979 zero_iter_bb, first_zero_iter,
6980 dummy_bb, dummy, l2_dom_bb);
6981
6982 if (zero_iter_bb)
6983 {
6984 /* Some counts[i] vars might be uninitialized if
6985 some loop has zero iterations. But the body shouldn't
6986 be executed in that case, so just avoid uninit warnings. */
6987 for (i = first_zero_iter; i < fd->collapse; i++)
6988 if (SSA_VAR_P (counts[i]))
6989 TREE_NO_WARNING (counts[i]) = 1;
6990 gsi_prev (&gsi);
6991 edge e = split_block (entry_bb, gsi_stmt (gsi));
6992 entry_bb = e->dest;
6993 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
6994 gsi = gsi_last_bb (entry_bb);
6995 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
6996 get_immediate_dominator (CDI_DOMINATORS,
6997 zero_iter_bb));
6998 }
6999 }
7000
7001 tree t0, t1;
7002 t1 = fd->loop.n2;
7003 t0 = fd->loop.n1;
7004 if (POINTER_TYPE_P (TREE_TYPE (t0))
7005 && TYPE_PRECISION (TREE_TYPE (t0))
7006 != TYPE_PRECISION (fd->iter_type))
7007 {
7008 /* Avoid casting pointers to integer of a different size. */
7009 tree itype = signed_type_for (type);
7010 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7011 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7012 }
7013 else
7014 {
7015 t1 = fold_convert (fd->iter_type, t1);
7016 t0 = fold_convert (fd->iter_type, t0);
7017 }
7018 if (bias)
7019 {
7020 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7021 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7022 }
7023
7024 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7025 OMP_CLAUSE__LOOPTEMP_);
7026 gcc_assert (innerc);
7027 tree startvar = OMP_CLAUSE_DECL (innerc);
7028 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7029 gcc_assert (innerc);
7030 tree endvar = OMP_CLAUSE_DECL (innerc);
7031 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7032 {
7033 gcc_assert (innerc);
7034 for (i = 1; i < fd->collapse; i++)
7035 {
7036 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7037 OMP_CLAUSE__LOOPTEMP_);
7038 gcc_assert (innerc);
7039 }
7040 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7041 OMP_CLAUSE__LOOPTEMP_);
7042 if (innerc)
7043 {
7044 /* If needed (inner taskloop has lastprivate clause), propagate
7045 down the total number of iterations. */
7046 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7047 NULL_TREE, false,
7048 GSI_CONTINUE_LINKING);
7049 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7050 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7051 }
7052 }
7053
7054 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7055 GSI_CONTINUE_LINKING);
7056 assign_stmt = gimple_build_assign (startvar, t0);
7057 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7058
7059 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7060 GSI_CONTINUE_LINKING);
7061 assign_stmt = gimple_build_assign (endvar, t1);
7062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7063 if (fd->collapse > 1)
7064 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7065
7066 /* Remove the GIMPLE_OMP_FOR statement. */
7067 gsi = gsi_for_stmt (for_stmt);
7068 gsi_remove (&gsi, true);
7069
7070 gsi = gsi_last_nondebug_bb (cont_bb);
7071 gsi_remove (&gsi, true);
7072
7073 gsi = gsi_last_nondebug_bb (exit_bb);
7074 gsi_remove (&gsi, true);
7075
7076 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7077 remove_edge (BRANCH_EDGE (entry_bb));
7078 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7079 remove_edge (BRANCH_EDGE (cont_bb));
7080 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7081 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7082 recompute_dominator (CDI_DOMINATORS, region->entry));
7083 }
7084
7085 /* A taskloop construct is represented after gimplification with
7086 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7087 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7088 The GOMP_taskloop{,_ull} function arranges for each task to be given
7089 just a single range of iterations. */
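/* Sketch of the expansion done below for the simple (non-combined,
   collapse(1)) case, where start/end stand for the two _LOOPTEMP_
   values handed to this task:

     V = start;
   body:
     BODY;
     V += STEP;
     if (V cond end) goto body;

   No initial bounds check is emitted here (the branch edge out of the
   entry block is removed below); presumably the runtime only hands out
   ranges that contain at least one iteration.  */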
7090
7091 static void
7092 expand_omp_taskloop_for_inner (struct omp_region *region,
7093 struct omp_for_data *fd,
7094 gimple *inner_stmt)
7095 {
7096 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7097 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7098 basic_block fin_bb;
7099 gimple_stmt_iterator gsi;
7100 edge ep;
7101 bool broken_loop = region->cont == NULL;
7102 tree *counts = NULL;
7103 tree n1, n2, step;
7104
7105 itype = type = TREE_TYPE (fd->loop.v);
7106 if (POINTER_TYPE_P (type))
7107 itype = signed_type_for (type);
7108
7109 /* See if we need to bias by LLONG_MIN. */
7110 if (fd->iter_type == long_long_unsigned_type_node
7111 && TREE_CODE (type) == INTEGER_TYPE
7112 && !TYPE_UNSIGNED (type))
7113 {
7114 tree n1, n2;
7115
7116 if (fd->loop.cond_code == LT_EXPR)
7117 {
7118 n1 = fd->loop.n1;
7119 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7120 }
7121 else
7122 {
7123 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7124 n2 = fd->loop.n1;
7125 }
7126 if (TREE_CODE (n1) != INTEGER_CST
7127 || TREE_CODE (n2) != INTEGER_CST
7128 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7129 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7130 }
7131
7132 entry_bb = region->entry;
7133 cont_bb = region->cont;
7134 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7135 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7136 gcc_assert (broken_loop
7137 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7138 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7139 if (!broken_loop)
7140 {
7141 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7142 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7143 }
7144 exit_bb = region->exit;
7145
7146 /* Iteration space partitioning goes in ENTRY_BB. */
7147 gsi = gsi_last_nondebug_bb (entry_bb);
7148 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7149
7150 if (fd->collapse > 1)
7151 {
7152 int first_zero_iter = -1, dummy = -1;
7153 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7154
7155 counts = XALLOCAVEC (tree, fd->collapse);
7156 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7157 fin_bb, first_zero_iter,
7158 dummy_bb, dummy, l2_dom_bb);
7159 t = NULL_TREE;
7160 }
7161 else
7162 t = integer_one_node;
7163
7164 step = fd->loop.step;
7165 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7166 OMP_CLAUSE__LOOPTEMP_);
7167 gcc_assert (innerc);
7168 n1 = OMP_CLAUSE_DECL (innerc);
7169 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7170 gcc_assert (innerc);
7171 n2 = OMP_CLAUSE_DECL (innerc);
7172 if (bias)
7173 {
7174 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7175 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7176 }
7177 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7178 true, NULL_TREE, true, GSI_SAME_STMT);
7179 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7180 true, NULL_TREE, true, GSI_SAME_STMT);
7181 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7182 true, NULL_TREE, true, GSI_SAME_STMT);
7183
7184 tree startvar = fd->loop.v;
7185 tree endvar = NULL_TREE;
7186
7187 if (gimple_omp_for_combined_p (fd->for_stmt))
7188 {
7189 tree clauses = gimple_omp_for_clauses (inner_stmt);
7190 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7191 gcc_assert (innerc);
7192 startvar = OMP_CLAUSE_DECL (innerc);
7193 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7194 OMP_CLAUSE__LOOPTEMP_);
7195 gcc_assert (innerc);
7196 endvar = OMP_CLAUSE_DECL (innerc);
7197 }
7198 t = fold_convert (TREE_TYPE (startvar), n1);
7199 t = force_gimple_operand_gsi (&gsi, t,
7200 DECL_P (startvar)
7201 && TREE_ADDRESSABLE (startvar),
7202 NULL_TREE, false, GSI_CONTINUE_LINKING);
7203 gimple *assign_stmt = gimple_build_assign (startvar, t);
7204 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7205
7206 t = fold_convert (TREE_TYPE (startvar), n2);
7207 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7208 false, GSI_CONTINUE_LINKING);
7209 if (endvar)
7210 {
7211 assign_stmt = gimple_build_assign (endvar, e);
7212 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7213 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7214 assign_stmt = gimple_build_assign (fd->loop.v, e);
7215 else
7216 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7217 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7218 }
7219
7220 tree *nonrect_bounds = NULL;
7221 if (fd->collapse > 1)
7222 {
7223 if (fd->non_rect)
7224 {
7225 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7226 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7227 }
7228 gcc_assert (gsi_bb (gsi) == entry_bb);
7229 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7230 startvar);
7231 entry_bb = gsi_bb (gsi);
7232 }
7233
7234 if (!broken_loop)
7235 {
7236 /* The code controlling the sequential loop replaces the
7237 GIMPLE_OMP_CONTINUE. */
7238 gsi = gsi_last_nondebug_bb (cont_bb);
7239 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7240 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7241 vmain = gimple_omp_continue_control_use (cont_stmt);
7242 vback = gimple_omp_continue_control_def (cont_stmt);
7243
7244 if (!gimple_omp_for_combined_p (fd->for_stmt))
7245 {
7246 if (POINTER_TYPE_P (type))
7247 t = fold_build_pointer_plus (vmain, step);
7248 else
7249 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7250 t = force_gimple_operand_gsi (&gsi, t,
7251 DECL_P (vback)
7252 && TREE_ADDRESSABLE (vback),
7253 NULL_TREE, true, GSI_SAME_STMT);
7254 assign_stmt = gimple_build_assign (vback, t);
7255 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7256
7257 t = build2 (fd->loop.cond_code, boolean_type_node,
7258 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7259 ? t : vback, e);
7260 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7261 }
7262
7263 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7264 gsi_remove (&gsi, true);
7265
7266 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7267 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7268 cont_bb, body_bb);
7269 }
7270
7271 /* Remove the GIMPLE_OMP_FOR statement. */
7272 gsi = gsi_for_stmt (fd->for_stmt);
7273 gsi_remove (&gsi, true);
7274
7275 /* Remove the GIMPLE_OMP_RETURN statement. */
7276 gsi = gsi_last_nondebug_bb (exit_bb);
7277 gsi_remove (&gsi, true);
7278
7279 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7280 if (!broken_loop)
7281 remove_edge (BRANCH_EDGE (entry_bb));
7282 else
7283 {
7284 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7285 region->outer->cont = NULL;
7286 }
7287
7288 /* Connect all the blocks. */
7289 if (!broken_loop)
7290 {
7291 ep = find_edge (cont_bb, body_bb);
7292 if (gimple_omp_for_combined_p (fd->for_stmt))
7293 {
7294 remove_edge (ep);
7295 ep = NULL;
7296 }
7297 else if (fd->collapse > 1)
7298 {
7299 remove_edge (ep);
7300 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7301 }
7302 else
7303 ep->flags = EDGE_TRUE_VALUE;
7304 find_edge (cont_bb, fin_bb)->flags
7305 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7306 }
7307
7308 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7309 recompute_dominator (CDI_DOMINATORS, body_bb));
7310 if (!broken_loop)
7311 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7312 recompute_dominator (CDI_DOMINATORS, fin_bb));
7313
7314 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7315 {
7316 class loop *loop = alloc_loop ();
7317 loop->header = body_bb;
7318 if (collapse_bb == NULL)
7319 loop->latch = cont_bb;
7320 add_loop (loop, body_bb->loop_father);
7321 }
7322 }
7323
7324 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7325 partitioned loop. The lowering here is abstracted, in that the
7326 loop parameters are passed through internal functions, which are
7327 further lowered by oacc_device_lower, once we get to the target
7328 compiler. The loop is of the form:
7329
7330 for (V = B; V LTGT E; V += S) {BODY}
7331
7332 where LTGT is < or >. We may have a specified chunk size, CHUNK_SIZE
7333 (constant 0 for no chunking) and we will have a GWV partitioning
7334 mask, specifying dimensions over which the loop is to be
7335 partitioned (see note below). We generate code that looks like
7336 (this ignores tiling):
7337
7338 <entry_bb> [incoming FALL->body, BRANCH->exit]
7339 typedef signedintify (typeof (V)) T; // underlying signed integral type
7340 T range = E - B;
7341 T chunk_no = 0;
7342 T DIR = LTGT == '<' ? +1 : -1;
7343 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7344 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7345
7346 <head_bb> [created by splitting end of entry_bb]
7347 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7348 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7349 if (!(offset LTGT bound)) goto bottom_bb;
7350
7351 <body_bb> [incoming]
7352 V = B + offset;
7353 {BODY}
7354
7355 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7356 offset += step;
7357 if (offset LTGT bound) goto body_bb; [*]
7358
7359 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7360 chunk_no++;
7361 if (chunk_no < chunk_max) goto head_bb;
7362
7363 <exit_bb> [incoming]
7364 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7365
7366 [*] Needed if V live at end of loop. */
7367
7368 static void
7369 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7370 {
7371 tree v = fd->loop.v;
7372 enum tree_code cond_code = fd->loop.cond_code;
7373 enum tree_code plus_code = PLUS_EXPR;
7374
7375 tree chunk_size = integer_minus_one_node;
7376 tree gwv = integer_zero_node;
7377 tree iter_type = TREE_TYPE (v);
7378 tree diff_type = iter_type;
7379 tree plus_type = iter_type;
7380 struct oacc_collapse *counts = NULL;
7381
7382 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7383 == GF_OMP_FOR_KIND_OACC_LOOP);
7384 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7385 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7386
7387 if (POINTER_TYPE_P (iter_type))
7388 {
7389 plus_code = POINTER_PLUS_EXPR;
7390 plus_type = sizetype;
7391 }
7392 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7393 diff_type = signed_type_for (diff_type);
7394 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7395 diff_type = integer_type_node;
7396
7397 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7398 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7399 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7400 basic_block bottom_bb = NULL;
7401
7402 /* entry_bb has two successors; the branch edge is to the exit
7403 block, fallthrough edge to body. */
7404 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7405 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7406
7407 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
7408 body_bb, or a block whose only successor is body_bb. Its
7409 fallthrough successor is the final block (same as the branch
7410 successor of the entry_bb). */
7411 if (cont_bb)
7412 {
7413 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7414 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7415
7416 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7417 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7418 }
7419 else
7420 gcc_assert (!gimple_in_ssa_p (cfun));
7421
7422 /* The exit block only has entry_bb and cont_bb as predecessors. */
7423 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7424
7425 tree chunk_no;
7426 tree chunk_max = NULL_TREE;
7427 tree bound, offset;
7428 tree step = create_tmp_var (diff_type, ".step");
7429 bool up = cond_code == LT_EXPR;
7430 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7431 bool chunking = !gimple_in_ssa_p (cfun);
7432 bool negating;
7433
7434 /* Tiling vars. */
7435 tree tile_size = NULL_TREE;
7436 tree element_s = NULL_TREE;
7437 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7438 basic_block elem_body_bb = NULL;
7439 basic_block elem_cont_bb = NULL;
7440
7441 /* SSA instances. */
7442 tree offset_incr = NULL_TREE;
7443 tree offset_init = NULL_TREE;
7444
7445 gimple_stmt_iterator gsi;
7446 gassign *ass;
7447 gcall *call;
7448 gimple *stmt;
7449 tree expr;
7450 location_t loc;
7451 edge split, be, fte;
7452
7453 /* Split the end of entry_bb to create head_bb. */
7454 split = split_block (entry_bb, last_stmt (entry_bb));
7455 basic_block head_bb = split->dest;
7456 entry_bb = split->src;
7457
7458 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7459 gsi = gsi_last_nondebug_bb (entry_bb);
7460 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7461 loc = gimple_location (for_stmt);
7462
7463 if (gimple_in_ssa_p (cfun))
7464 {
7465 offset_init = gimple_omp_for_index (for_stmt, 0);
7466 gcc_assert (integer_zerop (fd->loop.n1));
7467 /* The SSA parallelizer does gang parallelism. */
7468 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7469 }
7470
7471 if (fd->collapse > 1 || fd->tiling)
7472 {
7473 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7474 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7475 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7476 TREE_TYPE (fd->loop.n2), loc);
7477
7478 if (SSA_VAR_P (fd->loop.n2))
7479 {
7480 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7481 true, GSI_SAME_STMT);
7482 ass = gimple_build_assign (fd->loop.n2, total);
7483 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7484 }
7485 }
7486
7487 tree b = fd->loop.n1;
7488 tree e = fd->loop.n2;
7489 tree s = fd->loop.step;
7490
7491 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7492 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7493
7494 /* Convert the step, avoiding possible unsigned->signed overflow. */
7495 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7496 if (negating)
7497 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7498 s = fold_convert (diff_type, s);
7499 if (negating)
7500 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7501 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7502
7503 if (!chunking)
7504 chunk_size = integer_zero_node;
7505 expr = fold_convert (diff_type, chunk_size);
7506 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7507 NULL_TREE, true, GSI_SAME_STMT);
7508
7509 if (fd->tiling)
7510 {
7511 /* Determine the tile size and element step,
7512 modify the outer loop step size. */
7513 tile_size = create_tmp_var (diff_type, ".tile_size");
7514 expr = build_int_cst (diff_type, 1);
7515 for (int ix = 0; ix < fd->collapse; ix++)
7516 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7517 expr = force_gimple_operand_gsi (&gsi, expr, true,
7518 NULL_TREE, true, GSI_SAME_STMT);
7519 ass = gimple_build_assign (tile_size, expr);
7520 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7521
7522 element_s = create_tmp_var (diff_type, ".element_s");
7523 ass = gimple_build_assign (element_s, s);
7524 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7525
7526 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7527 s = force_gimple_operand_gsi (&gsi, expr, true,
7528 NULL_TREE, true, GSI_SAME_STMT);
7529 }
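/* Illustrative numbers (a sketch): for "#pragma acc loop tile(2,3)"
   over a 2-deep collapsed nest with unit step, tile_size becomes
   2 * 3 = 6, element_s keeps the original step of 1, and the outer
   step s becomes 1 * 6 = 6, i.e. the outer loop advances a whole tile
   at a time while the element loop set up below walks the iterations
   inside one tile.  */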
7530
7531 /* Determine the range, avoiding possible unsigned->signed overflow. */
7532 negating = !up && TYPE_UNSIGNED (iter_type);
7533 expr = fold_build2 (MINUS_EXPR, plus_type,
7534 fold_convert (plus_type, negating ? b : e),
7535 fold_convert (plus_type, negating ? e : b));
7536 expr = fold_convert (diff_type, expr);
7537 if (negating)
7538 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7539 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7540 NULL_TREE, true, GSI_SAME_STMT);
7541
7542 chunk_no = build_int_cst (diff_type, 0);
7543 if (chunking)
7544 {
7545 gcc_assert (!gimple_in_ssa_p (cfun));
7546
7547 expr = chunk_no;
7548 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7549 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7550
7551 ass = gimple_build_assign (chunk_no, expr);
7552 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7553
7554 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7555 build_int_cst (integer_type_node,
7556 IFN_GOACC_LOOP_CHUNKS),
7557 dir, range, s, chunk_size, gwv);
7558 gimple_call_set_lhs (call, chunk_max);
7559 gimple_set_location (call, loc);
7560 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7561 }
7562 else
7563 chunk_size = chunk_no;
7564
7565 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7566 build_int_cst (integer_type_node,
7567 IFN_GOACC_LOOP_STEP),
7568 dir, range, s, chunk_size, gwv);
7569 gimple_call_set_lhs (call, step);
7570 gimple_set_location (call, loc);
7571 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7572
7573 /* Remove the GIMPLE_OMP_FOR. */
7574 gsi_remove (&gsi, true);
7575
7576 /* Fixup edges from head_bb. */
7577 be = BRANCH_EDGE (head_bb);
7578 fte = FALLTHRU_EDGE (head_bb);
7579 be->flags |= EDGE_FALSE_VALUE;
7580 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7581
7582 basic_block body_bb = fte->dest;
7583
7584 if (gimple_in_ssa_p (cfun))
7585 {
7586 gsi = gsi_last_nondebug_bb (cont_bb);
7587 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7588
7589 offset = gimple_omp_continue_control_use (cont_stmt);
7590 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7591 }
7592 else
7593 {
7594 offset = create_tmp_var (diff_type, ".offset");
7595 offset_init = offset_incr = offset;
7596 }
7597 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7598
7599 /* Loop offset & bound go into head_bb. */
7600 gsi = gsi_start_bb (head_bb);
7601
7602 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7603 build_int_cst (integer_type_node,
7604 IFN_GOACC_LOOP_OFFSET),
7605 dir, range, s,
7606 chunk_size, gwv, chunk_no);
7607 gimple_call_set_lhs (call, offset_init);
7608 gimple_set_location (call, loc);
7609 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7610
7611 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7612 build_int_cst (integer_type_node,
7613 IFN_GOACC_LOOP_BOUND),
7614 dir, range, s,
7615 chunk_size, gwv, offset_init);
7616 gimple_call_set_lhs (call, bound);
7617 gimple_set_location (call, loc);
7618 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7619
7620 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7621 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7622 GSI_CONTINUE_LINKING);
7623
7624 /* V assignment goes into body_bb. */
7625 if (!gimple_in_ssa_p (cfun))
7626 {
7627 gsi = gsi_start_bb (body_bb);
7628
7629 expr = build2 (plus_code, iter_type, b,
7630 fold_convert (plus_type, offset));
7631 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7632 true, GSI_SAME_STMT);
7633 ass = gimple_build_assign (v, expr);
7634 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7635
7636 if (fd->collapse > 1 || fd->tiling)
7637 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7638
7639 if (fd->tiling)
7640 {
7641 /* Determine the range of the element loop -- usually simply
7642 the tile_size, but could be smaller if the final
7643 iteration of the outer loop is a partial tile. */
7644 tree e_range = create_tmp_var (diff_type, ".e_range");
7645
7646 expr = build2 (MIN_EXPR, diff_type,
7647 build2 (MINUS_EXPR, diff_type, bound, offset),
7648 build2 (MULT_EXPR, diff_type, tile_size,
7649 element_s));
7650 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7651 true, GSI_SAME_STMT);
7652 ass = gimple_build_assign (e_range, expr);
7653 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7654
7655 /* Determine bound, offset & step of inner loop. */
7656 e_bound = create_tmp_var (diff_type, ".e_bound");
7657 e_offset = create_tmp_var (diff_type, ".e_offset");
7658 e_step = create_tmp_var (diff_type, ".e_step");
7659
7660 /* Mark these as element loops. */
7661 tree t, e_gwv = integer_minus_one_node;
7662 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7663
7664 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7665 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7666 element_s, chunk, e_gwv, chunk);
7667 gimple_call_set_lhs (call, e_offset);
7668 gimple_set_location (call, loc);
7669 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7670
7671 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7672 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7673 element_s, chunk, e_gwv, e_offset);
7674 gimple_call_set_lhs (call, e_bound);
7675 gimple_set_location (call, loc);
7676 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7677
7678 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7679 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7680 element_s, chunk, e_gwv);
7681 gimple_call_set_lhs (call, e_step);
7682 gimple_set_location (call, loc);
7683 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7684
7685 /* Add test and split block. */
7686 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7687 stmt = gimple_build_cond_empty (expr);
7688 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7689 split = split_block (body_bb, stmt);
7690 elem_body_bb = split->dest;
7691 if (cont_bb == body_bb)
7692 cont_bb = elem_body_bb;
7693 body_bb = split->src;
7694
7695 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7696
7697 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7698 if (cont_bb == NULL)
7699 {
7700 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7701 e->probability = profile_probability::even ();
7702 split->probability = profile_probability::even ();
7703 }
7704
7705 /* Initialize the user's loop vars. */
7706 gsi = gsi_start_bb (elem_body_bb);
7707 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7708 }
7709 }
7710
7711 /* Loop increment goes into cont_bb. If this is not a loop, we
7712 will have spawned threads as if it was, and each one will
7713 execute one iteration. The specification is not explicit about
7714 whether such constructs are ill-formed or not, and they can
7715 occur, especially when noreturn routines are involved. */
7716 if (cont_bb)
7717 {
7718 gsi = gsi_last_nondebug_bb (cont_bb);
7719 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7720 loc = gimple_location (cont_stmt);
7721
7722 if (fd->tiling)
7723 {
7724 /* Insert element loop increment and test. */
7725 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7726 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7727 true, GSI_SAME_STMT);
7728 ass = gimple_build_assign (e_offset, expr);
7729 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7730 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7731
7732 stmt = gimple_build_cond_empty (expr);
7733 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7734 split = split_block (cont_bb, stmt);
7735 elem_cont_bb = split->src;
7736 cont_bb = split->dest;
7737
7738 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7739 split->probability = profile_probability::unlikely ().guessed ();
7740 edge latch_edge
7741 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7742 latch_edge->probability = profile_probability::likely ().guessed ();
7743
7744 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7745 skip_edge->probability = profile_probability::unlikely ().guessed ();
7746 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7747 loop_entry_edge->probability
7748 = profile_probability::likely ().guessed ();
7749
7750 gsi = gsi_for_stmt (cont_stmt);
7751 }
7752
7753 /* Increment offset. */
7754 if (gimple_in_ssa_p (cfun))
7755 expr = build2 (plus_code, iter_type, offset,
7756 fold_convert (plus_type, step));
7757 else
7758 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7759 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7760 true, GSI_SAME_STMT);
7761 ass = gimple_build_assign (offset_incr, expr);
7762 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7763 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7764 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7765
7766 /* Remove the GIMPLE_OMP_CONTINUE. */
7767 gsi_remove (&gsi, true);
7768
7769 /* Fixup edges from cont_bb. */
7770 be = BRANCH_EDGE (cont_bb);
7771 fte = FALLTHRU_EDGE (cont_bb);
7772 be->flags |= EDGE_TRUE_VALUE;
7773 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7774
7775 if (chunking)
7776 {
7777 /* Split the beginning of exit_bb to make bottom_bb. We
7778 need to insert a nop at the start, because splitting is
7779 after a stmt, not before. */
7780 gsi = gsi_start_bb (exit_bb);
7781 stmt = gimple_build_nop ();
7782 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7783 split = split_block (exit_bb, stmt);
7784 bottom_bb = split->src;
7785 exit_bb = split->dest;
7786 gsi = gsi_last_bb (bottom_bb);
7787
7788 /* Chunk increment and test goes into bottom_bb. */
7789 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7790 build_int_cst (diff_type, 1));
7791 ass = gimple_build_assign (chunk_no, expr);
7792 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7793
7794 /* Chunk test at end of bottom_bb. */
7795 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7796 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7797 GSI_CONTINUE_LINKING);
7798
7799 /* Fixup edges from bottom_bb. */
7800 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7801 split->probability = profile_probability::unlikely ().guessed ();
7802 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7803 latch_edge->probability = profile_probability::likely ().guessed ();
7804 }
7805 }
7806
7807 gsi = gsi_last_nondebug_bb (exit_bb);
7808 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7809 loc = gimple_location (gsi_stmt (gsi));
7810
7811 if (!gimple_in_ssa_p (cfun))
7812 {
7813 /* Insert the final value of V, in case it is live. This is the
7814 value for the only thread that survives past the join. */
7815 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7816 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7817 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7818 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7819 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7820 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7821 true, GSI_SAME_STMT);
7822 ass = gimple_build_assign (v, expr);
7823 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7824 }
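/* Worked example of the final-value computation above: for B = 0,
   E = 10, S = 3 and an upward loop, range = 10 and DIR = 1, so
   V = 0 + ((10 - 1 + 3) / 3) * 3 = 12, which matches the value the
   iteration variable would have after "for (i = 0; i < 10; i += 3)".  */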
7825
7826 /* Remove the OMP_RETURN. */
7827 gsi_remove (&gsi, true);
7828
7829 if (cont_bb)
7830 {
7831 /* We now have one, two or three nested loops. Update the loop
7832 structures. */
7833 class loop *parent = entry_bb->loop_father;
7834 class loop *body = body_bb->loop_father;
7835
7836 if (chunking)
7837 {
7838 class loop *chunk_loop = alloc_loop ();
7839 chunk_loop->header = head_bb;
7840 chunk_loop->latch = bottom_bb;
7841 add_loop (chunk_loop, parent);
7842 parent = chunk_loop;
7843 }
7844 else if (parent != body)
7845 {
7846 gcc_assert (body->header == body_bb);
7847 gcc_assert (body->latch == cont_bb
7848 || single_pred (body->latch) == cont_bb);
7849 parent = NULL;
7850 }
7851
7852 if (parent)
7853 {
7854 class loop *body_loop = alloc_loop ();
7855 body_loop->header = body_bb;
7856 body_loop->latch = cont_bb;
7857 add_loop (body_loop, parent);
7858
7859 if (fd->tiling)
7860 {
7861 /* Insert tiling's element loop. */
7862 class loop *inner_loop = alloc_loop ();
7863 inner_loop->header = elem_body_bb;
7864 inner_loop->latch = elem_cont_bb;
7865 add_loop (inner_loop, body_loop);
7866 }
7867 }
7868 }
7869 }
7870
7871 /* Expand the OMP loop defined by REGION. */
7872
7873 static void
7874 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7875 {
7876 struct omp_for_data fd;
7877 struct omp_for_data_loop *loops;
7878
7879 loops = XALLOCAVEC (struct omp_for_data_loop,
7880 gimple_omp_for_collapse (last_stmt (region->entry)));
7881 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7882 &fd, loops);
7883 region->sched_kind = fd.sched_kind;
7884 region->sched_modifiers = fd.sched_modifiers;
7885 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7886 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7887 {
7888 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7889 if ((loops[i].m1 || loops[i].m2)
7890 && (loops[i].m1 == NULL_TREE
7891 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7892 && (loops[i].m2 == NULL_TREE
7893 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7894 && TREE_CODE (loops[i].step) == INTEGER_CST
7895 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7896 {
7897 tree t;
7898 tree itype = TREE_TYPE (loops[i].v);
7899 if (loops[i].m1 && loops[i].m2)
7900 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7901 else if (loops[i].m1)
7902 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7903 else
7904 t = loops[i].m2;
7905 t = fold_build2 (MULT_EXPR, itype, t,
7906 fold_convert (itype,
7907 loops[i - loops[i].outer].step));
7908 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7909 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7910 fold_build1 (NEGATE_EXPR, itype, t),
7911 fold_build1 (NEGATE_EXPR, itype,
7912 fold_convert (itype,
7913 loops[i].step)));
7914 else
7915 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7916 fold_convert (itype, loops[i].step));
7917 if (integer_nonzerop (t))
7918 error_at (gimple_location (fd.for_stmt),
7919 "invalid OpenMP non-rectangular loop step; "
7920 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7921 "step %qE",
7922 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7923 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7924 loops[i - loops[i].outer].step, i + 1,
7925 loops[i].step);
7926 }
7927 }
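/* Example of a nest rejected by the check above (a sketch):

     #pragma omp for collapse(2)
     for (i = 0; i < n; i += 2)
       for (j = i; j < 2 * i; j += 3)

   Here m1 = 1, m2 = 2 and the outer step is 2, so (2 - 1) * 2 = 2 is
   not a multiple of the inner step 3, and the nest is diagnosed with
   the error above.  */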
7928
7929 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7930 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7931 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7932 if (region->cont)
7933 {
7934 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7935 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7936 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7937 }
7938 else
7939 /* If there isn't a continue then this is a degenerate case where
7940 the introduction of abnormal edges during lowering will prevent
7941 original loops from being detected. Fix that up. */
7942 loops_state_set (LOOPS_NEED_FIXUP);
7943
7944 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7945 expand_omp_simd (region, &fd);
7946 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7947 {
7948 gcc_assert (!inner_stmt && !fd.non_rect);
7949 expand_oacc_for (region, &fd);
7950 }
7951 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7952 {
7953 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7954 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7955 else
7956 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
7957 }
7958 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
7959 && !fd.have_ordered)
7960 {
7961 if (fd.chunk_size == NULL)
7962 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
7963 else
7964 expand_omp_for_static_chunk (region, &fd, inner_stmt);
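/* For example, "#pragma omp for schedule(static)" (no chunk size given)
   is handled by expand_omp_for_static_nochunk above, while
   "#pragma omp for schedule(static, 4)" takes the chunked variant.  */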
7965 }
7966 else
7967 {
7968 int fn_index, start_ix, next_ix;
7969 unsigned HOST_WIDE_INT sched = 0;
7970 tree sched_arg = NULL_TREE;
7971
7972 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
7973 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
7974 if (fd.chunk_size == NULL
7975 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
7976 fd.chunk_size = integer_zero_node;
7977 switch (fd.sched_kind)
7978 {
7979 case OMP_CLAUSE_SCHEDULE_RUNTIME:
7980 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
7981 && fd.lastprivate_conditional == 0)
7982 {
7983 gcc_assert (!fd.have_ordered);
7984 fn_index = 6;
7985 sched = 4;
7986 }
7987 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7988 && !fd.have_ordered
7989 && fd.lastprivate_conditional == 0)
7990 fn_index = 7;
7991 else
7992 {
7993 fn_index = 3;
7994 sched = (HOST_WIDE_INT_1U << 31);
7995 }
7996 break;
7997 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
7998 case OMP_CLAUSE_SCHEDULE_GUIDED:
7999 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8000 && !fd.have_ordered
8001 && fd.lastprivate_conditional == 0)
8002 {
8003 fn_index = 3 + fd.sched_kind;
8004 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8005 break;
8006 }
8007 fn_index = fd.sched_kind;
8008 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8009 sched += (HOST_WIDE_INT_1U << 31);
8010 break;
8011 case OMP_CLAUSE_SCHEDULE_STATIC:
8012 gcc_assert (fd.have_ordered);
8013 fn_index = 0;
8014 sched = (HOST_WIDE_INT_1U << 31) + 1;
8015 break;
8016 default:
8017 gcc_unreachable ();
8018 }
8019 if (!fd.ordered)
8020 fn_index += fd.have_ordered * 8;
8021 if (fd.ordered)
8022 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8023 else
8024 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8025 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8026 if (fd.have_reductemp || fd.have_pointer_condtemp)
8027 {
8028 if (fd.ordered)
8029 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8030 else if (fd.have_ordered)
8031 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8032 else
8033 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8034 sched_arg = build_int_cstu (long_integer_type_node, sched);
8035 if (!fd.chunk_size)
8036 fd.chunk_size = integer_zero_node;
8037 }
8038 if (fd.iter_type == long_long_unsigned_type_node)
8039 {
8040 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8041 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8042 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8043 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8044 }
8045 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8046 (enum built_in_function) next_ix, sched_arg,
8047 inner_stmt);
8048 }
8049
8050 if (gimple_in_ssa_p (cfun))
8051 update_ssa (TODO_update_ssa_only_virtuals);
8052 }
8053
8054 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8055
8056 v = GOMP_sections_start (n);
8057 L0:
8058 switch (v)
8059 {
8060 case 0:
8061 goto L2;
8062 case 1:
8063 section 1;
8064 goto L1;
8065 case 2:
8066 ...
8067 case n:
8068 ...
8069 default:
8070 abort ();
8071 }
8072 L1:
8073 v = GOMP_sections_next ();
8074 goto L0;
8075 L2:
8076 reduction;
8077
8078 If this is a combined parallel sections, replace the call to
8079 GOMP_sections_start with call to GOMP_sections_next. */
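/* A source-level sketch of the mapping: for

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   GOMP_sections_start (2) (or GOMP_sections_next () in the combined
   parallel case) returns 1 or 2 to select a section, or 0 once no work
   is left, and the switch generated below dispatches on that value.  */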
8080
8081 static void
8082 expand_omp_sections (struct omp_region *region)
8083 {
8084 tree t, u, vin = NULL, vmain, vnext, l2;
8085 unsigned len;
8086 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8087 gimple_stmt_iterator si, switch_si;
8088 gomp_sections *sections_stmt;
8089 gimple *stmt;
8090 gomp_continue *cont;
8091 edge_iterator ei;
8092 edge e;
8093 struct omp_region *inner;
8094 unsigned i, casei;
8095 bool exit_reachable = region->cont != NULL;
8096
8097 gcc_assert (region->exit != NULL);
8098 entry_bb = region->entry;
8099 l0_bb = single_succ (entry_bb);
8100 l1_bb = region->cont;
8101 l2_bb = region->exit;
8102 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8103 l2 = gimple_block_label (l2_bb);
8104 else
8105 {
8106 /* This can happen if there are reductions. */
8107 len = EDGE_COUNT (l0_bb->succs);
8108 gcc_assert (len > 0);
8109 e = EDGE_SUCC (l0_bb, len - 1);
8110 si = gsi_last_nondebug_bb (e->dest);
8111 l2 = NULL_TREE;
8112 if (gsi_end_p (si)
8113 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8114 l2 = gimple_block_label (e->dest);
8115 else
8116 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8117 {
8118 si = gsi_last_nondebug_bb (e->dest);
8119 if (gsi_end_p (si)
8120 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8121 {
8122 l2 = gimple_block_label (e->dest);
8123 break;
8124 }
8125 }
8126 }
8127 if (exit_reachable)
8128 default_bb = create_empty_bb (l1_bb->prev_bb);
8129 else
8130 default_bb = create_empty_bb (l0_bb);
8131
8132 /* We will build a switch() with enough cases for all the
8133 GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work
8134 and a default case to abort if something goes wrong. */
8135 len = EDGE_COUNT (l0_bb->succs);
8136
8137 /* Use vec::quick_push on label_vec throughout, since we know the size
8138 in advance. */
8139 auto_vec<tree> label_vec (len);
8140
8141 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8142 GIMPLE_OMP_SECTIONS statement. */
8143 si = gsi_last_nondebug_bb (entry_bb);
8144 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8145 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8146 vin = gimple_omp_sections_control (sections_stmt);
8147 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8148 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8149 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8150 tree cond_var = NULL_TREE;
8151 if (reductmp || condtmp)
8152 {
8153 tree reductions = null_pointer_node, mem = null_pointer_node;
8154 tree memv = NULL_TREE, condtemp = NULL_TREE;
8155 gimple_stmt_iterator gsi = gsi_none ();
8156 gimple *g = NULL;
8157 if (reductmp)
8158 {
8159 reductions = OMP_CLAUSE_DECL (reductmp);
8160 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8161 g = SSA_NAME_DEF_STMT (reductions);
8162 reductions = gimple_assign_rhs1 (g);
8163 OMP_CLAUSE_DECL (reductmp) = reductions;
8164 gsi = gsi_for_stmt (g);
8165 }
8166 else
8167 gsi = si;
8168 if (condtmp)
8169 {
8170 condtemp = OMP_CLAUSE_DECL (condtmp);
8171 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8172 OMP_CLAUSE__CONDTEMP_);
8173 cond_var = OMP_CLAUSE_DECL (c);
8174 tree type = TREE_TYPE (condtemp);
8175 memv = create_tmp_var (type);
8176 TREE_ADDRESSABLE (memv) = 1;
8177 unsigned cnt = 0;
8178 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8179 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8180 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8181 ++cnt;
8182 unsigned HOST_WIDE_INT sz
8183 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8184 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8185 false);
8186 mem = build_fold_addr_expr (memv);
8187 }
8188 t = build_int_cst (unsigned_type_node, len - 1);
8189 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8190 stmt = gimple_build_call (u, 3, t, reductions, mem);
8191 gimple_call_set_lhs (stmt, vin);
8192 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8193 if (condtmp)
8194 {
8195 expand_omp_build_assign (&gsi, condtemp, memv, false);
8196 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8197 vin, build_one_cst (TREE_TYPE (cond_var)));
8198 expand_omp_build_assign (&gsi, cond_var, t, false);
8199 }
8200 if (reductmp)
8201 {
8202 gsi_remove (&gsi, true);
8203 release_ssa_name (gimple_assign_lhs (g));
8204 }
8205 }
8206 else if (!is_combined_parallel (region))
8207 {
8208 /* If we are not inside a combined parallel+sections region,
8209 call GOMP_sections_start. */
8210 t = build_int_cst (unsigned_type_node, len - 1);
8211 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8212 stmt = gimple_build_call (u, 1, t);
8213 }
8214 else
8215 {
8216 /* Otherwise, call GOMP_sections_next. */
8217 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8218 stmt = gimple_build_call (u, 0);
8219 }
8220 if (!reductmp && !condtmp)
8221 {
8222 gimple_call_set_lhs (stmt, vin);
8223 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8224 }
8225 gsi_remove (&si, true);
8226
8227 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8228 L0_BB. */
8229 switch_si = gsi_last_nondebug_bb (l0_bb);
8230 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8231 if (exit_reachable)
8232 {
8233 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8234 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8235 vmain = gimple_omp_continue_control_use (cont);
8236 vnext = gimple_omp_continue_control_def (cont);
8237 }
8238 else
8239 {
8240 vmain = vin;
8241 vnext = NULL_TREE;
8242 }
8243
8244 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8245 label_vec.quick_push (t);
8246 i = 1;
8247
8248 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8249 for (inner = region->inner, casei = 1;
8250 inner;
8251 inner = inner->next, i++, casei++)
8252 {
8253 basic_block s_entry_bb, s_exit_bb;
8254
8255 /* Skip optional reduction region. */
8256 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8257 {
8258 --i;
8259 --casei;
8260 continue;
8261 }
8262
8263 s_entry_bb = inner->entry;
8264 s_exit_bb = inner->exit;
8265
8266 t = gimple_block_label (s_entry_bb);
8267 u = build_int_cst (unsigned_type_node, casei);
8268 u = build_case_label (u, NULL, t);
8269 label_vec.quick_push (u);
8270
8271 si = gsi_last_nondebug_bb (s_entry_bb);
8272 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8273 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8274 gsi_remove (&si, true);
8275 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8276
8277 if (s_exit_bb == NULL)
8278 continue;
8279
8280 si = gsi_last_nondebug_bb (s_exit_bb);
8281 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8282 gsi_remove (&si, true);
8283
8284 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8285 }
8286
8287 /* Error handling code goes in DEFAULT_BB. */
8288 t = gimple_block_label (default_bb);
8289 u = build_case_label (NULL, NULL, t);
8290 make_edge (l0_bb, default_bb, 0);
8291 add_bb_to_loop (default_bb, current_loops->tree_root);
8292
8293 stmt = gimple_build_switch (vmain, u, label_vec);
8294 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8295 gsi_remove (&switch_si, true);
8296
8297 si = gsi_start_bb (default_bb);
8298 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8299 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8300
8301 if (exit_reachable)
8302 {
8303 tree bfn_decl;
8304
8305 /* Code to get the next section goes in L1_BB. */
8306 si = gsi_last_nondebug_bb (l1_bb);
8307 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8308
8309 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8310 stmt = gimple_build_call (bfn_decl, 0);
8311 gimple_call_set_lhs (stmt, vnext);
8312 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8313 if (cond_var)
8314 {
8315 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8316 vnext, build_one_cst (TREE_TYPE (cond_var)));
8317 expand_omp_build_assign (&si, cond_var, t, false);
8318 }
8319 gsi_remove (&si, true);
8320
8321 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8322 }
8323
8324 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8325 si = gsi_last_nondebug_bb (l2_bb);
8326 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8327 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8328 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8329 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8330 else
8331 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8332 stmt = gimple_build_call (t, 0);
8333 if (gimple_omp_return_lhs (gsi_stmt (si)))
8334 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8335 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8336 gsi_remove (&si, true);
8337
8338 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8339 }
8340
8341 /* Expand code for an OpenMP single directive. We've already expanded
8342 much of the code; here we simply place the GOMP_barrier call. */
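/* For example, a plain

     #pragma omp single
     body ();

   has already had its body lowered; all that is left here is to drop the
   entry/exit markers and, unless a nowait clause was present, emit the
   implicit barrier (built by omp_build_barrier) at the region exit. */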
8343
8344 static void
8345 expand_omp_single (struct omp_region *region)
8346 {
8347 basic_block entry_bb, exit_bb;
8348 gimple_stmt_iterator si;
8349
8350 entry_bb = region->entry;
8351 exit_bb = region->exit;
8352
8353 si = gsi_last_nondebug_bb (entry_bb);
8354 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8355 gsi_remove (&si, true);
8356 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8357
8358 si = gsi_last_nondebug_bb (exit_bb);
8359 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8360 {
8361 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8362 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8363 }
8364 gsi_remove (&si, true);
8365 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8366 }
8367
8368 /* Generic expansion for OpenMP synchronization directives: master,
8369 taskgroup, ordered, critical and teams. All we need to do here is
8370 remove the entry and exit markers for REGION. */
8371
8372 static void
8373 expand_omp_synch (struct omp_region *region)
8374 {
8375 basic_block entry_bb, exit_bb;
8376 gimple_stmt_iterator si;
8377
8378 entry_bb = region->entry;
8379 exit_bb = region->exit;
8380
8381 si = gsi_last_nondebug_bb (entry_bb);
8382 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8383 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8384 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8385 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8386 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8387 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8388 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8389 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8390 {
8391 expand_omp_taskreg (region);
8392 return;
8393 }
8394 gsi_remove (&si, true);
8395 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8396
8397 if (exit_bb)
8398 {
8399 si = gsi_last_nondebug_bb (exit_bb);
8400 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8401 gsi_remove (&si, true);
8402 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8403 }
8404 }
8405
8406 /* Translate enum omp_memory_order to enum memmodel. The two enums
8407 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8408 is 0. */
8409
8410 static enum memmodel
8411 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8412 {
8413 switch (mo)
8414 {
8415 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8416 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8417 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8418 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8419 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8420 default: gcc_unreachable ();
8421 }
8422 }
8423
8424 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8425 operation as a normal volatile load. */
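/* As an illustration, a read such as

     #pragma omp atomic read
     v = x;

   where X is a 4-byte int is expanded here into roughly

     v = __atomic_load_4 (&x, memmodel);

   with MEMMODEL derived from the construct's memory order (relaxed
   unless a memory-order clause or a requires directive says otherwise). */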
8426
8427 static bool
8428 expand_omp_atomic_load (basic_block load_bb, tree addr,
8429 tree loaded_val, int index)
8430 {
8431 enum built_in_function tmpbase;
8432 gimple_stmt_iterator gsi;
8433 basic_block store_bb;
8434 location_t loc;
8435 gimple *stmt;
8436 tree decl, call, type, itype;
8437
8438 gsi = gsi_last_nondebug_bb (load_bb);
8439 stmt = gsi_stmt (gsi);
8440 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8441 loc = gimple_location (stmt);
8442
8443 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8444 is smaller than word size, then expand_atomic_load assumes that the load
8445 is atomic. We could avoid the builtin entirely in this case. */
8446
8447 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8448 decl = builtin_decl_explicit (tmpbase);
8449 if (decl == NULL_TREE)
8450 return false;
8451
8452 type = TREE_TYPE (loaded_val);
8453 itype = TREE_TYPE (TREE_TYPE (decl));
8454
8455 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8456 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8457 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8458 if (!useless_type_conversion_p (type, itype))
8459 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8460 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8461
8462 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8463 gsi_remove (&gsi, true);
8464
8465 store_bb = single_succ (load_bb);
8466 gsi = gsi_last_nondebug_bb (store_bb);
8467 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8468 gsi_remove (&gsi, true);
8469
8470 if (gimple_in_ssa_p (cfun))
8471 update_ssa (TODO_update_ssa_no_phi);
8472
8473 return true;
8474 }
8475
8476 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8477 operation as a normal volatile store. */
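/* As an illustration, a write such as

     #pragma omp atomic write
     x = expr;

   on a 4-byte X becomes roughly __atomic_store_4 (&x, expr, memmodel);
   if the old value is needed as well (a capture form), the store is
   turned into __atomic_exchange_4 instead, as selected below via
   gimple_omp_atomic_need_value_p. */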
8478
8479 static bool
8480 expand_omp_atomic_store (basic_block load_bb, tree addr,
8481 tree loaded_val, tree stored_val, int index)
8482 {
8483 enum built_in_function tmpbase;
8484 gimple_stmt_iterator gsi;
8485 basic_block store_bb = single_succ (load_bb);
8486 location_t loc;
8487 gimple *stmt;
8488 tree decl, call, type, itype;
8489 machine_mode imode;
8490 bool exchange;
8491
8492 gsi = gsi_last_nondebug_bb (load_bb);
8493 stmt = gsi_stmt (gsi);
8494 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8495
8496 /* If the load value is needed, then this isn't a store but an exchange. */
8497 exchange = gimple_omp_atomic_need_value_p (stmt);
8498
8499 gsi = gsi_last_nondebug_bb (store_bb);
8500 stmt = gsi_stmt (gsi);
8501 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8502 loc = gimple_location (stmt);
8503
8504 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8505 is smaller than word size, then expand_atomic_store assumes that the store
8506 is atomic. We could avoid the builtin entirely in this case. */
8507
8508 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8509 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8510 decl = builtin_decl_explicit (tmpbase);
8511 if (decl == NULL_TREE)
8512 return false;
8513
8514 type = TREE_TYPE (stored_val);
8515
8516 /* Dig out the type of the function's second argument. */
8517 itype = TREE_TYPE (decl);
8518 itype = TYPE_ARG_TYPES (itype);
8519 itype = TREE_CHAIN (itype);
8520 itype = TREE_VALUE (itype);
8521 imode = TYPE_MODE (itype);
8522
8523 if (exchange && !can_atomic_exchange_p (imode, true))
8524 return false;
8525
8526 if (!useless_type_conversion_p (itype, type))
8527 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8528 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8529 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8530 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8531 if (exchange)
8532 {
8533 if (!useless_type_conversion_p (type, itype))
8534 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8535 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8536 }
8537
8538 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8539 gsi_remove (&gsi, true);
8540
8541 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8542 gsi = gsi_last_nondebug_bb (load_bb);
8543 gsi_remove (&gsi, true);
8544
8545 if (gimple_in_ssa_p (cfun))
8546 update_ssa (TODO_update_ssa_no_phi);
8547
8548 return true;
8549 }
8550
8551 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8552 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8553 size of the data type, and thus usable to find the index of the builtin
8554 decl. Returns false if the expression is not of the proper form. */
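/* For example, an update such as

     #pragma omp atomic
     x += n;

   on a 4-byte int matches the PLUS_EXPR case below and becomes roughly
   __atomic_fetch_add_4 (&x, n, memmodel); if the new value is needed by
   a capture, the __atomic_add_fetch_4 variant is picked instead. */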
8555
8556 static bool
8557 expand_omp_atomic_fetch_op (basic_block load_bb,
8558 tree addr, tree loaded_val,
8559 tree stored_val, int index)
8560 {
8561 enum built_in_function oldbase, newbase, tmpbase;
8562 tree decl, itype, call;
8563 tree lhs, rhs;
8564 basic_block store_bb = single_succ (load_bb);
8565 gimple_stmt_iterator gsi;
8566 gimple *stmt;
8567 location_t loc;
8568 enum tree_code code;
8569 bool need_old, need_new;
8570 machine_mode imode;
8571
8572 /* We expect to find the following sequences:
8573
8574 load_bb:
8575 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8576
8577 store_bb:
8578 val = tmp OP something; (or: something OP tmp)
8579 GIMPLE_OMP_STORE (val)
8580
8581 ???FIXME: Allow a more flexible sequence.
8582 Perhaps use data flow to pick the statements.
8583
8584 */
8585
8586 gsi = gsi_after_labels (store_bb);
8587 stmt = gsi_stmt (gsi);
8588 if (is_gimple_debug (stmt))
8589 {
8590 gsi_next_nondebug (&gsi);
8591 if (gsi_end_p (gsi))
8592 return false;
8593 stmt = gsi_stmt (gsi);
8594 }
8595 loc = gimple_location (stmt);
8596 if (!is_gimple_assign (stmt))
8597 return false;
8598 gsi_next_nondebug (&gsi);
8599 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8600 return false;
8601 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8602 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8603 enum omp_memory_order omo
8604 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8605 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8606 gcc_checking_assert (!need_old || !need_new);
8607
8608 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8609 return false;
8610
8611 /* Check for one of the supported fetch-op operations. */
8612 code = gimple_assign_rhs_code (stmt);
8613 switch (code)
8614 {
8615 case PLUS_EXPR:
8616 case POINTER_PLUS_EXPR:
8617 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8618 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8619 break;
8620 case MINUS_EXPR:
8621 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8622 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8623 break;
8624 case BIT_AND_EXPR:
8625 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8626 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8627 break;
8628 case BIT_IOR_EXPR:
8629 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8630 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8631 break;
8632 case BIT_XOR_EXPR:
8633 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8634 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8635 break;
8636 default:
8637 return false;
8638 }
8639
8640 /* Make sure the expression is of the proper form. */
8641 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8642 rhs = gimple_assign_rhs2 (stmt);
8643 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8644 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8645 rhs = gimple_assign_rhs1 (stmt);
8646 else
8647 return false;
8648
8649 tmpbase = ((enum built_in_function)
8650 ((need_new ? newbase : oldbase) + index + 1));
8651 decl = builtin_decl_explicit (tmpbase);
8652 if (decl == NULL_TREE)
8653 return false;
8654 itype = TREE_TYPE (TREE_TYPE (decl));
8655 imode = TYPE_MODE (itype);
8656
8657 /* We could test all of the various optabs involved, but the fact of the
8658 matter is that (with the exception of i486 vs i586 and xadd) all targets
8659 that support any atomic operation optab also implement compare-and-swap.
8660 Let optabs.c take care of expanding any compare-and-swap loop. */
8661 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8662 return false;
8663
8664 gsi = gsi_last_nondebug_bb (load_bb);
8665 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8666
8667 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8668 It only requires that the operation happen atomically, so pass the
8669 memory model requested on the construct (relaxed by default). */
8670 call = build_call_expr_loc (loc, decl, 3, addr,
8671 fold_convert_loc (loc, itype, rhs),
8672 build_int_cst (NULL, mo));
8673
8674 if (need_old || need_new)
8675 {
8676 lhs = need_old ? loaded_val : stored_val;
8677 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8678 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8679 }
8680 else
8681 call = fold_convert_loc (loc, void_type_node, call);
8682 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8683 gsi_remove (&gsi, true);
8684
8685 gsi = gsi_last_nondebug_bb (store_bb);
8686 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8687 gsi_remove (&gsi, true);
8688 gsi = gsi_last_nondebug_bb (store_bb);
8689 stmt = gsi_stmt (gsi);
8690 gsi_remove (&gsi, true);
8691
8692 if (gimple_in_ssa_p (cfun))
8693 {
8694 release_defs (stmt);
8695 update_ssa (TODO_update_ssa_no_phi);
8696 }
8697
8698 return true;
8699 }
8700
8701 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8702
8703 oldval = *addr;
8704 repeat:
8705 newval = rhs; // with oldval replacing *addr in rhs
8706 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8707 if (oldval != newval)
8708 goto repeat;
8709
8710 INDEX is log2 of the size of the data type, and thus usable to find the
8711 index of the builtin decl. */
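/* As an illustration, this compare-and-swap loop is the path taken when
   the update is not one of the supported fetch-op forms, for example

     #pragma omp atomic
     x = x * 2.0;

   with X of type double: the value is view-converted to an 8-byte
   integer and the loop retries __sync_val_compare_and_swap_8 until the
   value seen by the swap matches the one the new result was computed
   from. */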
8712
8713 static bool
8714 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8715 tree addr, tree loaded_val, tree stored_val,
8716 int index)
8717 {
8718 tree loadedi, storedi, initial, new_storedi, old_vali;
8719 tree type, itype, cmpxchg, iaddr, atype;
8720 gimple_stmt_iterator si;
8721 basic_block loop_header = single_succ (load_bb);
8722 gimple *phi, *stmt;
8723 edge e;
8724 enum built_in_function fncode;
8725
8726 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8727 order to use the RELAXED memory model effectively. */
8728 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8729 + index + 1);
8730 cmpxchg = builtin_decl_explicit (fncode);
8731 if (cmpxchg == NULL_TREE)
8732 return false;
8733 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8734 atype = type;
8735 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8736
8737 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8738 || !can_atomic_load_p (TYPE_MODE (itype)))
8739 return false;
8740
8741 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8742 si = gsi_last_nondebug_bb (load_bb);
8743 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8744
8745 /* For floating-point values, we'll need to view-convert them to integers
8746 so that we can perform the atomic compare and swap. Simplify the
8747 following code by always setting up the "i"ntegral variables. */
8748 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8749 {
8750 tree iaddr_val;
8751
8752 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8753 true));
8754 atype = itype;
8755 iaddr_val
8756 = force_gimple_operand_gsi (&si,
8757 fold_convert (TREE_TYPE (iaddr), addr),
8758 false, NULL_TREE, true, GSI_SAME_STMT);
8759 stmt = gimple_build_assign (iaddr, iaddr_val);
8760 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8761 loadedi = create_tmp_var (itype);
8762 if (gimple_in_ssa_p (cfun))
8763 loadedi = make_ssa_name (loadedi);
8764 }
8765 else
8766 {
8767 iaddr = addr;
8768 loadedi = loaded_val;
8769 }
8770
8771 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8772 tree loaddecl = builtin_decl_explicit (fncode);
8773 if (loaddecl)
8774 initial
8775 = fold_convert (atype,
8776 build_call_expr (loaddecl, 2, iaddr,
8777 build_int_cst (NULL_TREE,
8778 MEMMODEL_RELAXED)));
8779 else
8780 {
8781 tree off
8782 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8783 true), 0);
8784 initial = build2 (MEM_REF, atype, iaddr, off);
8785 }
8786
8787 initial
8788 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8789 GSI_SAME_STMT);
8790
8791 /* Move the value to the LOADEDI temporary. */
8792 if (gimple_in_ssa_p (cfun))
8793 {
8794 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8795 phi = create_phi_node (loadedi, loop_header);
8796 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8797 initial);
8798 }
8799 else
8800 gsi_insert_before (&si,
8801 gimple_build_assign (loadedi, initial),
8802 GSI_SAME_STMT);
8803 if (loadedi != loaded_val)
8804 {
8805 gimple_stmt_iterator gsi2;
8806 tree x;
8807
8808 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8809 gsi2 = gsi_start_bb (loop_header);
8810 if (gimple_in_ssa_p (cfun))
8811 {
8812 gassign *stmt;
8813 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8814 true, GSI_SAME_STMT);
8815 stmt = gimple_build_assign (loaded_val, x);
8816 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8817 }
8818 else
8819 {
8820 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8821 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8822 true, GSI_SAME_STMT);
8823 }
8824 }
8825 gsi_remove (&si, true);
8826
8827 si = gsi_last_nondebug_bb (store_bb);
8828 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8829
8830 if (iaddr == addr)
8831 storedi = stored_val;
8832 else
8833 storedi
8834 = force_gimple_operand_gsi (&si,
8835 build1 (VIEW_CONVERT_EXPR, itype,
8836 stored_val), true, NULL_TREE, true,
8837 GSI_SAME_STMT);
8838
8839 /* Build the compare&swap statement. */
8840 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8841 new_storedi = force_gimple_operand_gsi (&si,
8842 fold_convert (TREE_TYPE (loadedi),
8843 new_storedi),
8844 true, NULL_TREE,
8845 true, GSI_SAME_STMT);
8846
8847 if (gimple_in_ssa_p (cfun))
8848 old_vali = loadedi;
8849 else
8850 {
8851 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8852 stmt = gimple_build_assign (old_vali, loadedi);
8853 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8854
8855 stmt = gimple_build_assign (loadedi, new_storedi);
8856 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8857 }
8858
8859 /* Note that we always perform the comparison as an integer, even for
8860 floating point. This allows the atomic operation to properly
8861 succeed even with NaNs and -0.0. */
8862 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8863 stmt = gimple_build_cond_empty (ne);
8864 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8865
8866 /* Update cfg. */
8867 e = single_succ_edge (store_bb);
8868 e->flags &= ~EDGE_FALLTHRU;
8869 e->flags |= EDGE_FALSE_VALUE;
8870 /* Expect no looping. */
8871 e->probability = profile_probability::guessed_always ();
8872
8873 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8874 e->probability = profile_probability::guessed_never ();
8875
8876 /* Copy the new value to loadedi (we already did that before the condition
8877 if we are not in SSA). */
8878 if (gimple_in_ssa_p (cfun))
8879 {
8880 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8881 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8882 }
8883
8884 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8885 gsi_remove (&si, true);
8886
8887 class loop *loop = alloc_loop ();
8888 loop->header = loop_header;
8889 loop->latch = store_bb;
8890 add_loop (loop, loop_header->loop_father);
8891
8892 if (gimple_in_ssa_p (cfun))
8893 update_ssa (TODO_update_ssa_no_phi);
8894
8895 return true;
8896 }
8897
8898 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8899
8900 GOMP_atomic_start ();
8901 *addr = rhs;
8902 GOMP_atomic_end ();
8903
8904 The result is not globally atomic, but works so long as all parallel
8905 references are within #pragma omp atomic directives. According to
8906 responses received from omp@openmp.org, this appears to be within spec,
8907 which makes sense, since that's how several other compilers handle
8908 this situation as well.
8909 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8910 expanding. STORED_VAL is the operand of the matching
8911 GIMPLE_OMP_ATOMIC_STORE.
8912
8913 We replace
8914 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8915 loaded_val = *addr;
8916
8917 and replace
8918 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8919 *addr = stored_val;
8920 */
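/* As an illustration of when this fallback is reached: a type whose size
   is not 1, 2, 4, 8 or 16 bytes, or whose alignment is too small for the
   __atomic/__sync builtins, fails every lock-free strategy tried in
   expand_omp_atomic below, and its update is simply serialized between
   GOMP_atomic_start () and GOMP_atomic_end (). */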
8921
8922 static bool
8923 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8924 tree addr, tree loaded_val, tree stored_val)
8925 {
8926 gimple_stmt_iterator si;
8927 gassign *stmt;
8928 tree t;
8929
8930 si = gsi_last_nondebug_bb (load_bb);
8931 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8932
8933 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8934 t = build_call_expr (t, 0);
8935 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8936
8937 tree mem = build_simple_mem_ref (addr);
8938 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8939 TREE_OPERAND (mem, 1)
8940 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8941 true),
8942 TREE_OPERAND (mem, 1));
8943 stmt = gimple_build_assign (loaded_val, mem);
8944 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8945 gsi_remove (&si, true);
8946
8947 si = gsi_last_nondebug_bb (store_bb);
8948 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8949
8950 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8951 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8952
8953 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8954 t = build_call_expr (t, 0);
8955 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8956 gsi_remove (&si, true);
8957
8958 if (gimple_in_ssa_p (cfun))
8959 update_ssa (TODO_update_ssa_no_phi);
8960 return true;
8961 }
8962
8963 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
8964 using expand_omp_atomic_fetch_op. If that fails, we try to
8965 call expand_omp_atomic_pipeline, and if it fails too, the
8966 ultimate fallback is wrapping the operation in a mutex
8967 (expand_omp_atomic_mutex). REGION is the atomic region built
8968 by build_omp_regions_1(). */
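/* Worked example of the size check below (illustrative): a 4-byte int
   has TYPE_SIZE_UNIT 4, so index = exact_log2 (4) = 2, selecting the
   *_4 family of builtins (BUILT_IN_*_N + index + 1); an 8-byte double
   gives index 3 and the *_8 family; a size that is not a power of two,
   or is larger than 16 bytes, yields an index outside 0..4 and falls
   through to the mutex fallback. */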
8969
8970 static void
8971 expand_omp_atomic (struct omp_region *region)
8972 {
8973 basic_block load_bb = region->entry, store_bb = region->exit;
8974 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
8975 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
8976 tree loaded_val = gimple_omp_atomic_load_lhs (load);
8977 tree addr = gimple_omp_atomic_load_rhs (load);
8978 tree stored_val = gimple_omp_atomic_store_val (store);
8979 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8980 HOST_WIDE_INT index;
8981
8982 /* Make sure the type is one of the supported sizes. */
8983 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
8984 index = exact_log2 (index);
8985 if (index >= 0 && index <= 4)
8986 {
8987 unsigned int align = TYPE_ALIGN_UNIT (type);
8988
8989 /* __sync builtins require strict data alignment. */
8990 if (exact_log2 (align) >= index)
8991 {
8992 /* Atomic load. */
8993 scalar_mode smode;
8994 if (loaded_val == stored_val
8995 && (is_int_mode (TYPE_MODE (type), &smode)
8996 || is_float_mode (TYPE_MODE (type), &smode))
8997 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
8998 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
8999 return;
9000
9001 /* Atomic store. */
9002 if ((is_int_mode (TYPE_MODE (type), &smode)
9003 || is_float_mode (TYPE_MODE (type), &smode))
9004 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9005 && store_bb == single_succ (load_bb)
9006 && first_stmt (store_bb) == store
9007 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9008 stored_val, index))
9009 return;
9010
9011 /* When possible, use specialized atomic update functions. */
9012 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9013 && store_bb == single_succ (load_bb)
9014 && expand_omp_atomic_fetch_op (load_bb, addr,
9015 loaded_val, stored_val, index))
9016 return;
9017
9018 /* If we don't have specialized __sync builtins, try and implement
9019 as a compare and swap loop. */
9020 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9021 loaded_val, stored_val, index))
9022 return;
9023 }
9024 }
9025
9026 /* The ultimate fallback is wrapping the operation in a mutex. */
9027 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9028 }
9029
9030 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9031 at REGION_EXIT. */
9032
9033 static void
9034 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9035 basic_block region_exit)
9036 {
9037 class loop *outer = region_entry->loop_father;
9038 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9039
9040 /* Don't parallelize the kernels region if it contains more than one outer
9041 loop. */
9042 unsigned int nr_outer_loops = 0;
9043 class loop *single_outer = NULL;
9044 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9045 {
9046 gcc_assert (loop_outer (loop) == outer);
9047
9048 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9049 continue;
9050
9051 if (region_exit != NULL
9052 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9053 continue;
9054
9055 nr_outer_loops++;
9056 single_outer = loop;
9057 }
9058 if (nr_outer_loops != 1)
9059 return;
9060
9061 for (class loop *loop = single_outer->inner;
9062 loop != NULL;
9063 loop = loop->inner)
9064 if (loop->next)
9065 return;
9066
9067 /* Mark the loops in the region. */
9068 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9069 loop->in_oacc_kernels_region = true;
9070 }
9071
9072 /* Build target argument identifier from the DEVICE identifier, value
9073 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9074
9075 static tree
9076 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9077 {
9078 tree t = build_int_cst (integer_type_node, device);
9079 if (subseqent_param)
9080 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9081 build_int_cst (integer_type_node,
9082 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9083 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9084 build_int_cst (integer_type_node, id));
9085 return t;
9086 }
9087
9088 /* Like above but return it in type that can be directly stored as an element
9089 of the argument array. */
9090
9091 static tree
9092 get_target_argument_identifier (int device, bool subseqent_param, int id)
9093 {
9094 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9095 return fold_convert (ptr_type_node, t);
9096 }
9097
9098 /* Return a target argument consisting of DEVICE identifier, value identifier
9099 ID, and the actual VALUE. */
9100
9101 static tree
9102 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9103 tree value)
9104 {
9105 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9106 fold_convert (integer_type_node, value),
9107 build_int_cst (unsigned_type_node,
9108 GOMP_TARGET_ARG_VALUE_SHIFT));
9109 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9110 get_target_argument_identifier_1 (device, false, id));
9111 t = fold_convert (ptr_type_node, t);
9112 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9113 }
9114
9115 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9116 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
9117 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9118 arguments. */
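/* A sketch of the resulting encoding: with a num_teams(4) clause the
   single pushed argument is roughly

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_DEVICE_ALL
               | GOMP_TARGET_ARG_NUM_TEAMS)

   whereas a value failing the range check below is passed as a separate
   pointer-sized argument following an identifier tagged with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM. */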
9119
9120 static void
9121 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9122 int id, tree value, vec <tree> *args)
9123 {
9124 if (tree_fits_shwi_p (value)
9125 && tree_to_shwi (value) > -(1 << 15)
9126 && tree_to_shwi (value) < (1 << 15))
9127 args->quick_push (get_target_argument_value (gsi, device, id, value));
9128 else
9129 {
9130 args->quick_push (get_target_argument_identifier (device, true, id));
9131 value = fold_convert (ptr_type_node, value);
9132 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9133 GSI_SAME_STMT);
9134 args->quick_push (value);
9135 }
9136 }
9137
9138 /* Create an array of arguments that is then passed to GOMP_target. */
9139
9140 static tree
9141 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9142 {
9143 auto_vec <tree, 6> args;
9144 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9145 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9146 if (c)
9147 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9148 else
9149 t = integer_minus_one_node;
9150 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9151 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9152
9153 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9154 if (c)
9155 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9156 else
9157 t = integer_minus_one_node;
9158 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9159 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9160 &args);
9161
9162 /* Produce more, perhaps device specific, arguments here. */
9163
9164 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9165 args.length () + 1),
9166 ".omp_target_args");
9167 for (unsigned i = 0; i < args.length (); i++)
9168 {
9169 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9170 build_int_cst (integer_type_node, i),
9171 NULL_TREE, NULL_TREE);
9172 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9173 GSI_SAME_STMT);
9174 }
9175 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9176 build_int_cst (integer_type_node, args.length ()),
9177 NULL_TREE, NULL_TREE);
9178 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9179 GSI_SAME_STMT);
9180 TREE_ADDRESSABLE (argarray) = 1;
9181 return build_fold_addr_expr (argarray);
9182 }
9183
9184 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9185
9186 static void
9187 expand_omp_target (struct omp_region *region)
9188 {
9189 basic_block entry_bb, exit_bb, new_bb;
9190 struct function *child_cfun;
9191 tree child_fn, block, t;
9192 gimple_stmt_iterator gsi;
9193 gomp_target *entry_stmt;
9194 gimple *stmt;
9195 edge e;
9196 bool offloaded, data_region;
9197 int target_kind;
9198
9199 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9200 target_kind = gimple_omp_target_kind (entry_stmt);
9201 new_bb = region->entry;
9202
9203 offloaded = is_gimple_omp_offloaded (entry_stmt);
9204 switch (target_kind)
9205 {
9206 case GF_OMP_TARGET_KIND_REGION:
9207 case GF_OMP_TARGET_KIND_UPDATE:
9208 case GF_OMP_TARGET_KIND_ENTER_DATA:
9209 case GF_OMP_TARGET_KIND_EXIT_DATA:
9210 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9211 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9212 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9213 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9214 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9215 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9216 data_region = false;
9217 break;
9218 case GF_OMP_TARGET_KIND_DATA:
9219 case GF_OMP_TARGET_KIND_OACC_DATA:
9220 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9221 data_region = true;
9222 break;
9223 default:
9224 gcc_unreachable ();
9225 }
9226
9227 child_fn = NULL_TREE;
9228 child_cfun = NULL;
9229 if (offloaded)
9230 {
9231 child_fn = gimple_omp_target_child_fn (entry_stmt);
9232 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9233 }
9234
9235 /* Supported by expand_omp_taskreg, but not here. */
9236 if (child_cfun != NULL)
9237 gcc_checking_assert (!child_cfun->cfg);
9238 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9239
9240 entry_bb = region->entry;
9241 exit_bb = region->exit;
9242
9243 switch (target_kind)
9244 {
9245 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9246 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9247
9248 /* Further down, all OpenACC compute constructs will be mapped to
9249 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9250 is an "oacc kernels" attribute set for OpenACC kernels. */
9251 DECL_ATTRIBUTES (child_fn)
9252 = tree_cons (get_identifier ("oacc kernels"),
9253 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9254 break;
9255 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9256 /* Further down, all OpenACC compute constructs will be mapped to
9257 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9258 is an "oacc serial" attribute set for OpenACC serial. */
9259 DECL_ATTRIBUTES (child_fn)
9260 = tree_cons (get_identifier ("oacc serial"),
9261 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9262 break;
9263 default:
9264 break;
9265 }
9266
9267 if (offloaded)
9268 {
9269 unsigned srcidx, dstidx, num;
9270
9271 /* If the offloading region needs data sent from the parent
9272 function, then the very first statement (except possible
9273 tree profile counter updates) of the offloading body
9274 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9275 &.OMP_DATA_O is passed as an argument to the child function,
9276 we need to replace it with the argument as seen by the child
9277 function.
9278
9279 In most cases, this will end up being the identity assignment
9280 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9281 a function call that has been inlined, the original PARM_DECL
9282 .OMP_DATA_I may have been converted into a different local
9283 variable, in which case we need to keep the assignment. */
9284 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9285 if (data_arg)
9286 {
9287 basic_block entry_succ_bb = single_succ (entry_bb);
9288 gimple_stmt_iterator gsi;
9289 tree arg;
9290 gimple *tgtcopy_stmt = NULL;
9291 tree sender = TREE_VEC_ELT (data_arg, 0);
9292
9293 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9294 {
9295 gcc_assert (!gsi_end_p (gsi));
9296 stmt = gsi_stmt (gsi);
9297 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9298 continue;
9299
9300 if (gimple_num_ops (stmt) == 2)
9301 {
9302 tree arg = gimple_assign_rhs1 (stmt);
9303
9304 /* We're ignoring the subcode because we're
9305 effectively doing a STRIP_NOPS. */
9306
9307 if (TREE_CODE (arg) == ADDR_EXPR
9308 && TREE_OPERAND (arg, 0) == sender)
9309 {
9310 tgtcopy_stmt = stmt;
9311 break;
9312 }
9313 }
9314 }
9315
9316 gcc_assert (tgtcopy_stmt != NULL);
9317 arg = DECL_ARGUMENTS (child_fn);
9318
9319 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9320 gsi_remove (&gsi, true);
9321 }
9322
9323 /* Declare local variables needed in CHILD_CFUN. */
9324 block = DECL_INITIAL (child_fn);
9325 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9326 /* The gimplifier could record temporaries in the offloading block
9327 rather than in containing function's local_decls chain,
9328 which would mean cgraph missed finalizing them. Do it now. */
9329 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9330 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9331 varpool_node::finalize_decl (t);
9332 DECL_SAVED_TREE (child_fn) = NULL;
9333 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9334 gimple_set_body (child_fn, NULL);
9335 TREE_USED (block) = 1;
9336
9337 /* Reset DECL_CONTEXT on function arguments. */
9338 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9339 DECL_CONTEXT (t) = child_fn;
9340
9341 /* Split ENTRY_BB at GIMPLE_*,
9342 so that it can be moved to the child function. */
9343 gsi = gsi_last_nondebug_bb (entry_bb);
9344 stmt = gsi_stmt (gsi);
9345 gcc_assert (stmt
9346 && gimple_code (stmt) == gimple_code (entry_stmt));
9347 e = split_block (entry_bb, stmt);
9348 gsi_remove (&gsi, true);
9349 entry_bb = e->dest;
9350 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9351
9352 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9353 if (exit_bb)
9354 {
9355 gsi = gsi_last_nondebug_bb (exit_bb);
9356 gcc_assert (!gsi_end_p (gsi)
9357 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9358 stmt = gimple_build_return (NULL);
9359 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9360 gsi_remove (&gsi, true);
9361 }
9362
9363 /* Move the offloading region into CHILD_CFUN. */
9364
9365 block = gimple_block (entry_stmt);
9366
9367 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9368 if (exit_bb)
9369 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9370 /* When the OMP expansion process cannot guarantee an up-to-date
9371 loop tree, arrange for the child function to fix up its loops. */
9372 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9373 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9374
9375 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9376 num = vec_safe_length (child_cfun->local_decls);
9377 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9378 {
9379 t = (*child_cfun->local_decls)[srcidx];
9380 if (DECL_CONTEXT (t) == cfun->decl)
9381 continue;
9382 if (srcidx != dstidx)
9383 (*child_cfun->local_decls)[dstidx] = t;
9384 dstidx++;
9385 }
9386 if (dstidx != num)
9387 vec_safe_truncate (child_cfun->local_decls, dstidx);
9388
9389 /* Inform the callgraph about the new function. */
9390 child_cfun->curr_properties = cfun->curr_properties;
9391 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9392 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9393 cgraph_node *node = cgraph_node::get_create (child_fn);
9394 node->parallelized_function = 1;
9395 cgraph_node::add_new_function (child_fn, true);
9396
9397 /* Add the new function to the offload table. */
9398 if (ENABLE_OFFLOADING)
9399 {
9400 if (in_lto_p)
9401 DECL_PRESERVE_P (child_fn) = 1;
9402 vec_safe_push (offload_funcs, child_fn);
9403 }
9404
9405 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9406 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9407
9408 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9409 fixed in a following pass. */
9410 push_cfun (child_cfun);
9411 if (need_asm)
9412 assign_assembler_name_if_needed (child_fn);
9413 cgraph_edge::rebuild_edges ();
9414
9415 /* Some EH regions might become dead, see PR34608. If
9416 pass_cleanup_cfg isn't the first pass to happen with the
9417 new child, these dead EH edges might cause problems.
9418 Clean them up now. */
9419 if (flag_exceptions)
9420 {
9421 basic_block bb;
9422 bool changed = false;
9423
9424 FOR_EACH_BB_FN (bb, cfun)
9425 changed |= gimple_purge_dead_eh_edges (bb);
9426 if (changed)
9427 cleanup_tree_cfg ();
9428 }
9429 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9430 verify_loop_structure ();
9431 pop_cfun ();
9432
9433 if (dump_file && !gimple_in_ssa_p (cfun))
9434 {
9435 omp_any_child_fn_dumped = true;
9436 dump_function_header (dump_file, child_fn, dump_flags);
9437 dump_function_to_file (child_fn, dump_file, dump_flags);
9438 }
9439
9440 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9441 }
9442
9443 /* Emit a library call to launch the offloading region, or do data
9444 transfers. */
9445 tree t1, t2, t3, t4, depend, c, clauses;
9446 enum built_in_function start_ix;
9447 unsigned int flags_i = 0;
9448
9449 switch (gimple_omp_target_kind (entry_stmt))
9450 {
9451 case GF_OMP_TARGET_KIND_REGION:
9452 start_ix = BUILT_IN_GOMP_TARGET;
9453 break;
9454 case GF_OMP_TARGET_KIND_DATA:
9455 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9456 break;
9457 case GF_OMP_TARGET_KIND_UPDATE:
9458 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9459 break;
9460 case GF_OMP_TARGET_KIND_ENTER_DATA:
9461 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9462 break;
9463 case GF_OMP_TARGET_KIND_EXIT_DATA:
9464 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9465 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9466 break;
9467 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9468 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9469 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9470 start_ix = BUILT_IN_GOACC_PARALLEL;
9471 break;
9472 case GF_OMP_TARGET_KIND_OACC_DATA:
9473 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9474 start_ix = BUILT_IN_GOACC_DATA_START;
9475 break;
9476 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9477 start_ix = BUILT_IN_GOACC_UPDATE;
9478 break;
9479 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9480 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9481 break;
9482 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9483 start_ix = BUILT_IN_GOACC_DECLARE;
9484 break;
9485 default:
9486 gcc_unreachable ();
9487 }
9488
9489 clauses = gimple_omp_target_clauses (entry_stmt);
9490
9491 tree device = NULL_TREE;
9492 location_t device_loc = UNKNOWN_LOCATION;
9493 tree goacc_flags = NULL_TREE;
9494 if (is_gimple_omp_oacc (entry_stmt))
9495 {
9496 /* By default, no GOACC_FLAGs are set. */
9497 goacc_flags = integer_zero_node;
9498 }
9499 else
9500 {
9501 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9502 if (c)
9503 {
9504 device = OMP_CLAUSE_DEVICE_ID (c);
9505 device_loc = OMP_CLAUSE_LOCATION (c);
9506 }
9507 else
9508 {
9509 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9510 library choose). */
9511 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9512 device_loc = gimple_location (entry_stmt);
9513 }
9514
9515 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9516 if (c)
9517 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9518 }
9519
9520 /* By default, there is no conditional. */
9521 tree cond = NULL_TREE;
9522 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9523 if (c)
9524 cond = OMP_CLAUSE_IF_EXPR (c);
9525 /* If we found the clause 'if (cond)', build:
9526 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9527 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
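/* Illustrative example: for a hypothetical '#pragma omp target if (use_gpu)'
   this builds a temporary that evaluates to DEVICE when USE_GPU is true
   and to GOMP_DEVICE_HOST_FALLBACK otherwise, and that temporary is what
   is passed to the GOMP_target launch below; the OpenACC variant instead
   ORs GOACC_FLAG_HOST_FALLBACK into the flags word. */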
9528 if (cond)
9529 {
9530 tree *tp;
9531 if (is_gimple_omp_oacc (entry_stmt))
9532 tp = &goacc_flags;
9533 else
9534 {
9535 /* Ensure 'device' is of the correct type. */
9536 device = fold_convert_loc (device_loc, integer_type_node, device);
9537
9538 tp = &device;
9539 }
9540
9541 cond = gimple_boolify (cond);
9542
9543 basic_block cond_bb, then_bb, else_bb;
9544 edge e;
9545 tree tmp_var;
9546
9547 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9548 if (offloaded)
9549 e = split_block_after_labels (new_bb);
9550 else
9551 {
9552 gsi = gsi_last_nondebug_bb (new_bb);
9553 gsi_prev (&gsi);
9554 e = split_block (new_bb, gsi_stmt (gsi));
9555 }
9556 cond_bb = e->src;
9557 new_bb = e->dest;
9558 remove_edge (e);
9559
9560 then_bb = create_empty_bb (cond_bb);
9561 else_bb = create_empty_bb (then_bb);
9562 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9563 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9564
9565 stmt = gimple_build_cond_empty (cond);
9566 gsi = gsi_last_bb (cond_bb);
9567 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9568
9569 gsi = gsi_start_bb (then_bb);
9570 stmt = gimple_build_assign (tmp_var, *tp);
9571 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9572
9573 gsi = gsi_start_bb (else_bb);
9574 if (is_gimple_omp_oacc (entry_stmt))
9575 stmt = gimple_build_assign (tmp_var,
9576 BIT_IOR_EXPR,
9577 *tp,
9578 build_int_cst (integer_type_node,
9579 GOACC_FLAG_HOST_FALLBACK));
9580 else
9581 stmt = gimple_build_assign (tmp_var,
9582 build_int_cst (integer_type_node,
9583 GOMP_DEVICE_HOST_FALLBACK));
9584 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9585
9586 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9587 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9588 add_bb_to_loop (then_bb, cond_bb->loop_father);
9589 add_bb_to_loop (else_bb, cond_bb->loop_father);
9590 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9591 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9592
9593 *tp = tmp_var;
9594
9595 gsi = gsi_last_nondebug_bb (new_bb);
9596 }
9597 else
9598 {
9599 gsi = gsi_last_nondebug_bb (new_bb);
9600
9601 if (device != NULL_TREE)
9602 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9603 true, GSI_SAME_STMT);
9604 }
9605
9606 t = gimple_omp_target_data_arg (entry_stmt);
9607 if (t == NULL)
9608 {
9609 t1 = size_zero_node;
9610 t2 = build_zero_cst (ptr_type_node);
9611 t3 = t2;
9612 t4 = t2;
9613 }
9614 else
9615 {
9616 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9617 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9618 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9619 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9620 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9621 }
9622
9623 gimple *g;
9624 bool tagging = false;
9625 /* 11 is the maximum number of arguments used by any start_ix, not counting varargs. */
9626 auto_vec<tree, 11> args;
9627 if (is_gimple_omp_oacc (entry_stmt))
9628 {
9629 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9630 TREE_TYPE (goacc_flags), goacc_flags);
9631 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9632 NULL_TREE, true,
9633 GSI_SAME_STMT);
9634 args.quick_push (goacc_flags_m);
9635 }
9636 else
9637 args.quick_push (device);
9638 if (offloaded)
9639 args.quick_push (build_fold_addr_expr (child_fn));
9640 args.quick_push (t1);
9641 args.quick_push (t2);
9642 args.quick_push (t3);
9643 args.quick_push (t4);
9644 switch (start_ix)
9645 {
9646 case BUILT_IN_GOACC_DATA_START:
9647 case BUILT_IN_GOACC_DECLARE:
9648 case BUILT_IN_GOMP_TARGET_DATA:
9649 break;
9650 case BUILT_IN_GOMP_TARGET:
9651 case BUILT_IN_GOMP_TARGET_UPDATE:
9652 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9653 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9654 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9655 if (c)
9656 depend = OMP_CLAUSE_DECL (c);
9657 else
9658 depend = build_int_cst (ptr_type_node, 0);
9659 args.quick_push (depend);
9660 if (start_ix == BUILT_IN_GOMP_TARGET)
9661 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9662 break;
9663 case BUILT_IN_GOACC_PARALLEL:
9664 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9665 {
9666 tree dims = NULL_TREE;
9667 unsigned int ix;
9668
9669 /* For serial constructs we set all dimensions to 1. */
9670 for (ix = GOMP_DIM_MAX; ix--;)
9671 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9672 oacc_replace_fn_attrib (child_fn, dims);
9673 }
9674 else
9675 oacc_set_fn_attrib (child_fn, clauses, &args);
9676 tagging = true;
9677 /* FALLTHRU */
9678 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9679 case BUILT_IN_GOACC_UPDATE:
9680 {
9681 tree t_async = NULL_TREE;
9682
9683 /* If present, use the value specified by the respective
9684 clause, making sure that is of the correct type. */
9685 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9686 if (c)
9687 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9688 integer_type_node,
9689 OMP_CLAUSE_ASYNC_EXPR (c));
9690 else if (!tagging)
9691 /* Default values for t_async. */
9692 t_async = fold_convert_loc (gimple_location (entry_stmt),
9693 integer_type_node,
9694 build_int_cst (integer_type_node,
9695 GOMP_ASYNC_SYNC));
9696 if (tagging && t_async)
9697 {
9698 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9699
9700 if (TREE_CODE (t_async) == INTEGER_CST)
9701 {
9702 /* See if we can pack the async arg into the tag's
9703 operand. */
9704 i_async = TREE_INT_CST_LOW (t_async);
9705 if (i_async < GOMP_LAUNCH_OP_MAX)
9706 t_async = NULL_TREE;
9707 else
9708 i_async = GOMP_LAUNCH_OP_MAX;
9709 }
9710 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9711 i_async));
9712 }
9713 if (t_async)
9714 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9715 NULL_TREE, true,
9716 GSI_SAME_STMT));
9717
9718 /* Save the argument index, and ... */
9719 unsigned t_wait_idx = args.length ();
9720 unsigned num_waits = 0;
9721 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9722 if (!tagging || c)
9723 /* ... push a placeholder. */
9724 args.safe_push (integer_zero_node);
9725
9726 for (; c; c = OMP_CLAUSE_CHAIN (c))
9727 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9728 {
9729 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9730 integer_type_node,
9731 OMP_CLAUSE_WAIT_EXPR (c));
9732 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9733 GSI_SAME_STMT);
9734 args.safe_push (arg);
9735 num_waits++;
9736 }
9737
9738 if (!tagging || num_waits)
9739 {
9740 tree len;
9741
9742 /* Now that we know the number, update the placeholder. */
9743 if (tagging)
9744 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9745 else
9746 len = build_int_cst (integer_type_node, num_waits);
9747 len = fold_convert_loc (gimple_location (entry_stmt),
9748 unsigned_type_node, len);
9749 args[t_wait_idx] = len;
9750 }
9751 }
9752 break;
9753 default:
9754 gcc_unreachable ();
9755 }
9756 if (tagging)
9757 /* Push terminal marker - zero. */
9758 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9759
9760 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9761 gimple_set_location (g, gimple_location (entry_stmt));
9762 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9763 if (!offloaded)
9764 {
9765 g = gsi_stmt (gsi);
9766 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9767 gsi_remove (&gsi, true);
9768 }
9769 if (data_region && region->exit)
9770 {
9771 gsi = gsi_last_nondebug_bb (region->exit);
9772 g = gsi_stmt (gsi);
9773 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9774 gsi_remove (&gsi, true);
9775 }
9776 }
9777
9778 /* Expand the parallel region tree rooted at REGION. Expansion
9779 proceeds in depth-first order. Innermost regions are expanded
9780 first. This way, parallel regions that require a new function to
9781 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9782 internal dependencies in their body. */
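/* For example, given

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++) ...

   the inner GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL, so by the time the parallel body is outlined into
   a child function it already contains the loop's runtime calls (or, if
   determine_parallel_type recognizes a combined parallel+workshare, the
   combined GOMP_parallel_loop_* entry point is used). */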
9783
9784 static void
9785 expand_omp (struct omp_region *region)
9786 {
9787 omp_any_child_fn_dumped = false;
9788 while (region)
9789 {
9790 location_t saved_location;
9791 gimple *inner_stmt = NULL;
9792
9793 /* First, determine whether this is a combined parallel+workshare
9794 region. */
9795 if (region->type == GIMPLE_OMP_PARALLEL)
9796 determine_parallel_type (region);
9797
9798 if (region->type == GIMPLE_OMP_FOR
9799 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9800 inner_stmt = last_stmt (region->inner->entry);
9801
9802 if (region->inner)
9803 expand_omp (region->inner);
9804
9805 saved_location = input_location;
9806 if (gimple_has_location (last_stmt (region->entry)))
9807 input_location = gimple_location (last_stmt (region->entry));
9808
9809 switch (region->type)
9810 {
9811 case GIMPLE_OMP_PARALLEL:
9812 case GIMPLE_OMP_TASK:
9813 expand_omp_taskreg (region);
9814 break;
9815
9816 case GIMPLE_OMP_FOR:
9817 expand_omp_for (region, inner_stmt);
9818 break;
9819
9820 case GIMPLE_OMP_SECTIONS:
9821 expand_omp_sections (region);
9822 break;
9823
9824 case GIMPLE_OMP_SECTION:
9825 /* Individual omp sections are handled together with their
9826 parent GIMPLE_OMP_SECTIONS region. */
9827 break;
9828
9829 case GIMPLE_OMP_SINGLE:
9830 expand_omp_single (region);
9831 break;
9832
9833 case GIMPLE_OMP_ORDERED:
9834 {
9835 gomp_ordered *ord_stmt
9836 = as_a <gomp_ordered *> (last_stmt (region->entry));
9837 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9838 OMP_CLAUSE_DEPEND))
9839 {
9840 /* We'll expand these when expanding the corresponding
9841 worksharing region with an ordered(n) clause. */
9842 gcc_assert (region->outer
9843 && region->outer->type == GIMPLE_OMP_FOR);
9844 region->ord_stmt = ord_stmt;
9845 break;
9846 }
9847 }
9848 /* FALLTHRU */
9849 case GIMPLE_OMP_MASTER:
9850 case GIMPLE_OMP_TASKGROUP:
9851 case GIMPLE_OMP_CRITICAL:
9852 case GIMPLE_OMP_TEAMS:
9853 expand_omp_synch (region);
9854 break;
9855
9856 case GIMPLE_OMP_ATOMIC_LOAD:
9857 expand_omp_atomic (region);
9858 break;
9859
9860 case GIMPLE_OMP_TARGET:
9861 expand_omp_target (region);
9862 break;
9863
9864 default:
9865 gcc_unreachable ();
9866 }
9867
9868 input_location = saved_location;
9869 region = region->next;
9870 }
9871 if (omp_any_child_fn_dumped)
9872 {
9873 if (dump_file)
9874 dump_function_header (dump_file, current_function_decl, dump_flags);
9875 omp_any_child_fn_dumped = false;
9876 }
9877 }
9878
9879 /* Helper for build_omp_regions. Scan the dominator tree starting at
9880 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9881 true, the function ends once a single tree is built (otherwise, a
9882 whole forest of OMP constructs may be built). */
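/* As a rough illustration, for

     #pragma omp parallel
     {
       #pragma omp single
         ...
     }

   the walk produces an outer GIMPLE_OMP_PARALLEL region whose inner field
   points to a GIMPLE_OMP_SINGLE region; each region records the block of
   its directive as entry and the block of the matching GIMPLE_OMP_RETURN
   as exit, as handled case by case below.  */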
9883
9884 static void
9885 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9886 bool single_tree)
9887 {
9888 gimple_stmt_iterator gsi;
9889 gimple *stmt;
9890 basic_block son;
9891
9892 gsi = gsi_last_nondebug_bb (bb);
9893 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9894 {
9895 struct omp_region *region;
9896 enum gimple_code code;
9897
9898 stmt = gsi_stmt (gsi);
9899 code = gimple_code (stmt);
9900 if (code == GIMPLE_OMP_RETURN)
9901 {
9902 /* STMT is the return point out of region PARENT. Mark it
9903 as the exit point and make PARENT the immediately
9904 enclosing region. */
9905 gcc_assert (parent);
9906 region = parent;
9907 region->exit = bb;
9908 parent = parent->outer;
9909 }
9910 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9911 {
9912 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9913 GIMPLE_OMP_RETURN, but matches with
9914 GIMPLE_OMP_ATOMIC_LOAD. */
9915 gcc_assert (parent);
9916 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9917 region = parent;
9918 region->exit = bb;
9919 parent = parent->outer;
9920 }
9921 else if (code == GIMPLE_OMP_CONTINUE)
9922 {
9923 gcc_assert (parent);
9924 parent->cont = bb;
9925 }
9926 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9927 {
9928 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9929 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9930 }
9931 else
9932 {
9933 region = new_omp_region (bb, code, parent);
9934 /* Otherwise... */
9935 if (code == GIMPLE_OMP_TARGET)
9936 {
9937 switch (gimple_omp_target_kind (stmt))
9938 {
9939 case GF_OMP_TARGET_KIND_REGION:
9940 case GF_OMP_TARGET_KIND_DATA:
9941 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9942 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9943 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9944 case GF_OMP_TARGET_KIND_OACC_DATA:
9945 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9946 break;
9947 case GF_OMP_TARGET_KIND_UPDATE:
9948 case GF_OMP_TARGET_KIND_ENTER_DATA:
9949 case GF_OMP_TARGET_KIND_EXIT_DATA:
9950 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9951 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9952 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9953 /* ..., other than for those stand-alone directives... */
9954 region = NULL;
9955 break;
9956 default:
9957 gcc_unreachable ();
9958 }
9959 }
9960 else if (code == GIMPLE_OMP_ORDERED
9961 && omp_find_clause (gimple_omp_ordered_clauses
9962 (as_a <gomp_ordered *> (stmt)),
9963 OMP_CLAUSE_DEPEND))
9964 /* #pragma omp ordered depend is also just a stand-alone
9965 directive. */
9966 region = NULL;
9967 else if (code == GIMPLE_OMP_TASK
9968 && gimple_omp_task_taskwait_p (stmt))
9969 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
9970 region = NULL;
9971 /* ..., this directive becomes the parent for a new region. */
9972 if (region)
9973 parent = region;
9974 }
9975 }
9976
9977 if (single_tree && !parent)
9978 return;
9979
9980 for (son = first_dom_son (CDI_DOMINATORS, bb);
9981 son;
9982 son = next_dom_son (CDI_DOMINATORS, son))
9983 build_omp_regions_1 (son, parent, single_tree);
9984 }
9985
9986 /* Builds the tree of OMP regions rooted at ROOT, storing it in
9987 root_omp_region. */
9988
9989 static void
9990 build_omp_regions_root (basic_block root)
9991 {
9992 gcc_assert (root_omp_region == NULL);
9993 build_omp_regions_1 (root, NULL, true);
9994 gcc_assert (root_omp_region != NULL);
9995 }
9996
9997 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
9998
9999 void
10000 omp_expand_local (basic_block head)
10001 {
10002 build_omp_regions_root (head);
10003 if (dump_file && (dump_flags & TDF_DETAILS))
10004 {
10005 fprintf (dump_file, "\nOMP region tree\n\n");
10006 dump_omp_region (dump_file, root_omp_region, 0);
10007 fprintf (dump_file, "\n");
10008 }
10009
10010 remove_exit_barriers (root_omp_region);
10011 expand_omp (root_omp_region);
10012
10013 omp_free_regions ();
10014 }
10015
10016 /* Scan the CFG and build a tree of OMP regions, storing its root in
10017 root_omp_region. */
10018
10019 static void
10020 build_omp_regions (void)
10021 {
10022 gcc_assert (root_omp_region == NULL);
10023 calculate_dominance_info (CDI_DOMINATORS);
10024 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10025 }
10026
10027 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10028
10029 static unsigned int
10030 execute_expand_omp (void)
10031 {
10032 build_omp_regions ();
10033
10034 if (!root_omp_region)
10035 return 0;
10036
10037 if (dump_file)
10038 {
10039 fprintf (dump_file, "\nOMP region tree\n\n");
10040 dump_omp_region (dump_file, root_omp_region, 0);
10041 fprintf (dump_file, "\n");
10042 }
10043
10044 remove_exit_barriers (root_omp_region);
10045
10046 expand_omp (root_omp_region);
10047
10048 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10049 verify_loop_structure ();
10050 cleanup_tree_cfg ();
10051
10052 omp_free_regions ();
10053
10054 return 0;
10055 }
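/* The region tree dumped above can be inspected with something along the
   lines of "gcc -fopenmp -fdump-tree-ompexp foo.c" (the pass is named
   "ompexp" below), which writes this pass's dump to a foo.c.*.ompexp
   file.  */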
10056
10057 /* OMP expansion -- the default pass, run before creation of SSA form. */
10058
10059 namespace {
10060
10061 const pass_data pass_data_expand_omp =
10062 {
10063 GIMPLE_PASS, /* type */
10064 "ompexp", /* name */
10065 OPTGROUP_OMP, /* optinfo_flags */
10066 TV_NONE, /* tv_id */
10067 PROP_gimple_any, /* properties_required */
10068 PROP_gimple_eomp, /* properties_provided */
10069 0, /* properties_destroyed */
10070 0, /* todo_flags_start */
10071 0, /* todo_flags_finish */
10072 };
10073
10074 class pass_expand_omp : public gimple_opt_pass
10075 {
10076 public:
10077 pass_expand_omp (gcc::context *ctxt)
10078 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10079 {}
10080
10081 /* opt_pass methods: */
10082 virtual unsigned int execute (function *)
10083 {
10084 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10085 || flag_openmp_simd != 0)
10086 && !seen_error ());
10087
10088 /* This pass always runs, to provide PROP_gimple_eomp.
10089 But often, there is nothing to do. */
10090 if (!gate)
10091 return 0;
10092
10093 return execute_expand_omp ();
10094 }
10095
10096 }; // class pass_expand_omp
10097
10098 } // anon namespace
10099
10100 gimple_opt_pass *
10101 make_pass_expand_omp (gcc::context *ctxt)
10102 {
10103 return new pass_expand_omp (ctxt);
10104 }
10105
10106 namespace {
10107
10108 const pass_data pass_data_expand_omp_ssa =
10109 {
10110 GIMPLE_PASS, /* type */
10111 "ompexpssa", /* name */
10112 OPTGROUP_OMP, /* optinfo_flags */
10113 TV_NONE, /* tv_id */
10114 PROP_cfg | PROP_ssa, /* properties_required */
10115 PROP_gimple_eomp, /* properties_provided */
10116 0, /* properties_destroyed */
10117 0, /* todo_flags_start */
10118 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10119 };
10120
10121 class pass_expand_omp_ssa : public gimple_opt_pass
10122 {
10123 public:
10124 pass_expand_omp_ssa (gcc::context *ctxt)
10125 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10126 {}
10127
10128 /* opt_pass methods: */
10129 virtual bool gate (function *fun)
10130 {
10131 return !(fun->curr_properties & PROP_gimple_eomp);
10132 }
10133 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10134 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10135
10136 }; // class pass_expand_omp_ssa
10137
10138 } // anon namespace
10139
10140 gimple_opt_pass *
10141 make_pass_expand_omp_ssa (gcc::context *ctxt)
10142 {
10143 return new pass_expand_omp_ssa (ctxt);
10144 }
10145
10146 /* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
10147 GIMPLE_* codes. */
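/* As an example of the edges added below (a sketch): for a worksharing
   loop, the GIMPLE_OMP_CONTINUE case creates the latch edge from the
   GIMPLE_OMP_CONTINUE block back to the block following GIMPLE_OMP_FOR,
   plus an abnormal edge from the GIMPLE_OMP_FOR block straight past the
   GIMPLE_OMP_CONTINUE block for the zero-iteration case, so the region
   has a loop-shaped CFG well before it is expanded.  */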
10148
10149 bool
10150 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10151 int *region_idx)
10152 {
10153 gimple *last = last_stmt (bb);
10154 enum gimple_code code = gimple_code (last);
10155 struct omp_region *cur_region = *region;
10156 bool fallthru = false;
10157
10158 switch (code)
10159 {
10160 case GIMPLE_OMP_PARALLEL:
10161 case GIMPLE_OMP_FOR:
10162 case GIMPLE_OMP_SINGLE:
10163 case GIMPLE_OMP_TEAMS:
10164 case GIMPLE_OMP_MASTER:
10165 case GIMPLE_OMP_TASKGROUP:
10166 case GIMPLE_OMP_CRITICAL:
10167 case GIMPLE_OMP_SECTION:
10168 cur_region = new_omp_region (bb, code, cur_region);
10169 fallthru = true;
10170 break;
10171
10172 case GIMPLE_OMP_TASK:
10173 cur_region = new_omp_region (bb, code, cur_region);
10174 fallthru = true;
10175 if (gimple_omp_task_taskwait_p (last))
10176 cur_region = cur_region->outer;
10177 break;
10178
10179 case GIMPLE_OMP_ORDERED:
10180 cur_region = new_omp_region (bb, code, cur_region);
10181 fallthru = true;
10182 if (omp_find_clause (gimple_omp_ordered_clauses
10183 (as_a <gomp_ordered *> (last)),
10184 OMP_CLAUSE_DEPEND))
10185 cur_region = cur_region->outer;
10186 break;
10187
10188 case GIMPLE_OMP_TARGET:
10189 cur_region = new_omp_region (bb, code, cur_region);
10190 fallthru = true;
10191 switch (gimple_omp_target_kind (last))
10192 {
10193 case GF_OMP_TARGET_KIND_REGION:
10194 case GF_OMP_TARGET_KIND_DATA:
10195 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10196 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10197 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10198 case GF_OMP_TARGET_KIND_OACC_DATA:
10199 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10200 break;
10201 case GF_OMP_TARGET_KIND_UPDATE:
10202 case GF_OMP_TARGET_KIND_ENTER_DATA:
10203 case GF_OMP_TARGET_KIND_EXIT_DATA:
10204 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10205 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10206 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10207 cur_region = cur_region->outer;
10208 break;
10209 default:
10210 gcc_unreachable ();
10211 }
10212 break;
10213
10214 case GIMPLE_OMP_SECTIONS:
10215 cur_region = new_omp_region (bb, code, cur_region);
10216 fallthru = true;
10217 break;
10218
10219 case GIMPLE_OMP_SECTIONS_SWITCH:
10220 fallthru = false;
10221 break;
10222
10223 case GIMPLE_OMP_ATOMIC_LOAD:
10224 case GIMPLE_OMP_ATOMIC_STORE:
10225 fallthru = true;
10226 break;
10227
10228 case GIMPLE_OMP_RETURN:
10229 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10230 somewhere other than the next block. This will be
10231 created later. */
10232 cur_region->exit = bb;
10233 if (cur_region->type == GIMPLE_OMP_TASK)
10234 /* Add an edge corresponding to not scheduling the task
10235 immediately. */
10236 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10237 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10238 cur_region = cur_region->outer;
10239 break;
10240
10241 case GIMPLE_OMP_CONTINUE:
10242 cur_region->cont = bb;
10243 switch (cur_region->type)
10244 {
10245 case GIMPLE_OMP_FOR:
10246 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10247 successor edges as abnormal to prevent splitting
10248 them. */
10249 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10250 /* Make the loopback edge. */
10251 make_edge (bb, single_succ (cur_region->entry),
10252 EDGE_ABNORMAL);
10253
10254 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10255 corresponds to the case that the body of the loop
10256 is not executed at all. */
10257 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10258 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10259 fallthru = false;
10260 break;
10261
10262 case GIMPLE_OMP_SECTIONS:
10263 /* Wire up the edges into and out of the nested sections. */
10264 {
10265 basic_block switch_bb = single_succ (cur_region->entry);
10266
10267 struct omp_region *i;
10268 for (i = cur_region->inner; i ; i = i->next)
10269 {
10270 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10271 make_edge (switch_bb, i->entry, 0);
10272 make_edge (i->exit, bb, EDGE_FALLTHRU);
10273 }
10274
10275 /* Make the loopback edge to the block with
10276 GIMPLE_OMP_SECTIONS_SWITCH. */
10277 make_edge (bb, switch_bb, 0);
10278
10279 /* Make the edge from the switch to exit. */
10280 make_edge (switch_bb, bb->next_bb, 0);
10281 fallthru = false;
10282 }
10283 break;
10284
10285 case GIMPLE_OMP_TASK:
10286 fallthru = true;
10287 break;
10288
10289 default:
10290 gcc_unreachable ();
10291 }
10292 break;
10293
10294 default:
10295 gcc_unreachable ();
10296 }
10297
10298 if (*region != cur_region)
10299 {
10300 *region = cur_region;
10301 if (cur_region)
10302 *region_idx = cur_region->entry->index;
10303 else
10304 *region_idx = 0;
10305 }
10306
10307 return fallthru;
10308 }